lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software; you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation; either version 3, or (at your option)
  40 any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program; see the file COPYING.  If not, write to the
  49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  50 Boston, MA 02110-1301, USA. */
  51
  52
  53 /* NB To comply with the above BSD license, copyright information is
  54 reproduced in etc/ETAGS.README.  That file should be updated when the
  55 above notices are.
  56
  57 To the best of our knowledge, this code was originally based on the
  58 ctags.c distributed with BSD4.2, which was copyrighted by the
  59 University of California, as described above. */
  60
  61
  62 /*
  63  * Authors:
  64  *      Ctags originally by Ken Arnold.
  65  *      Fortran added by Jim Kleckner.
  66  *      Ed Pelegri-Llopart added C typedefs.
  67  *      Gnu Emacs TAGS format and modifications by RMS?
  68  * 1989 Sam Kendall added C++.
  69  * 1992 Joseph B. Wells improved C and C++ parsing.
  70  * 1993 Francesco Potortì reorganised C and C++.
  71  * 1994 Line-by-line regexp tags by Tom Tromey.
  72  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  73  * 2002 #line directives by Francesco Potortì.
  74  *
  75  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  76  */
  77
  78 /*
  79  * If you want to add support for a new language, start by looking at the LUA
  80  * language, which is the simplest.  Alternatively, consider shipping a
  81  * configuration file containing regexp definitions for etags.
  82  */
  83
     /* Revision string embedded in the program.  It has external linkage and
        begins with "@(#)" -- presumably so that what(1)-style tools can
        locate it in the binary; TODO confirm.  */
  84 char pot_etags_version[] = "@(#) pot revision number is 17.26";
  85
  86 #define TRUE    1
  87 #define FALSE   0
  88
  89 #ifdef DEBUG
  90 #  undef DEBUG
  91 #  define DEBUG TRUE
  92 #else
  93 #  define DEBUG  FALSE
  94 #  define NDEBUG                /* disable assert */
  95 #endif
  96
  97 #ifdef HAVE_CONFIG_H
  98 # include <config.h>
  99   /* On some systems, Emacs defines static as nothing for the sake
 100      of unexec.  We don't want that here since we don't use unexec. */
 101 # undef static
 102 # ifndef PTR                    /* for XEmacs */
 103 #   define PTR void *
 104 # endif
 105 # ifndef __P                    /* for XEmacs */
 106 #   define __P(args) args
 107 # endif
 108 #else  /* no config.h */
 109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 110 #   define __P(args) args       /* use prototypes */
 111 #   define PTR void *           /* for generic pointers */
 112 # else /* not standard C */
 113 #   define __P(args) ()         /* no prototypes */
 114 #   define const                /* remove const for old compilers' sake */
 115 #   define PTR long *           /* don't use void* */
 116 # endif
 117 #endif /* !HAVE_CONFIG_H */
 118
 119 #ifndef _GNU_SOURCE
 120 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 121 #endif
 122
 123 /* WIN32_NATIVE is for XEmacs.
 124    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 125 #ifdef WIN32_NATIVE
 126 # undef MSDOS
 127 # undef  WINDOWSNT
 128 # define WINDOWSNT
 129 #endif /* WIN32_NATIVE */
 130
 131 #ifdef MSDOS
 132 # undef MSDOS
 133 # define MSDOS TRUE
 134 # include <fcntl.h>
 135 # include <sys/param.h>
 136 # include <io.h>
 137 # ifndef HAVE_CONFIG_H
 138 #   define DOS_NT
 139 #   include <sys/config.h>
 140 # endif
 141 #else
 142 # define MSDOS FALSE
 143 #endif /* MSDOS */
 144
 145 #ifdef WINDOWSNT
 146 # include <stdlib.h>
 147 # include <fcntl.h>
 148 # include <string.h>
 149 # include <direct.h>
 150 # include <io.h>
 151 # define MAXPATHLEN _MAX_PATH
 152 # undef HAVE_NTGUI
 153 # undef  DOS_NT
 154 # define DOS_NT
 155 # ifndef HAVE_GETCWD
 156 #   define HAVE_GETCWD
 157 # endif /* undef HAVE_GETCWD */
 158 #else /* not WINDOWSNT */
 159 # ifdef STDC_HEADERS
 160 #  include <stdlib.h>
 161 #  include <string.h>
 162 # else /* no standard C headers */
 163     extern char *getenv ();
 164 #  ifdef VMS
 165 #   define EXIT_SUCCESS 1
 166 #   define EXIT_FAILURE 0
 167 #  else /* no VMS */
 168 #   define EXIT_SUCCESS 0
 169 #   define EXIT_FAILURE 1
 170 #  endif
 171 # endif
 172 #endif /* !WINDOWSNT */
 173
 174 #ifdef HAVE_UNISTD_H
 175 # include <unistd.h>
 176 #else
 177 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 178     extern char *getcwd (char *buf, size_t size);
 179 # endif
 180 #endif /* HAVE_UNISTD_H */
 181
 182 #include <stdio.h>
 183 #include <ctype.h>
 184 #include <errno.h>
 185 #ifndef errno
 186   extern int errno;
 187 #endif
 188 #include <sys/types.h>
 189 #include <sys/stat.h>
 190
 191 #include <assert.h>
 192 #ifdef NDEBUG
 193 # undef  assert                 /* some systems have a buggy assert.h */
 194 # define assert(x) ((void) 0)
 195 #endif
 196
 197 #if !defined (S_ISREG) && defined (S_IFREG)
 198 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 199 #endif
 200
 201 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 202 # define NO_LONG_OPTIONS TRUE
 203 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 204   extern char *optarg;
 205   extern int optind, opterr;
 206 #else
 207 # define NO_LONG_OPTIONS FALSE
 208 # include <getopt.h>
 209 #endif /* NO_LONG_OPTIONS */
 210
 211 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 212 # ifdef __CYGWIN__              /* compiling on Cygwin */
 213                              !!! NOTICE !!!
 214  the regex.h distributed with Cygwin is not compatible with etags, alas!
 215 If you want regular expression support, you should delete this notice and
 216               arrange to use the GNU regex.h and regex.c.
 217 # endif
 218 #endif
 219 #include <regex.h>
 220
 221 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 222  Leave it undefined to make the program "etags", which makes emacs-style
 223  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 224 #ifdef CTAGS
 225 # undef  CTAGS
 226 # define CTAGS TRUE
 227 #else
 228 # define CTAGS FALSE
 229 #endif
 230
     /* String equality predicates.  Each expands to an expression that is
        nonzero when S and T compare equal: streq/strneq use strcmp/strncmp,
        strcaseeq/strncaseeq use etags_strcasecmp/etags_strncasecmp, and the
        `n' variants pass N as the comparison length.
        Both S and T must be non-NULL, because the comparison functions
        dereference both arguments.  The assert checks this; note that
        assert is compiled out unless DEBUG is defined.  */
 231 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 232 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 233 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 234 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 235
     /* Character classification helpers.  CHAR converts its argument to
        unsigned int and masks it to the range 0 .. CHARS-1, so that a
        possibly negative `char' value can index the CHARS-sized lookup
        tables and be handed to the <ctype.h> functions.  */
 236 #define CHARS 256               /* 2^8: one entry per 8-bit char value */
 237 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 238 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 239 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 240 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 241 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 242 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 243
     /* <ctype.h> wrappers that apply CHAR to the argument first.  */
 244 #define ISALNUM(c)      isalnum (CHAR(c))
 245 #define ISALPHA(c)      isalpha (CHAR(c))
 246 #define ISDIGIT(c)      isdigit (CHAR(c))
 247 #define ISLOWER(c)      islower (CHAR(c))
 248
     /* Case conversion, likewise applied to CHAR(c).  */
 249 #define lowcase(c)      tolower (CHAR(c))
 250 #define upcase(c)       toupper (CHAR(c))
 251
 252
 253 /*
 254  *      xnew, xrnew -- allocate, reallocate storage
 255  *
 256  * SYNOPSIS:    Type *xnew (int n, Type);
 257  *              void xrnew (OldPointer, int n, Type);
      *
      * xnew requests room for N objects of Type and casts the result to
      * Type *.  xrnew resizes the storage OldPointer refers to so that it
      * holds N objects of Type and assigns the result back to OldPointer,
      * which therefore must be an lvalue.
      * When DEBUG is nonzero the requests go through trace_malloc and
      * trace_realloc (presumably declared in chkmalloc.h), which are also
      * given the caller's __FILE__ and __LINE__; otherwise they go through
      * xmalloc and xrealloc.
 258  */
 259 #if DEBUG
 260 # include "chkmalloc.h"
 261 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 262                                                   (n) * sizeof (Type)))
 263 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 264                                         (char *) (op), (n) * sizeof (Type)))
 265 #else
 266 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 267 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 268                                         (char *) (op), (n) * sizeof (Type)))
 269 #endif
 270
     /* In this file `bool' is simply another spelling of int.  */
 271 #define bool int
 272
     /* Type of a per-language parse function (see the `function' member of
        `language'): it takes a FILE * and returns nothing.  */
 273 typedef void Lang_function __P((FILE *));
 274
     /* Pairs a compressed-file name suffix with the command used to
        decompress such files.  */
 275 typedef struct
 276 {
 277   char *suffix;                 /* file name suffix for this compressor */
 278   char *command;                /* takes one arg and decompresses to stdout */
 279 } compressor;
 280
     /* Description of one supported language: its name and help text, the
        function that parses it, and the suffixes, file names and
        interpreter names associated with its files.  The suffix, file name
        and interpreter arrays defined in this file all end with a NULL
        entry.  */
 281 typedef struct
 282 {
 283   char *name;                   /* language name */
 284   char *help;                   /* detailed help for the language */
 285   Lang_function *function;      /* parse function */
 286   char **suffixes;              /* name suffixes of this language's files */
 287   char **filenames;             /* names of this language's files */
 288   char **interpreters;          /* interpreters for this language */
 289   bool metasource;              /* source used to generate other sources */
 290 } language;
 291
     /* Description of one input file.  Descriptions are chained through
        `next' into a singly linked list (see `fdhead').  */
 292 typedef struct fdesc
 293 {
 294   struct fdesc *next;           /* for the linked list */
 295   char *infname;                /* uncompressed input file name */
 296   char *infabsname;             /* absolute uncompressed input file name */
 297   char *infabsdir;              /* absolute dir of input file */
 298   char *taggedfname;            /* file name to write in tagfile */
 299   language *lang;               /* language of file */
 300   char *prop;                   /* file properties to write in tagfile */
 301   bool usecharno;               /* etags tags shall contain char number */
 302   bool written;                 /* entry written in the tags file */
 303 } fdesc;
 304
     /* One tag.  Nodes are linked through `left' and `right' into the
        binary tree of tags (see `nodehead'); each node points back to the
        description of the file it came from.  */
 305 typedef struct node_st
 306 {                               /* sorting structure */
 307   struct node_st *left, *right; /* left and right sons */
 308   fdesc *fdp;                   /* description of file to whom tag belongs */
 309   char *name;                   /* tag name */
 310   char *regex;                  /* search regexp */
 311   bool valid;                   /* write this tag on the tag file */
 312   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 313   bool been_warned;             /* warning already given for duplicated tag */
 314   int lno;                      /* line number tag is on */
 315   long cno;                     /* character number line starts on */
 316 } node;
 317
 318 /*
 319  * A `linebuffer' is a structure which holds a line of text.
 320  * `readline_internal' reads a line from a stream into a linebuffer
 321  * and works regardless of the length of the line.
 322  * SIZE is the size of BUFFER, LEN is the length of the string in
 323  * BUFFER after readline reads it.
 324  */
 325 typedef struct
 326 {
 327   long size;                    /* size of `buffer' */
 328   int len;                      /* length of the string held in `buffer' */
 329   char *buffer;                 /* the text */
 330 } linebuffer;
 331
 332 /* One command-line item, tagged with its kind.  Used to support mixing
        of --lang and file names: each item records whether it is a language
        specification, a regexp, a file name, the stdin marker or the end
        marker, together with the associated language and the argument
        text.  */
 333 typedef struct
 334 {
 335   enum {
 336     at_language,                /* a language specification */
 337     at_regexp,                  /* a regular expression */
 338     at_filename,                /* a file name */
 339     at_stdin,                   /* read from stdin here */
 340     at_end                      /* stop parsing the list */
 341   } arg_type;                   /* argument type */
 342   language *lang;               /* language associated with the argument */
 343   char *what;                   /* the argument itself */
 344 } argument;
 345
 346 /* Structure defining a regular expression.  Instances are chained
        through `p_next' into the list of all regexps (see `p_head').  */
 347 typedef struct regexp
 348 {
 349   struct regexp *p_next;        /* pointer to next in list */
 350   language *lang;               /* if set, use only for this language */
 351   char *pattern;                /* the regexp pattern */
 352   char *name;                   /* tag name */
 353   struct re_pattern_buffer *pat; /* the compiled pattern */
 354   struct re_registers regs;     /* re registers */
 355   bool error_signaled;          /* already signaled for this regexp */
 356   bool force_explicit_name;     /* do not allow implicit tag name */
 357   bool ignore_case;             /* ignore case when matching */
 358   bool multi_line;              /* do a multi-line match on the whole file */
 359 } regexp;
 360
 361
 362 /* Many compilers barf on this:
 363         Lang_function Ada_funcs;
 364    so let's write it this way */
 365 static void Ada_funcs __P((FILE *));
 366 static void Asm_labels __P((FILE *));
 367 static void C_entries __P((int c_ext, FILE *));
 368 static void default_C_entries __P((FILE *));
 369 static void plain_C_entries __P((FILE *));
 370 static void Cjava_entries __P((FILE *));
 371 static void Cobol_paragraphs __P((FILE *));
 372 static void Cplusplus_entries __P((FILE *));
 373 static void Cstar_entries __P((FILE *));
 374 static void Erlang_functions __P((FILE *));
 375 static void Forth_words __P((FILE *));
 376 static void Fortran_functions __P((FILE *));
 377 static void HTML_labels __P((FILE *));
 378 static void Lisp_functions __P((FILE *));
 379 static void Lua_functions __P((FILE *));
 380 static void Makefile_targets __P((FILE *));
 381 static void Pascal_functions __P((FILE *));
 382 static void Perl_functions __P((FILE *));
 383 static void PHP_functions __P((FILE *));
 384 static void PS_functions __P((FILE *));
 385 static void Prolog_functions __P((FILE *));
 386 static void Python_functions __P((FILE *));
 387 static void Scheme_functions __P((FILE *));
 388 static void TeX_commands __P((FILE *));
 389 static void Texinfo_nodes __P((FILE *));
 390 static void Yacc_entries __P((FILE *));
 391 static void just_read_file __P((FILE *));
 392
 393 static void print_language_names __P((void));
 394 static void print_version __P((void));
 395 static void print_help __P((argument *));
 396 int main __P((int, char **));
 397
 398 static compressor *get_compressor_from_suffix __P((char *, char **));
 399 static language *get_language_from_langname __P((const char *));
 400 static language *get_language_from_interpreter __P((char *));
 401 static language *get_language_from_filename __P((char *, bool));
 402 static void readline __P((linebuffer *, FILE *));
 403 static long readline_internal __P((linebuffer *, FILE *));
 404 static bool nocase_tail __P((char *));
 405 static void get_tag __P((char *, char **));
 406
 407 static void analyse_regex __P((char *));
 408 static void free_regexps __P((void));
 409 static void regex_tag_multiline __P((void));
 410 static void error __P((const char *, const char *));
 411 static void suggest_asking_for_help __P((void));
 412 void fatal __P((char *, char *));
 413 static void pfatal __P((char *));
 414 static void add_node __P((node *, node **));
 415
 416 static void init __P((void));
 417 static void process_file_name __P((char *, language *));
 418 static void process_file __P((FILE *, char *, language *));
 419 static void find_entries __P((FILE *));
 420 static void free_tree __P((node *));
 421 static void free_fdesc __P((fdesc *));
 422 static void pfnote __P((char *, bool, char *, int, int, long));
 423 static void make_tag __P((char *, int, bool, char *, int, int, long));
 424 static void invalidate_nodes __P((fdesc *, node **));
 425 static void put_entries __P((node *));
 426
 427 static char *concat __P((char *, char *, char *));
 428 static char *skip_spaces __P((char *));
 429 static char *skip_non_spaces __P((char *));
 430 static char *savenstr __P((char *, int));
 431 static char *savestr __P((char *));
 432 static char *etags_strchr __P((const char *, int));
 433 static char *etags_strrchr __P((const char *, int));
 434 static int etags_strcasecmp __P((const char *, const char *));
 435 static int etags_strncasecmp __P((const char *, const char *, int));
 436 static char *etags_getcwd __P((void));
 437 static char *relative_filename __P((char *, char *));
 438 static char *absolute_filename __P((char *, char *));
 439 static char *absolute_dirname __P((char *, char *));
 440 static bool filename_is_absolute __P((char *f));
 441 static void canonicalize_filename __P((char *));
 442 static void linebuffer_init __P((linebuffer *));
 443 static void linebuffer_setlen __P((linebuffer *, int));
 444 static PTR xmalloc __P((unsigned int));
 445 static PTR xrealloc __P((char *, unsigned int));
 446
 447 \f
 448 static char searchar = '/';     /* use /.../ searches */
 449
 450 static char *tagfile;           /* output file */
 451 static char *progname;          /* name this program was invoked with */
 452 static char *cwd;               /* current working directory */
 453 static char *tagfiledir;        /* directory of tagfile */
 454 static FILE *tagf;              /* ioptr for tags file */
 455
 456 static fdesc *fdhead;           /* head of file description list */
 457 static fdesc *curfdp;           /* current file description */
 458 static int lineno;              /* line number of current line */
 459 static long charno;             /* current character number */
 460 static long linecharno;         /* charno of start of current line */
 461 static char *dbp;               /* pointer to start of current tag */
 462
 463 static const int invalidcharno = -1;
 464
 465 static node *nodehead;          /* the head of the binary tree of tags */
 466 static node *last_node;         /* the last node created */
 467
 468 static linebuffer lb;           /* the current line */
 469 static linebuffer filebuf;      /* a buffer containing the whole file */
 470 static linebuffer token_name;   /* a buffer containing a tag name */
 471
 472 /* boolean "functions" (see init)       */
 473 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 474 static char
 475   /* white chars */
 476   *white = " \f\t\n\r\v",
 477   /* not in a name */
 478   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 479   /* token ending chars */
 480   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 481   /* token starting chars */
 482   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 483   /* valid in-token chars */
 484   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 485
 486 static bool append_to_tagfile;  /* -a: append to tags */
 487 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 488 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 489 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 490                                 /* 0 struct/enum/union decls, and C++ */
 491                                 /* member functions. */
 492 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 493                                 /* constants and variables. */
 494                                 /* -D: opposite of -d.  Default under ctags. */
 495 static bool globals;            /* create tags for global variables */
 496 static bool members;            /* create tags for C member variables */
 497 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 498 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 499 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 500 static bool update;             /* -u: update tags */
 501 static bool vgrind_style;       /* -v: create vgrind style index output */
 502 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 503 static bool cxref_style;        /* -x: create cxref style output */
 504 static bool cplusplus;          /* .[hc] means C++, not C */
 505 static bool ignoreindent;       /* -I: ignore indentation in C */
 506 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 507
 508 /* STDIN is defined in LynxOS system headers */
 509 #ifdef STDIN
 510 # undef STDIN
 511 #endif
 512
 513 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 514 static bool parsing_stdin;      /* --parse-stdin used */
 515
 516 static regexp *p_head;          /* list of all regexps */
 517 static bool need_filebuf;       /* some regexes are multi-line */
 518
     /* Long option table in the layout used by getopt_long: option name,
        whether an argument is taken, flag pointer, value.  For entries
        whose flag pointer is NULL the value column is what getopt_long
        returns; for entries with a non-NULL flag pointer the value is
        stored into the pointed-to variable instead.  The CTAGS macro
        selects which of the two program-specific groups is compiled in,
        and the table ends with an entry whose name is NULL.
        NOTE(review): "help" appears twice ('h' and 'H'); presumably only
        the first entry can ever be matched -- confirm before removing.  */
 519 static struct option longopts[] =
 520 {
 521   { "append",             no_argument,       NULL,               'a'   },
 522   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 523   { "c++",                no_argument,       NULL,               'C'   },
 524   { "declarations",       no_argument,       &declarations,      TRUE  },
 525   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 526   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 527   { "help",               no_argument,       NULL,               'h'   },
 528   { "help",               no_argument,       NULL,               'H'   },
 529   { "ignore-indentation", no_argument,       NULL,               'I'   },
 530   { "language",           required_argument, NULL,               'l'   },
 531   { "members",            no_argument,       &members,           TRUE  },
 532   { "no-members",         no_argument,       &members,           FALSE },
 533   { "output",             required_argument, NULL,               'o'   },
 534   { "regex",              required_argument, NULL,               'r'   },
 535   { "no-regex",           no_argument,       NULL,               'R'   },
 536   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 537   { "parse-stdin",        required_argument, NULL,               STDIN },
 538   { "version",            no_argument,       NULL,               'V'   },
 539
 540 #if CTAGS /* Ctags options */
 541   { "backward-search",    no_argument,       NULL,               'B'   },
 542   { "cxref",              no_argument,       NULL,               'x'   },
 543   { "defines",            no_argument,       NULL,               'd'   },
 544   { "globals",            no_argument,       &globals,           TRUE  },
 545   { "typedefs",           no_argument,       NULL,               't'   },
 546   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 547   { "update",             no_argument,       NULL,               'u'   },
 548   { "vgrind",             no_argument,       NULL,               'v'   },
 549   { "no-warn",            no_argument,       NULL,               'w'   },
 550
 551 #else /* Etags options */
 552   { "no-defines",         no_argument,       NULL,               'D'   },
 553   { "no-globals",         no_argument,       &globals,           FALSE },
 554   { "include",            required_argument, NULL,               'i'   },
 555 #endif
 556   { NULL }
 557 };
 558
     /* Known compressed-file suffixes and the command that decompresses
        each one.  The table ends with an entry whose suffix is NULL.  */
 559 static compressor compressors[] =
 560 {
 561   { "z", "gzip -d -c"},
 562   { "Z", "gzip -d -c"},
 563   { "gz", "gzip -d -c"},
 564   { "GZ", "gzip -d -c"},
 565   { "bz2", "bzip2 -d -c" },
 566   { NULL }
 567 };
 568
 569 /*
 570  * Language stuff.
 571  */
 572
 573 /* Ada code */
 574 static char *Ada_suffixes [] =
 575   { "ads", "adb", "ada", NULL };
 576 static char Ada_help [] =
 577 "In Ada code, functions, procedures, packages, tasks and types are\n\
 578 tags.  Use the `--packages-only' option to create tags for\n\
 579 packages only.\n\
 580 Ada tag names have suffixes indicating the type of entity:\n\
 581         Entity type:    Qualifier:\n\
 582         ------------    ----------\n\
 583         function        /f\n\
 584         procedure       /p\n\
 585         package spec    /s\n\
 586         package body    /b\n\
 587         type            /t\n\
 588         task            /k\n\
 589 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 590 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 591 will just search for any tag `bidule'.";
 592
 593 /* Assembly code */
 594 static char *Asm_suffixes [] =
 595   { "a",        /* Unix assembler */
 596     "asm", /* Microcontroller assembly */
 597     "def", /* BSO/Tasking definition includes  */
 598     "inc", /* Microcontroller include files */
 599     "ins", /* Microcontroller include files */
 600     "s", "sa", /* Unix assembler */
 601     "S",   /* cpp-processed Unix assembler */
 602     "src", /* BSO/Tasking C compiler output */
 603     NULL
 604   };
 605 static char Asm_help [] =
 606 "In assembler code, labels appearing at the beginning of a line,\n\
 607 followed by a colon, are tags.";
 608
 609
 610 /* Note that .c and .h can be considered C++, if the --c++ flag was
 611    given, or if the `class' or `template' keywords are met inside the file.
 612    That is why default_C_entries is called for these. */
 613 static char *default_C_suffixes [] =
 614   { "c", "h", NULL };
 615 static char default_C_help [] =
 616 "In C code, any C function or typedef is a tag, and so are\n\
 617 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 618 definitions and `enum' constants are tags unless you specify\n\
 619 `--no-defines'.  Global variables are tags unless you specify\n\
 620 `--no-globals' and so are struct members unless you specify\n\
 621 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 622 `--no-members' can make the tags table file much smaller.\n\
 623 You can tag function declarations and external variables by\n\
 624 using `--declarations'.";
 625
 626 static char *Cplusplus_suffixes [] =
 627   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 628     "M",                        /* Objective C++ */
 629     "pdb",                      /* Postscript with C syntax */
 630     NULL };
 631 static char Cplusplus_help [] =
 632 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 633 --help --lang=c --lang=c++ for full help.)\n\
 634 In addition to C tags, member functions are also recognized.  Member\n\
 635 variables are recognized unless you use the `--no-members' option.\n\
 636 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 637 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 638 `operator+'.";
 639
 640 static char *Cjava_suffixes [] =
 641   { "java", NULL };
 642 static char Cjava_help [] =
 643 "In Java code, all the tags constructs of C and C++ code are\n\
 644 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 645
 646
 647 static char *Cobol_suffixes [] =
 648   { "COB", "cob", NULL };
 649 static char Cobol_help [] =
 650 "In Cobol code, tags are paragraph names; that is, any word\n\
 651 starting in column 8 and followed by a period.";
 652
 653 static char *Cstar_suffixes [] =
 654   { "cs", "hs", NULL };
 655
 656 static char *Erlang_suffixes [] =
 657   { "erl", "hrl", NULL };
 658 static char Erlang_help [] =
 659 "In Erlang code, the tags are the functions, records and macros\n\
 660 defined in the file.";
 661
 662 static char *Forth_suffixes [] = /* Forth file name suffixes; file-local like the other suffix tables */
 663   { "fth", "tok", NULL };
 664 static char Forth_help [] =
 665 "In Forth code, tags are words defined by `:',\n\
 666 constant, code, create, defer, value, variable, buffer:, field.";
 667
 668 static char *Fortran_suffixes [] =
 669   { "F", "f", "f90", "for", NULL };
 670 static char Fortran_help [] =
 671 "In Fortran code, functions, subroutines and block data are tags.";
 672
 673 static char *HTML_suffixes [] =
 674   { "htm", "html", "shtml", NULL };
 675 static char HTML_help [] =
 676 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 677 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 678 occurrences of `id='.";
 679
 680 static char *Lisp_suffixes [] =
 681   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 682 static char Lisp_help [] =
 683 "In Lisp code, any function defined with `defun', any variable\n\
 684 defined with `defvar' or `defconst', and in general the first\n\
 685 argument of any expression that starts with `(def' in column zero\n\
 686 is a tag.";
 687
 688 static char *Lua_suffixes [] =
 689   { "lua", "LUA", NULL };
 690 static char Lua_help [] =
 691 "In Lua scripts, all functions are tags.";
 692
 693 static char *Makefile_filenames [] =
 694   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 695 static char Makefile_help [] =
 696 "In makefiles, targets are tags; additionally, variables are tags\n\
 697 unless you specify `--no-globals'.";
 698
 699 static char *Objc_suffixes [] =
 700   { "lm",                       /* Objective lex file */
 701     "m",                        /* Objective C file */
 702      NULL };
 703 static char Objc_help [] =
 704 "In Objective C code, tags include Objective C definitions for classes,\n\
 705 class categories, methods and protocols.  Tags for variables and\n\
 706 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 707 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 708
 709 static char *Pascal_suffixes [] =
 710   { "p", "pas", NULL };
 711 static char Pascal_help [] =
 712 "In Pascal code, the tags are the functions and procedures defined\n\
 713 in the file.";
 714 /* " // this is for working around an Emacs highlighting bug... */
 715
 716 static char *Perl_suffixes [] =
 717   { "pl", "pm", NULL };
 718 static char *Perl_interpreters [] =
 719   { "perl", "@PERL@", NULL };
 720 static char Perl_help [] =
 721 "In Perl code, the tags are the packages, subroutines and variables\n\
 722 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 723 `--globals' if you want to tag global variables.  Tags for\n\
 724 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 725 defined in the default package is `main::SUB'.";
 726
 727 static char *PHP_suffixes [] =
 728   { "php", "php3", "php4", NULL };
 729 static char PHP_help [] =
 730 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 731 the `--no-members' option, vars are tags too.";
 732
 733 static char *plain_C_suffixes [] =
 734   { "pc",                       /* Pro*C file */
 735      NULL };
 736
 737 static char *PS_suffixes [] =
 738   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 739 static char PS_help [] =
 740 "In PostScript code, the tags are the functions.";
 741
 742 static char *Prolog_suffixes [] =
 743   { "prolog", NULL };
 744 static char Prolog_help [] =
 745 "In Prolog code, tags are predicates and rules at the beginning of\n\
 746 line.";
 747
 748 static char *Python_suffixes [] =
 749   { "py", NULL };
 750 static char Python_help [] =
 751 "In Python code, `def' or `class' at the beginning of a line\n\
 752 generate a tag.";
 753
 754 /* Can't do the `SCM' or `scm' prefix with a version number. */
 755 static char *Scheme_suffixes [] =
 756   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 757 static char Scheme_help [] =
 758 "In Scheme code, tags include anything defined with `def' or with a\n\
 759 construct whose name starts with `def'.  They also include\n\
 760 variables set with `set!' at top level in the file.";
 761
 762 static char *TeX_suffixes [] =
 763   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 764 static char TeX_help [] =
 765 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 766 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 767 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 768 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 769 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 770 \n\
 771 Other commands can be specified by setting the environment variable\n\
 772 `TEXTAGS' to a colon-separated list like, for example,\n\
 773      TEXTAGS=\"mycommand:myothercommand\".";
 774
 775
 776 static char *Texinfo_suffixes [] =
 777   { "texi", "texinfo", "txi", NULL };
 778 static char Texinfo_help [] =
 779 "for texinfo files, lines starting with @node are tagged.";
 780
 781 static char *Yacc_suffixes [] =
 782   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 783 static char Yacc_help [] =
 784 "In Bison or Yacc input files, each rule defines as a tag the\n\
 785 nonterminal it constructs.  The portions of the file that contain\n\
 786 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 787 for full help).";
 788
 789 static char auto_help [] =
 790 "`auto' is not a real language, it indicates to use\n\
 791 a default language for files base on file name suffix and file contents.";
 792
 793 static char none_help [] =
 794 "`none' is not a real language, it indicates to only do\n\
 795 regexp processing on files.";
 796
 797 static char no_lang_help [] =
 798 "No detailed help available for this language.";
 799
 800
 801 /*
 802  * Table of languages.
 803  *
 804  * It is ok for a given function to be listed under more than one
 805  * name.  I just didn't.
 806  */
 807
 808 static language lang_names [] =
 809 {
 810   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 811   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 812   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 813   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 814   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 815   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 816   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 817   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 818   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 819   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 820   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 821   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 822   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 823   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 824   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 825   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 826   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 827   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 828   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 829   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 830   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 831   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 832   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 833   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 834   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 835   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 836   { "auto",      auto_help },                      /* default guessing scheme */
 837   { "none",      none_help,      just_read_file }, /* regexp matching only */
 838   { NULL }                /* end of list */
 839 };
 840
 841 \f
 842 static void
 843 print_language_names ()
 844 {
 845   language *lang;
 846   char **name, **ext;
 847
 848   puts ("\nThese are the currently supported languages, along with the\n\
 849 default file names and dot suffixes:");
 850   for (lang = lang_names; lang->name != NULL; lang++)
 851     {
 852       printf ("  %-*s", 10, lang->name);
 853       if (lang->filenames != NULL)
 854         for (name = lang->filenames; *name != NULL; name++)
 855           printf (" %s", *name);
 856       if (lang->suffixes != NULL)
 857         for (ext = lang->suffixes; *ext != NULL; ext++)
 858           printf (" .%s", *ext);
 859       puts ("");
 860     }
 861   puts ("where `auto' means use default language for files based on file\n\
 862 name suffix, and `none' means only do regexp processing on files.\n\
 863 If no language is specified and no matching suffix is found,\n\
 864 the first line of the file is read for a sharp-bang (#!) sequence\n\
 865 followed by the name of an interpreter.  If no such sequence is found,\n\
 866 Fortran is tried first; if no tags are found, C is tried next.\n\
 867 When parsing any C file, a \"class\" or \"template\" keyword\n\
 868 switches to C++.");
 869   puts ("Compressed files are supported using gzip and bzip2.\n\
 870 \n\
 871 For detailed help on a given language use, for example,\n\
 872 etags --help --lang=ada.");
 873 }
 874
 875 #ifndef EMACS_NAME
 876 # define EMACS_NAME "standalone"
 877 #endif
 878 #ifndef VERSION
 879 # define VERSION "version"
 880 #endif
 881 static void
 882 print_version ()
 883 {
 884   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 885   puts ("Copyright (C) 2007 Free Software Foundation, Inc.");
 886   puts ("This program is distributed under the terms in ETAGS.README");
 887
 888   exit (EXIT_SUCCESS);
 889 }
 890
 891 static void
 892 print_help (argbuffer)
 893      argument *argbuffer;
 894 {
 895   bool help_for_lang = FALSE;
 896
 897   for (; argbuffer->arg_type != at_end; argbuffer++)
 898     if (argbuffer->arg_type == at_language)
 899       {
 900         if (help_for_lang)
 901           puts ("");
 902         puts (argbuffer->lang->help);
 903         help_for_lang = TRUE;
 904       }
 905
 906   if (help_for_lang)
 907     exit (EXIT_SUCCESS);
 908
 909   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 910 \n\
 911 These are the options accepted by %s.\n", progname, progname);
 912   if (NO_LONG_OPTIONS)
 913     puts ("WARNING: long option names do not work with this executable,\n\
 914 as it is not linked with GNU getopt.");
 915   else
 916     puts ("You may use unambiguous abbreviations for the long option names.");
 917   puts ("  A - as file name means read names from stdin (one per line).\n\
 918 Absolute names are stored in the output file as they are.\n\
 919 Relative ones are stored relative to the output file's directory.\n");
 920
 921   puts ("-a, --append\n\
 922         Append tag entries to existing tags file.");
 923
 924   puts ("--packages-only\n\
 925         For Ada files, only generate tags for packages.");
 926
 927   if (CTAGS)
 928     puts ("-B, --backward-search\n\
 929         Write the search commands for the tag entries using '?', the\n\
 930         backward-search command instead of '/', the forward-search command.");
 931
 932   /* This option is mostly obsolete, because etags can now automatically
 933      detect C++.  Retained for backward compatibility and for debugging and
 934      experimentation.  In principle, we could want to tag as C++ even
 935      before any "class" or "template" keyword.
 936   puts ("-C, --c++\n\
 937         Treat files whose name suffix defaults to C language as C++ files.");
 938   */
 939
 940   puts ("--declarations\n\
 941         In C and derived languages, create tags for function declarations,");
 942   if (CTAGS)
 943     puts ("\tand create tags for extern variables if --globals is used.");
 944   else
 945     puts
 946       ("\tand create tags for extern variables unless --no-globals is used.");
 947
 948   if (CTAGS)
 949     puts ("-d, --defines\n\
 950         Create tag entries for C #define constants and enum constants, too.");
 951   else
 952     puts ("-D, --no-defines\n\
 953         Don't create tag entries for C #define constants and enum constants.\n\
 954         This makes the tags file smaller.");
 955
 956   if (!CTAGS)
 957     puts ("-i FILE, --include=FILE\n\
 958         Include a note in tag file indicating that, when searching for\n\
 959         a tag, one should also consult the tags file FILE after\n\
 960         checking the current file.");
 961
 962   puts ("-l LANG, --language=LANG\n\
 963         Force the following files to be considered as written in the\n\
 964         named language up to the next --language=LANG option.");
 965
 966   if (CTAGS)
 967     puts ("--globals\n\
 968         Create tag entries for global variables in some languages.");
 969   else
 970     puts ("--no-globals\n\
 971         Do not create tag entries for global variables in some\n\
 972         languages.  This makes the tags file smaller.");
 973   puts ("--no-members\n\
 974         Do not create tag entries for members of structures\n\
 975         in some languages.");
 976
 977   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 978         Make a tag for each line matching a regular expression pattern\n\
 979         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 980         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 981         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 982         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 983   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 984         For example Tcl named tags can be created with:\n\
 985           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 986         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 987         `m' means to allow multi-line matches, `s' implies `m' and\n\
 988         causes dot to match any character, including newline.");
 989   puts ("-R, --no-regex\n\
 990         Don't create tags from regexps for the following files.");
 991   puts ("-I, --ignore-indentation\n\
 992         In C and C++ do not assume that a closing brace in the first\n\
 993         column is the final brace of a function or structure definition.");
 994   puts ("-o FILE, --output=FILE\n\
 995         Write the tags to FILE.");
 996   puts ("--parse-stdin=NAME\n\
 997         Read from standard input and record tags as belonging to file NAME.");
 998
 999   if (CTAGS)
1000     {
1001       puts ("-t, --typedefs\n\
1002         Generate tag entries for C and Ada typedefs.");
1003       puts ("-T, --typedefs-and-c++\n\
1004         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1005         and C++ member functions.");
1006     }
1007
1008   if (CTAGS)
1009     puts ("-u, --update\n\
1010         Update the tag entries for the given files, leaving tag\n\
1011         entries for other files in place.  Currently, this is\n\
1012         implemented by deleting the existing entries for the given\n\
1013         files and then rewriting the new entries at the end of the\n\
1014         tags file.  It is often faster to simply rebuild the entire\n\
1015         tag file than to use this.");
1016
1017   if (CTAGS)
1018     {
1019       puts ("-v, --vgrind\n\
1020         Print on the standard output an index of items intended for\n\
1021         human consumption, similar to the output of vgrind.  The index\n\
1022         is sorted, and gives the page number of each item.");
1023 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
1024       puts ("-w, --no-duplicates\n\
1025         Do not create duplicate tag entries, for compatibility with\n\
1026         traditional ctags.");
1027       puts ("-w, --no-warn\n\
1028         Suppress warning messages about duplicate tag entries.");
1029 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
1030       puts ("-x, --cxref\n\
1031         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1032         The output uses line numbers instead of page numbers, but\n\
1033         beyond that the differences are cosmetic; try both to see\n\
1034         which you like.");
1035     }
1036
1037   puts ("-V, --version\n\
1038         Print the version of the program.\n\
1039 -h, --help\n\
1040         Print this help message.\n\
1041         Followed by one or more `--language' options prints detailed\n\
1042         help about tag generation for the specified languages.");
1043
1044   print_language_names ();
1045
1046   puts ("");
1047   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1048
1049   exit (EXIT_SUCCESS);
1050 }
1051
1052 \f
1053 #ifdef VMS                      /* VMS specific functions */
1054
1055 #define EOS     '\0'
1056
1057 /* This is a BUG!  ANY arbitrary limit is a BUG!
1058    Won't someone please fix this?  */
1059 #define MAX_FILE_SPEC_LEN       255
1060 typedef struct  {
1061   short   curlen;
1062   char    body[MAX_FILE_SPEC_LEN + 1];
1063 } vspec;
1064
1065 /*
1066  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1067  returning in each successive call the next file name matching the input
1068  spec. The function expects that each in_spec passed
1069  to it will be processed to completion; in particular, up to and
1070  including the call following that in which the last matching name
1071  is returned, the function ignores the value of in_spec, and will
1072  only start processing a new spec with the following call.
1073  If an error occurs, on return out_spec contains the value
1074  of in_spec when the error occurred.
1075
1076  With each successive file name returned in out_spec, the
1077  function's return value is one. When there are no more matching
1078  names the function returns zero. If on the first call no file
1079  matches in_spec, or there is any other error, -1 is returned.
1080 */
1081
1082 #include        <rmsdef.h>
1083 #include        <descrip.h>
1084 #define         OUTSIZE MAX_FILE_SPEC_LEN
1085 static short
1086 fn_exp (out, in)
1087      vspec *out;
1088      char *in;
1089 {
1090   static long context = 0;
1091   static struct dsc$descriptor_s o;
1092   static struct dsc$descriptor_s i;
1093   static bool pass1 = TRUE;
1094   long status;
1095   short retval;
1096
1097   if (pass1)
1098     {
1099       pass1 = FALSE;
1100       o.dsc$a_pointer = (char *) out;
1101       o.dsc$w_length = (short)OUTSIZE;
1102       i.dsc$a_pointer = in;
1103       i.dsc$w_length = (short)strlen(in);
1104       i.dsc$b_dtype = DSC$K_DTYPE_T;
1105       i.dsc$b_class = DSC$K_CLASS_S;
1106       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1107       o.dsc$b_class = DSC$K_CLASS_VS;
1108     }
1109   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1110     {
1111       out->body[out->curlen] = EOS;
1112       return 1;
1113     }
1114   else if (status == RMS$_NMF)
1115     retval = 0;
1116   else
1117     {
1118       strcpy(out->body, in);
1119       retval = -1;
1120     }
1121   lib$find_file_end(&context);
1122   pass1 = TRUE;
1123   return retval;
1124 }
1125
1126 /*
1127   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1128   name of each file specified by the provided arg expanding wildcards.
1129 */
1130 static char *
1131 gfnames (arg, p_error)
1132      char *arg;
1133      bool *p_error;
1134 {
1135   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1136
1137   switch (fn_exp (&filename, arg))
1138     {
1139     case 1:
1140       *p_error = FALSE;
1141       return filename.body;
1142     case 0:
1143       *p_error = FALSE;
1144       return NULL;
1145     default:
1146       *p_error = TRUE;
1147       return filename.body;
1148     }
1149 }
1150
1151 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
1152 system (cmd)
1153      char *cmd;
1154 {
1155   error ("%s", "system() function not implemented under VMS");
1156 }
1157 #endif
1158
1159 #define VERSION_DELIM   ';'
1160 char *massage_name (s)
1161      char *s;
1162 {
1163   char *start = s;
1164
1165   for ( ; *s; s++)
1166     if (*s == VERSION_DELIM)
1167       {
1168         *s = EOS;
1169         break;
1170       }
1171     else
1172       *s = lowcase (*s);
1173   return start;
1174 }
1175 #endif /* VMS */
1176
1177 \f
1178 int
1179 main (argc, argv)
1180      int argc;
1181      char *argv[];
1182 {
1183   int i;
1184   unsigned int nincluded_files;
1185   char **included_files;
1186   argument *argbuffer;
1187   int current_arg, file_count;
1188   linebuffer filename_lb;
1189   bool help_asked = FALSE;
1190 #ifdef VMS
1191   bool got_err;
1192 #endif
1193  char *optstring;
1194  int opt;
1195
1196
1197 #ifdef DOS_NT
1198   _fmode = O_BINARY;   /* all of files are treated as binary files */
1199 #endif /* DOS_NT */
1200
1201   progname = argv[0];
1202   nincluded_files = 0;
1203   included_files = xnew (argc, char *);
1204   current_arg = 0;
1205   file_count = 0;
1206
1207   /* Allocate enough no matter what happens.  Overkill, but each one
1208      is small. */
1209   argbuffer = xnew (argc, argument);
1210
1211   /*
1212    * If etags, always find typedefs and structure tags.  Why not?
1213    * Also default to find macro constants, enum constants, struct
1214    * members and global variables.
1215    */
1216   if (!CTAGS)
1217     {
1218       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1219       globals = TRUE;
1220     }
1221
1222   /* When the optstring begins with a '-' getopt_long does not rearrange the
1223      non-options arguments to be at the end, but leaves them alone. */
1224   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1225                       "ac:Cf:Il:o:r:RSVhH",
1226                       (CTAGS) ? "BxdtTuvw" : "Di:");
1227
1228   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1229     switch (opt)
1230       {
1231       case 0:
1232         /* If getopt returns 0, then it has already processed a
1233            long-named option.  We should do nothing.  */
1234         break;
1235
1236       case 1:
1237         /* This means that a file name has been seen.  Record it. */
1238         argbuffer[current_arg].arg_type = at_filename;
1239         argbuffer[current_arg].what     = optarg;
1240         ++current_arg;
1241         ++file_count;
1242         break;
1243
1244       case STDIN:
1245         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1246         argbuffer[current_arg].arg_type = at_stdin;
1247         argbuffer[current_arg].what     = optarg;
1248         ++current_arg;
1249         ++file_count;
1250         if (parsing_stdin)
1251           fatal ("cannot parse standard input more than once", (char *)NULL);
1252         parsing_stdin = TRUE;
1253         break;
1254
1255         /* Common options. */
1256       case 'a': append_to_tagfile = TRUE;       break;
1257       case 'C': cplusplus = TRUE;               break;
1258       case 'f':         /* for compatibility with old makefiles */
1259       case 'o':
1260         if (tagfile)
1261           {
1262             error ("-o option may only be given once.", (char *)NULL);
1263             suggest_asking_for_help ();
1264             /* NOTREACHED */
1265           }
1266         tagfile = optarg;
1267         break;
1268       case 'I':
1269       case 'S':         /* for backward compatibility */
1270         ignoreindent = TRUE;
1271         break;
1272       case 'l':
1273         {
1274           language *lang = get_language_from_langname (optarg);
1275           if (lang != NULL)
1276             {
1277               argbuffer[current_arg].lang = lang;
1278               argbuffer[current_arg].arg_type = at_language;
1279               ++current_arg;
1280             }
1281         }
1282         break;
1283       case 'c':
1284         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1285         optarg = concat (optarg, "i", ""); /* memory leak here */
1286         /* FALLTHRU */
1287       case 'r':
1288         argbuffer[current_arg].arg_type = at_regexp;
1289         argbuffer[current_arg].what = optarg;
1290         ++current_arg;
1291         break;
1292       case 'R':
1293         argbuffer[current_arg].arg_type = at_regexp;
1294         argbuffer[current_arg].what = NULL;
1295         ++current_arg;
1296         break;
1297       case 'V':
1298         print_version ();
1299         break;
1300       case 'h':
1301       case 'H':
1302         help_asked = TRUE;
1303         break;
1304
1305         /* Etags options */
1306       case 'D': constantypedefs = FALSE;                        break;
1307       case 'i': included_files[nincluded_files++] = optarg;     break;
1308
1309         /* Ctags options. */
1310       case 'B': searchar = '?';                                 break;
1311       case 'd': constantypedefs = TRUE;                         break;
1312       case 't': typedefs = TRUE;                                break;
1313       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1314       case 'u': update = TRUE;                                  break;
1315       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1316       case 'x': cxref_style = TRUE;                             break;
1317       case 'w': no_warnings = TRUE;                             break;
1318       default:
1319         suggest_asking_for_help ();
1320         /* NOTREACHED */
1321       }
1322
1323   /* No more options.  Store the rest of arguments. */
1324   for (; optind < argc; optind++)
1325     {
1326       argbuffer[current_arg].arg_type = at_filename;
1327       argbuffer[current_arg].what = argv[optind];
1328       ++current_arg;
1329       ++file_count;
1330     }
1331
1332   argbuffer[current_arg].arg_type = at_end;
1333
1334   if (help_asked)
1335     print_help (argbuffer);
1336     /* NOTREACHED */
1337
1338   if (nincluded_files == 0 && file_count == 0)
1339     {
1340       error ("no input files specified.", (char *)NULL);
1341       suggest_asking_for_help ();
1342       /* NOTREACHED */
1343     }
1344
1345   if (tagfile == NULL)
1346     tagfile = CTAGS ? "tags" : "TAGS";
1347   cwd = etags_getcwd ();        /* the current working directory */
1348   if (cwd[strlen (cwd) - 1] != '/')
1349     {
1350       char *oldcwd = cwd;
1351       cwd = concat (oldcwd, "/", "");
1352       free (oldcwd);
1353     }
1354   /* Relative file names are made relative to the current directory. */
1355   if (streq (tagfile, "-")
1356       || strneq (tagfile, "/dev/", 5))
1357     tagfiledir = cwd;
1358   else
1359     tagfiledir = absolute_dirname (tagfile, cwd);
1360
1361   init ();                      /* set up boolean "functions" */
1362
1363   linebuffer_init (&lb);
1364   linebuffer_init (&filename_lb);
1365   linebuffer_init (&filebuf);
1366   linebuffer_init (&token_name);
1367
1368   if (!CTAGS)
1369     {
1370       if (streq (tagfile, "-"))
1371         {
1372           tagf = stdout;
1373 #ifdef DOS_NT
1374           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1375              doesn't take effect until after `stdout' is already open). */
1376           if (!isatty (fileno (stdout)))
1377             setmode (fileno (stdout), O_BINARY);
1378 #endif /* DOS_NT */
1379         }
1380       else
1381         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1382       if (tagf == NULL)
1383         pfatal (tagfile);
1384     }
1385
1386   /*
1387    * Loop through files finding functions.
1388    */
1389   for (i = 0; i < current_arg; i++)
1390     {
1391       static language *lang;    /* non-NULL if language is forced */
1392       char *this_file;
1393
1394       switch (argbuffer[i].arg_type)
1395         {
1396         case at_language:
1397           lang = argbuffer[i].lang;
1398           break;
1399         case at_regexp:
1400           analyse_regex (argbuffer[i].what);
1401           break;
1402         case at_filename:
1403 #ifdef VMS
1404           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1405             {
1406               if (got_err)
1407                 {
1408                   error ("can't find file %s\n", this_file);
1409                   argc--, argv++;
1410                 }
1411               else
1412                 {
1413                   this_file = massage_name (this_file);
1414                 }
1415 #else
1416               this_file = argbuffer[i].what;
1417 #endif
1418               /* Input file named "-" means read file names from stdin
1419                  (one per line) and use them. */
1420               if (streq (this_file, "-"))
1421                 {
1422                   if (parsing_stdin)
1423                     fatal ("cannot parse standard input AND read file names from it",
1424                            (char *)NULL);
1425                   while (readline_internal (&filename_lb, stdin) > 0)
1426                     process_file_name (filename_lb.buffer, lang);
1427                 }
1428               else
1429                 process_file_name (this_file, lang);
1430 #ifdef VMS
1431             }
1432 #endif
1433           break;
1434         case at_stdin:
1435           this_file = argbuffer[i].what;
1436           process_file (stdin, this_file, lang);
1437           break;
1438         }
1439     }
1440
1441   free_regexps ();
1442   free (lb.buffer);
1443   free (filebuf.buffer);
1444   free (token_name.buffer);
1445
1446   if (!CTAGS || cxref_style)
1447     {
1448       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1449       put_entries (nodehead);
1450       free_tree (nodehead);
1451       nodehead = NULL;
1452       if (!CTAGS)
1453         {
1454           fdesc *fdp;
1455
1456           /* Output file entries that have no tags. */
1457           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1458             if (!fdp->written)
1459               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1460
1461           while (nincluded_files-- > 0)
1462             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1463
1464           if (fclose (tagf) == EOF)
1465             pfatal (tagfile);
1466         }
1467
1468       exit (EXIT_SUCCESS);
1469     }
1470
1471   if (update)
1472     {
1473       char cmd[BUFSIZ];
1474       for (i = 0; i < current_arg; ++i)
1475         {
1476           switch (argbuffer[i].arg_type)
1477             {
1478             case at_filename:
1479             case at_stdin:
1480               break;
1481             default:
1482               continue;         /* the for loop */
1483             }
1484           sprintf (cmd,
1485                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1486                    tagfile, argbuffer[i].what, tagfile);
1487           if (system (cmd) != EXIT_SUCCESS)
1488             fatal ("failed to execute shell command", (char *)NULL);
1489         }
1490       append_to_tagfile = TRUE;
1491     }
1492
1493   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1494   if (tagf == NULL)
1495     pfatal (tagfile);
1496   put_entries (nodehead);       /* write all the tags (CTAGS) */
1497   free_tree (nodehead);
1498   nodehead = NULL;
1499   if (fclose (tagf) == EOF)
1500     pfatal (tagfile);
1501
1502   if (CTAGS)
1503     if (append_to_tagfile || update)
1504       {
1505         char cmd[2*BUFSIZ+20];
1506         /* Maybe these should be used:
1507            setenv ("LC_COLLATE", "C", 1);
1508            setenv ("LC_ALL", "C", 1); */
1509         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1510         exit (system (cmd));
1511       }
1512   return EXIT_SUCCESS;
1513 }
1514
1515
1516 /*
1517  * Return a compressor given the file name.  If EXTPTR is non-zero,
1518  * return a pointer into FILE where the compressor-specific
1519  * extension begins.  If no compressor is found, NULL is returned
1520  * and EXTPTR is not significant.
1521  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1522  */
1523 static compressor *
1524 get_compressor_from_suffix (file, extptr)
1525      char *file;
1526      char **extptr;
1527 {
1528   compressor *compr;
1529   char *slash, *suffix;
1530
1531   /* This relies on FN to be after canonicalize_filename,
1532      so we don't need to consider backslashes on DOS_NT.  */
1533   slash = etags_strrchr (file, '/');
1534   suffix = etags_strrchr (file, '.');
1535   if (suffix == NULL || suffix < slash)
1536     return NULL;
1537   if (extptr != NULL)
1538     *extptr = suffix;
1539   suffix += 1;
1540   /* Let those poor souls who live with DOS 8+3 file name limits get
1541      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1542      Only the first do loop is run if not MSDOS */
1543   do
1544     {
1545       for (compr = compressors; compr->suffix != NULL; compr++)
1546         if (streq (compr->suffix, suffix))
1547           return compr;
1548       if (!MSDOS)
1549         break;                  /* do it only once: not really a loop */
1550       if (extptr != NULL)
1551         *extptr = ++suffix;
1552     } while (*suffix != '\0');
1553   return NULL;
1554 }
1555
1556
1557
1558 /*
1559  * Return a language given the name.
1560  */
1561 static language *
1562 get_language_from_langname (name)
1563      const char *name;
1564 {
1565   language *lang;
1566
1567   if (name == NULL)
1568     error ("empty language name", (char *)NULL);
1569   else
1570     {
1571       for (lang = lang_names; lang->name != NULL; lang++)
1572         if (streq (name, lang->name))
1573           return lang;
1574       error ("unknown language \"%s\"", name);
1575     }
1576
1577   return NULL;
1578 }
1579
1580
1581 /*
1582  * Return a language given the interpreter name.
1583  */
1584 static language *
1585 get_language_from_interpreter (interpreter)
1586      char *interpreter;
1587 {
1588   language *lang;
1589   char **iname;
1590
1591   if (interpreter == NULL)
1592     return NULL;
1593   for (lang = lang_names; lang->name != NULL; lang++)
1594     if (lang->interpreters != NULL)
1595       for (iname = lang->interpreters; *iname != NULL; iname++)
1596         if (streq (*iname, interpreter))
1597             return lang;
1598
1599   return NULL;
1600 }
1601
1602
1603
1604 /*
1605  * Return a language given the file name.
1606  */
1607 static language *
1608 get_language_from_filename (file, case_sensitive)
1609      char *file;
1610      bool case_sensitive;
1611 {
1612   language *lang;
1613   char **name, **ext, *suffix;
1614
1615   /* Try whole file name first. */
1616   for (lang = lang_names; lang->name != NULL; lang++)
1617     if (lang->filenames != NULL)
1618       for (name = lang->filenames; *name != NULL; name++)
1619         if ((case_sensitive)
1620             ? streq (*name, file)
1621             : strcaseeq (*name, file))
1622           return lang;
1623
1624   /* If not found, try suffix after last dot. */
1625   suffix = etags_strrchr (file, '.');
1626   if (suffix == NULL)
1627     return NULL;
1628   suffix += 1;
1629   for (lang = lang_names; lang->name != NULL; lang++)
1630     if (lang->suffixes != NULL)
1631       for (ext = lang->suffixes; *ext != NULL; ext++)
1632         if ((case_sensitive)
1633             ? streq (*ext, suffix)
1634             : strcaseeq (*ext, suffix))
1635           return lang;
1636   return NULL;
1637 }
1638
1639 \f
1640 /*
1641  * This routine is called on each file argument.
1642  */
1643 static void
1644 process_file_name (file, lang)
1645      char *file;
1646      language *lang;
1647 {
1648   struct stat stat_buf;
1649   FILE *inf;
1650   fdesc *fdp;
1651   compressor *compr;
1652   char *compressed_name, *uncompressed_name;
1653   char *ext, *real_name;
1654   int retval;
1655
1656   canonicalize_filename (file);
1657   if (streq (file, tagfile) && !streq (tagfile, "-"))
1658     {
1659       error ("skipping inclusion of %s in self.", file);
1660       return;
1661     }
1662   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1663     {
1664       compressed_name = NULL;
1665       real_name = uncompressed_name = savestr (file);
1666     }
1667   else
1668     {
1669       real_name = compressed_name = savestr (file);
1670       uncompressed_name = savenstr (file, ext - file);
1671     }
1672
1673   /* If the canonicalized uncompressed name
1674      has already been dealt with, skip it silently. */
1675   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1676     {
1677       assert (fdp->infname != NULL);
1678       if (streq (uncompressed_name, fdp->infname))
1679         goto cleanup;
1680     }
1681
1682   if (stat (real_name, &stat_buf) != 0)
1683     {
1684       /* Reset real_name and try with a different name. */
1685       real_name = NULL;
1686       if (compressed_name != NULL) /* try with the given suffix */
1687         {
1688           if (stat (uncompressed_name, &stat_buf) == 0)
1689             real_name = uncompressed_name;
1690         }
1691       else                      /* try all possible suffixes */
1692         {
1693           for (compr = compressors; compr->suffix != NULL; compr++)
1694             {
1695               compressed_name = concat (file, ".", compr->suffix);
1696               if (stat (compressed_name, &stat_buf) != 0)
1697                 {
1698                   if (MSDOS)
1699                     {
1700                       char *suf = compressed_name + strlen (file);
1701                       size_t suflen = strlen (compr->suffix) + 1;
1702                       for ( ; suf[1]; suf++, suflen--)
1703                         {
1704                           memmove (suf, suf + 1, suflen);
1705                           if (stat (compressed_name, &stat_buf) == 0)
1706                             {
1707                               real_name = compressed_name;
1708                               break;
1709                             }
1710                         }
1711                       if (real_name != NULL)
1712                         break;
1713                     } /* MSDOS */
1714                   free (compressed_name);
1715                   compressed_name = NULL;
1716                 }
1717               else
1718                 {
1719                   real_name = compressed_name;
1720                   break;
1721                 }
1722             }
1723         }
1724       if (real_name == NULL)
1725         {
1726           perror (file);
1727           goto cleanup;
1728         }
1729     } /* try with a different name */
1730
1731   if (!S_ISREG (stat_buf.st_mode))
1732     {
1733       error ("skipping %s: it is not a regular file.", real_name);
1734       goto cleanup;
1735     }
1736   if (real_name == compressed_name)
1737     {
1738       char *cmd = concat (compr->command, " ", real_name);
1739       inf = (FILE *) popen (cmd, "r");
1740       free (cmd);
1741     }
1742   else
1743     inf = fopen (real_name, "r");
1744   if (inf == NULL)
1745     {
1746       perror (real_name);
1747       goto cleanup;
1748     }
1749
1750   process_file (inf, uncompressed_name, lang);
1751
1752   if (real_name == compressed_name)
1753     retval = pclose (inf);
1754   else
1755     retval = fclose (inf);
1756   if (retval < 0)
1757     pfatal (file);
1758
1759  cleanup:
1760   if (compressed_name) free (compressed_name);
1761   if (uncompressed_name) free (uncompressed_name);
1762   last_node = NULL;
1763   curfdp = NULL;
1764   return;
1765 }
1766
1767 static void
1768 process_file (fh, fn, lang)
1769      FILE *fh;
1770      char *fn;
1771      language *lang;
1772 {
1773   static const fdesc emptyfdesc;
1774   fdesc *fdp;
1775
1776   /* Create a new input file description entry. */
1777   fdp = xnew (1, fdesc);
1778   *fdp = emptyfdesc;
1779   fdp->next = fdhead;
1780   fdp->infname = savestr (fn);
1781   fdp->lang = lang;
1782   fdp->infabsname = absolute_filename (fn, cwd);
1783   fdp->infabsdir = absolute_dirname (fn, cwd);
1784   if (filename_is_absolute (fn))
1785     {
1786       /* An absolute file name.  Canonicalize it. */
1787       fdp->taggedfname = absolute_filename (fn, NULL);
1788     }
1789   else
1790     {
1791       /* A file name relative to cwd.  Make it relative
1792          to the directory of the tags file. */
1793       fdp->taggedfname = relative_filename (fn, tagfiledir);
1794     }
1795   fdp->usecharno = TRUE;        /* use char position when making tags */
1796   fdp->prop = NULL;
1797   fdp->written = FALSE;         /* not written on tags file yet */
1798
1799   fdhead = fdp;
1800   curfdp = fdhead;              /* the current file description */
1801
1802   find_entries (fh);
1803
1804   /* If not Ctags, and if this is not metasource and if it contained no #line
1805      directives, we can write the tags and free all nodes pointing to
1806      curfdp. */
1807   if (!CTAGS
1808       && curfdp->usecharno      /* no #line directives in this file */
1809       && !curfdp->lang->metasource)
1810     {
1811       node *np, *prev;
1812
1813       /* Look for the head of the sublist relative to this file.  See add_node
1814          for the structure of the node tree. */
1815       prev = NULL;
1816       for (np = nodehead; np != NULL; prev = np, np = np->left)
1817         if (np->fdp == curfdp)
1818           break;
1819
1820       /* If we generated tags for this file, write and delete them. */
1821       if (np != NULL)
1822         {
1823           /* This is the head of the last sublist, if any.  The following
1824              instructions depend on this being true. */
1825           assert (np->left == NULL);
1826
1827           assert (fdhead == curfdp);
1828           assert (last_node->fdp == curfdp);
1829           put_entries (np);     /* write tags for file curfdp->taggedfname */
1830           free_tree (np);       /* remove the written nodes */
1831           if (prev == NULL)
1832             nodehead = NULL;    /* no nodes left */
1833           else
1834             prev->left = NULL;  /* delete the pointer to the sublist */
1835         }
1836     }
1837 }
1838
1839 /*
1840  * This routine sets up the boolean pseudo-functions which work
1841  * by setting boolean flags dependent upon the corresponding character.
1842  * Every char which is NOT in that string is not a white char.  Therefore,
1843  * all of the array "_wht" is set to FALSE, and then the elements
1844  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1845  * of a char is TRUE if it is the string "white", else FALSE.
1846  */
1847 static void
1848 init ()
1849 {
1850   register char *sp;
1851   register int i;
1852
1853   for (i = 0; i < CHARS; i++)
1854     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1855   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1856   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1857   notinname('\0') = notinname('\n');
1858   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1859   begtoken('\0') = begtoken('\n');
1860   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1861   intoken('\0') = intoken('\n');
1862   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1863   endtoken('\0') = endtoken('\n');
1864 }
1865
1866 /*
1867  * This routine opens the specified file and calls the function
1868  * which finds the function and type definitions.
1869  */
1870 static void
1871 find_entries (inf)
1872      FILE *inf;
1873 {
1874   char *cp;
1875   language *lang = curfdp->lang;
1876   Lang_function *parser = NULL;
1877
1878   /* If user specified a language, use it. */
1879   if (lang != NULL && lang->function != NULL)
1880     {
1881       parser = lang->function;
1882     }
1883
1884   /* Else try to guess the language given the file name. */
1885   if (parser == NULL)
1886     {
1887       lang = get_language_from_filename (curfdp->infname, TRUE);
1888       if (lang != NULL && lang->function != NULL)
1889         {
1890           curfdp->lang = lang;
1891           parser = lang->function;
1892         }
1893     }
1894
1895   /* Else look for sharp-bang as the first two characters. */
1896   if (parser == NULL
1897       && readline_internal (&lb, inf) > 0
1898       && lb.len >= 2
1899       && lb.buffer[0] == '#'
1900       && lb.buffer[1] == '!')
1901     {
1902       char *lp;
1903
1904       /* Set lp to point at the first char after the last slash in the
1905          line or, if no slashes, at the first nonblank.  Then set cp to
1906          the first successive blank and terminate the string. */
1907       lp = etags_strrchr (lb.buffer+2, '/');
1908       if (lp != NULL)
1909         lp += 1;
1910       else
1911         lp = skip_spaces (lb.buffer + 2);
1912       cp = skip_non_spaces (lp);
1913       *cp = '\0';
1914
1915       if (strlen (lp) > 0)
1916         {
1917           lang = get_language_from_interpreter (lp);
1918           if (lang != NULL && lang->function != NULL)
1919             {
1920               curfdp->lang = lang;
1921               parser = lang->function;
1922             }
1923         }
1924     }
1925
1926   /* We rewind here, even if inf may be a pipe.  We fail if the
1927      length of the first line is longer than the pipe block size,
1928      which is unlikely. */
1929   rewind (inf);
1930
1931   /* Else try to guess the language given the case insensitive file name. */
1932   if (parser == NULL)
1933     {
1934       lang = get_language_from_filename (curfdp->infname, FALSE);
1935       if (lang != NULL && lang->function != NULL)
1936         {
1937           curfdp->lang = lang;
1938           parser = lang->function;
1939         }
1940     }
1941
1942   /* Else try Fortran or C. */
1943   if (parser == NULL)
1944     {
1945       node *old_last_node = last_node;
1946
1947       curfdp->lang = get_language_from_langname ("fortran");
1948       find_entries (inf);
1949
1950       if (old_last_node == last_node)
1951         /* No Fortran entries found.  Try C. */
1952         {
1953           /* We do not tag if rewind fails.
1954              Only the file name will be recorded in the tags file. */
1955           rewind (inf);
1956           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1957           find_entries (inf);
1958         }
1959       return;
1960     }
1961
1962   if (!no_line_directive
1963       && curfdp->lang != NULL && curfdp->lang->metasource)
1964     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1965        file, or anyway we parsed a file that is automatically generated from
1966        this one.  If this is the case, the bingo.c file contained #line
1967        directives that generated tags pointing to this file.  Let's delete
1968        them all before parsing this file, which is the real source. */
1969     {
1970       fdesc **fdpp = &fdhead;
1971       while (*fdpp != NULL)
1972         if (*fdpp != curfdp
1973             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1974           /* We found one of those!  We must delete both the file description
1975              and all tags referring to it. */
1976           {
1977             fdesc *badfdp = *fdpp;
1978
1979             /* Delete the tags referring to badfdp->taggedfname
1980                that were obtained from badfdp->infname. */
1981             invalidate_nodes (badfdp, &nodehead);
1982
1983             *fdpp = badfdp->next; /* remove the bad description from the list */
1984             free_fdesc (badfdp);
1985           }
1986         else
1987           fdpp = &(*fdpp)->next; /* advance the list pointer */
1988     }
1989
1990   assert (parser != NULL);
1991
1992   /* Generic initialisations before reading from file. */
1993   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1994
1995   /* Generic initialisations before parsing file with readline. */
1996   lineno = 0;                  /* reset global line number */
1997   charno = 0;                  /* reset global char number */
1998   linecharno = 0;              /* reset global char number of line start */
1999
2000   parser (inf);
2001
2002   regex_tag_multiline ();
2003 }
2004
2005 \f
2006 /*
2007  * Check whether an implicitly named tag should be created,
2008  * then call `pfnote'.
2009  * NAME is a string that is internally copied by this function.
2010  *
2011  * TAGS format specification
2012  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2013  * The following is explained in some more detail in etc/ETAGS.EBNF.
2014  *
2015  * make_tag creates tags with "implicit tag names" (unnamed tags)
2016  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2017  *  1. NAME does not contain any of the characters in NONAM;
2018  *  2. LINESTART contains name as either a rightmost, or rightmost but
2019  *     one character, substring;
2020  *  3. the character, if any, immediately before NAME in LINESTART must
2021  *     be a character in NONAM;
2022  *  4. the character, if any, immediately after NAME in LINESTART must
2023  *     also be a character in NONAM.
2024  *
2025  * The implementation uses the notinname() macro, which recognises the
2026  * characters stored in the string `nonam'.
2027  * etags.el needs to use the same characters that are in NONAM.
2028  */
2029 static void
2030 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2031      char *name;                /* tag name, or NULL if unnamed */
2032      int namelen;               /* tag length */
2033      bool is_func;              /* tag is a function */
2034      char *linestart;           /* start of the line where tag is */
2035      int linelen;               /* length of the line where tag is */
2036      int lno;                   /* line number */
2037      long cno;                  /* character number */
2038 {
2039   bool named = (name != NULL && namelen > 0);
2040
2041   if (!CTAGS && named)          /* maybe set named to false */
2042     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2043        such that etags.el can guess a name from it. */
2044     {
2045       int i;
2046       register char *cp = name;
2047
2048       for (i = 0; i < namelen; i++)
2049         if (notinname (*cp++))
2050           break;
2051       if (i == namelen)                         /* rule #1 */
2052         {
2053           cp = linestart + linelen - namelen;
2054           if (notinname (linestart[linelen-1]))
2055             cp -= 1;                            /* rule #4 */
2056           if (cp >= linestart                   /* rule #2 */
2057               && (cp == linestart
2058                   || notinname (cp[-1]))        /* rule #3 */
2059               && strneq (name, cp, namelen))    /* rule #2 */
2060             named = FALSE;      /* use implicit tag name */
2061         }
2062     }
2063
2064   if (named)
2065     name = savenstr (name, namelen);
2066   else
2067     name = NULL;
2068   pfnote (name, is_func, linestart, linelen, lno, cno);
2069 }
2070
2071 /* Record a tag. */
2072 static void
2073 pfnote (name, is_func, linestart, linelen, lno, cno)
2074      char *name;                /* tag name, or NULL if unnamed */
2075      bool is_func;              /* tag is a function */
2076      char *linestart;           /* start of the line where tag is */
2077      int linelen;               /* length of the line where tag is */
2078      int lno;                   /* line number */
2079      long cno;                  /* character number */
2080 {
2081   register node *np;
2082
2083   assert (name == NULL || name[0] != '\0');
2084   if (CTAGS && name == NULL)
2085     return;
2086
2087   np = xnew (1, node);
2088
2089   /* If ctags mode, change name "main" to M<thisfilename>. */
2090   if (CTAGS && !cxref_style && streq (name, "main"))
2091     {
2092       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2093       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2094       fp = etags_strrchr (np->name, '.');
2095       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2096         fp[0] = '\0';
2097     }
2098   else
2099     np->name = name;
2100   np->valid = TRUE;
2101   np->been_warned = FALSE;
2102   np->fdp = curfdp;
2103   np->is_func = is_func;
2104   np->lno = lno;
2105   if (np->fdp->usecharno)
2106     /* Our char numbers are 0-base, because of C language tradition?
2107        ctags compatibility?  old versions compatibility?   I don't know.
2108        Anyway, since emacs's are 1-base we expect etags.el to take care
2109        of the difference.  If we wanted to have 1-based numbers, we would
2110        uncomment the +1 below. */
2111     np->cno = cno /* + 1 */ ;
2112   else
2113     np->cno = invalidcharno;
2114   np->left = np->right = NULL;
2115   if (CTAGS && !cxref_style)
2116     {
2117       if (strlen (linestart) < 50)
2118         np->regex = concat (linestart, "$", "");
2119       else
2120         np->regex = savenstr (linestart, 50);
2121     }
2122   else
2123     np->regex = savenstr (linestart, linelen);
2124
2125   add_node (np, &nodehead);
2126 }
2127
2128 /*
2129  * free_tree ()
2130  *      recurse on left children, iterate on right children.
2131  */
2132 static void
2133 free_tree (np)
2134      register node *np;
2135 {
2136   while (np)
2137     {
2138       register node *node_right = np->right;
2139       free_tree (np->left);
2140       if (np->name != NULL)
2141         free (np->name);
2142       free (np->regex);
2143       free (np);
2144       np = node_right;
2145     }
2146 }
2147
2148 /*
2149  * free_fdesc ()
2150  *      delete a file description
2151  */
2152 static void
2153 free_fdesc (fdp)
2154      register fdesc *fdp;
2155 {
2156   if (fdp->infname != NULL) free (fdp->infname);
2157   if (fdp->infabsname != NULL) free (fdp->infabsname);
2158   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2159   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2160   if (fdp->prop != NULL) free (fdp->prop);
2161   free (fdp);
2162 }
2163
2164 /*
2165  * add_node ()
2166  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2167  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2168  *      balancing.
2169  *
2170  *      add_node is the only function allowed to add nodes, so it can
2171  *      maintain state.
2172  */
2173 static void
2174 add_node (np, cur_node_p)
2175      node *np, **cur_node_p;
2176 {
2177   register int dif;
2178   register node *cur_node = *cur_node_p;
2179
2180   if (cur_node == NULL)
2181     {
2182       *cur_node_p = np;
2183       last_node = np;
2184       return;
2185     }
2186
2187   if (!CTAGS)
2188     /* Etags Mode */
2189     {
2190       /* For each file name, tags are in a linked sublist on the right
2191          pointer.  The first tags of different files are a linked list
2192          on the left pointer.  last_node points to the end of the last
2193          used sublist. */
2194       if (last_node != NULL && last_node->fdp == np->fdp)
2195         {
2196           /* Let's use the same sublist as the last added node. */
2197           assert (last_node->right == NULL);
2198           last_node->right = np;
2199           last_node = np;
2200         }
2201       else if (cur_node->fdp == np->fdp)
2202         {
2203           /* Scanning the list we found the head of a sublist which is
2204              good for us.  Let's scan this sublist. */
2205           add_node (np, &cur_node->right);
2206         }
2207       else
2208         /* The head of this sublist is not good for us.  Let's try the
2209            next one. */
2210         add_node (np, &cur_node->left);
2211     } /* if ETAGS mode */
2212
2213   else
2214     {
2215       /* Ctags Mode */
2216       dif = strcmp (np->name, cur_node->name);
2217
2218       /*
2219        * If this tag name matches an existing one, then
2220        * do not add the node, but maybe print a warning.
2221        */
2222       if (no_duplicates && !dif)
2223         {
2224           if (np->fdp == cur_node->fdp)
2225             {
2226               if (!no_warnings)
2227                 {
2228                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2229                            np->fdp->infname, lineno, np->name);
2230                   fprintf (stderr, "Second entry ignored\n");
2231                 }
2232             }
2233           else if (!cur_node->been_warned && !no_warnings)
2234             {
2235               fprintf
2236                 (stderr,
2237                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2238                  np->fdp->infname, cur_node->fdp->infname, np->name);
2239               cur_node->been_warned = TRUE;
2240             }
2241           return;
2242         }
2243
2244       /* Actually add the node */
2245       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2246     } /* if CTAGS mode */
2247 }
2248
2249 /*
2250  * invalidate_nodes ()
2251  *      Scan the node tree and invalidate all nodes pointing to the
2252  *      given file description (CTAGS case) or free them (ETAGS case).
2253  */
2254 static void
2255 invalidate_nodes (badfdp, npp)
2256      fdesc *badfdp;
2257      node **npp;
2258 {
2259   node *np = *npp;
2260
2261   if (np == NULL)
2262     return;
2263
2264   if (CTAGS)
2265     {
2266       if (np->left != NULL)
2267         invalidate_nodes (badfdp, &np->left);
2268       if (np->fdp == badfdp)
2269         np->valid = FALSE;
2270       if (np->right != NULL)
2271         invalidate_nodes (badfdp, &np->right);
2272     }
2273   else
2274     {
2275       assert (np->fdp != NULL);
2276       if (np->fdp == badfdp)
2277         {
2278           *npp = np->left;      /* detach the sublist from the list */
2279           np->left = NULL;      /* isolate it */
2280           free_tree (np);       /* free it */
2281           invalidate_nodes (badfdp, npp);
2282         }
2283       else
2284         invalidate_nodes (badfdp, &np->left);
2285     }
2286 }
2287
2288 \f
/* Forward declarations of the two size-computing helpers defined
   just below. */
static int total_size_of_entries __P((node *));
static int number_len __P((long));
2291
/* Return the number of decimal digits needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits = 0;

  /* Count one digit, drop it, and go on while something is left. */
  do
    {
      digits++;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
2302
2303 /*
2304  * Return total number of characters that put_entries will output for
2305  * the nodes in the linked list at the right of the specified node.
2306  * This count is irrelevant with etags.el since emacs 19.34 at least,
2307  * but is still supplied for backward compatibility.
2308  */
2309 static int
2310 total_size_of_entries (np)
2311      register node *np;
2312 {
2313   register int total = 0;
2314
2315   for (; np != NULL; np = np->right)
2316     if (np->valid)
2317       {
2318         total += strlen (np->regex) + 1;                /* pat\177 */
2319         if (np->name != NULL)
2320           total += strlen (np->name) + 1;               /* name\001 */
2321         total += number_len ((long) np->lno) + 1;       /* lno, */
2322         if (np->cno != invalidcharno)                   /* cno */
2323           total += number_len (np->cno);
2324         total += 1;                                     /* newline */
2325       }
2326
2327   return total;
2328 }
2329
2330 static void
2331 put_entries (np)
2332      register node *np;
2333 {
2334   register char *sp;
2335   static fdesc *fdp = NULL;
2336
2337   if (np == NULL)
2338     return;
2339
2340   /* Output subentries that precede this one */
2341   if (CTAGS)
2342     put_entries (np->left);
2343
2344   /* Output this entry */
2345   if (np->valid)
2346     {
2347       if (!CTAGS)
2348         {
2349           /* Etags mode */
2350           if (fdp != np->fdp)
2351             {
2352               fdp = np->fdp;
2353               fprintf (tagf, "\f\n%s,%d\n",
2354                        fdp->taggedfname, total_size_of_entries (np));
2355               fdp->written = TRUE;
2356             }
2357           fputs (np->regex, tagf);
2358           fputc ('\177', tagf);
2359           if (np->name != NULL)
2360             {
2361               fputs (np->name, tagf);
2362               fputc ('\001', tagf);
2363             }
2364           fprintf (tagf, "%d,", np->lno);
2365           if (np->cno != invalidcharno)
2366             fprintf (tagf, "%ld", np->cno);
2367           fputs ("\n", tagf);
2368         }
2369       else
2370         {
2371           /* Ctags mode */
2372           if (np->name == NULL)
2373             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2374
2375           if (cxref_style)
2376             {
2377               if (vgrind_style)
2378                 fprintf (stdout, "%s %s %d\n",
2379                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2380               else
2381                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2382                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2383             }
2384           else
2385             {
2386               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2387
2388               if (np->is_func)
2389                 {               /* function or #define macro with args */
2390                   putc (searchar, tagf);
2391                   putc ('^', tagf);
2392
2393                   for (sp = np->regex; *sp; sp++)
2394                     {
2395                       if (*sp == '\\' || *sp == searchar)
2396                         putc ('\\', tagf);
2397                       putc (*sp, tagf);
2398                     }
2399                   putc (searchar, tagf);
2400                 }
2401               else
2402                 {               /* anything else; text pattern inadequate */
2403                   fprintf (tagf, "%d", np->lno);
2404                 }
2405               putc ('\n', tagf);
2406             }
2407         }
2408     } /* if this node contains a valid tag */
2409
2410   /* Output subentries that follow this one */
2411   put_entries (np->right);
2412   if (!CTAGS)
2413     put_entries (np->left);
2414 }
2415
2416 \f
/* Flags describing the C dialect being parsed.
   C_EXT is a mask for the low twelve bits, where the dialect values
   below are kept.  C_STAR and C_JAVA both include the C_PLPL bit,
   which is why the keyword table further down writes
   (C_JAVA & ~C_PLPL) to get the bit that is specific to Java. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2425
/*
 * The C symbol tables.
 *
 * enum sym_type lists the kinds of keyword that the keyword table
 * below can assign to a word (the `type' member of struct
 * C_stab_entry).  st_none is the first enumerator, so its value is 0.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2440
/* Forward declarations: hash and in_word_set are the gperf-generated
   lookup functions defined below; C_symtype is declared here too. */
static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));
2444
2445 /* Feed stuff between (but not including) %[ and %] lines to:
2446      gperf -m 5
2447 %[
2448 %compare-strncmp
2449 %enum
2450 %struct-type
2451 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2452 %%
2453 if,             0,                      st_C_ignore
2454 for,            0,                      st_C_ignore
2455 while,          0,                      st_C_ignore
2456 switch,         0,                      st_C_ignore
2457 return,         0,                      st_C_ignore
2458 __attribute__,  0,                      st_C_attribute
2459 @interface,     0,                      st_C_objprot
2460 @protocol,      0,                      st_C_objprot
2461 @implementation,0,                      st_C_objimpl
2462 @end,           0,                      st_C_objend
2463 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2464 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2465 friend,         C_PLPL,                 st_C_ignore
2466 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2467 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2468 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2469 class,          0,                      st_C_class
2470 namespace,      C_PLPL,                 st_C_struct
2471 domain,         C_STAR,                 st_C_struct
2472 union,          0,                      st_C_struct
2473 struct,         0,                      st_C_struct
2474 extern,         0,                      st_C_extern
2475 enum,           0,                      st_C_enum
2476 typedef,        0,                      st_C_typedef
2477 define,         0,                      st_C_define
2478 undef,          0,                      st_C_define
2479 operator,       C_PLPL,                 st_C_operator
2480 template,       0,                      st_C_template
2481 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2482 DEFUN,          0,                      st_C_gnumacro
2483 SYSCALL,        0,                      st_C_gnumacro
2484 ENTRY,          0,                      st_C_gnumacro
2485 PSEUDO,         0,                      st_C_gnumacro
2486 # These are defined inside C functions, so currently they are not met.
2487 # EXFUN used in glibc, DEFVAR_* in emacs.
2488 #EXFUN,         0,                      st_C_gnumacro
2489 #DEFVAR_,       0,                      st_C_gnumacro
2490 %]
2491 and replace lines between %< and %> with its output, then:
2492  - remove the #if characterset check
2493  - make in_word_set static and not inline. */
2494 /*%<*/
2495 /* C code produced by gperf version 3.0.1 */
2496 /* Command-line: gperf -m 5  */
2497 /* Computed positions: -k'2-3' */
2498
/* One entry of the keyword table: the keyword, the dialect flags
   given in the second column of the gperf input above, and the kind
   of symbol.  NOTE(review): a c_ext of 0 presumably means the keyword
   applies to every dialect -- confirm against C_symtype. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2500 /* maximum key range = 33, duplicates = 0 */
2501
/* Hash function generated by gperf for the keyword list above; do not
   edit by hand, regenerate it instead.
   The result is LEN plus the asso_values entry of the second
   character of STR and, unless LEN is exactly 2, also that of the
   third character.
   NOTE(review): for LEN smaller than 2 the default branch still reads
   str[2] and str[1]; presumably the caller only passes LEN >= 2 --
   confirm in in_word_set. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Per-character contribution to the hash, indexed by the character
     taken as unsigned char. */
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Start from the length, then add the character contributions. */
  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2556
2557 static struct C_stab_entry *
2558 in_word_set (str, len)
2559      register const char *str;
2560      register unsigned int len;
2561 {
2562   enum
2563     {
2564       TOTAL_KEYWORDS = 32,
2565       MIN_WORD_LENGTH = 2,
2566       MAX_WORD_LENGTH = 15,
2567       MIN_HASH_VALUE = 2,
2568       MAX_HASH_VALUE = 34
2569     };
2570
2571   static struct C_stab_entry wordlist[] =
2572     {
2573       {""}, {""},
2574       {"if",            0,                      st_C_ignore},
2575       {""},
2576       {"@end",          0,                      st_C_objend},
2577       {"union",         0,                      st_C_struct},
2578       {"define",                0,                      st_C_define},
2579       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2580       {"template",      0,                      st_C_template},
2581       {"operator",      C_PLPL,                 st_C_operator},
2582       {"@interface",    0,                      st_C_objprot},
2583       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2584       {"friend",                C_PLPL,                 st_C_ignore},
2585       {"typedef",       0,                      st_C_typedef},
2586       {"return",                0,                      st_C_ignore},
2587       {"@implementation",0,                     st_C_objimpl},
2588       {"@protocol",     0,                      st_C_objprot},
2589       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2590       {"extern",                0,                      st_C_extern},
2591       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2592       {"struct",                0,                      st_C_struct},
2593       {"domain",                C_STAR,                 st_C_struct},
2594       {"switch",                0,                      st_C_ignore},
2595       {"enum",          0,                      st_C_enum},
2596       {"for",           0,                      st_C_ignore},
2597       {"namespace",     C_PLPL,                 st_C_struct},
2598       {"class",         0,                      st_C_class},
2599       {"while",         0,                      st_C_ignore},
2600       {"undef",         0,                      st_C_define},
2601       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2602       {"__attribute__", 0,                      st_C_attribute},
2603       {"SYSCALL",       0,                      st_C_gnumacro},
2604       {"ENTRY",         0,                      st_C_gnumacro},
2605       {"PSEUDO",                0,                      st_C_gnumacro},
2606       {"DEFUN",         0,                      st_C_gnumacro}
2607     };
2608
2609   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2610     {
2611       register int key = hash (str, len);
2612
2613       if (key <= MAX_HASH_VALUE && key >= 0)
2614         {
2615           register const char *s = wordlist[key].name;
2616
2617           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2618             return &wordlist[key];
2619         }
2620     }
2621   return 0;
2622 }
2623 /*%>*/
2624
2625 static enum sym_type
2626 C_symtype (str, len, c_ext)
2627      char *str;
2628      int len;
2629      int c_ext;
2630 {
2631   register struct C_stab_entry *se = in_word_set (str, len);
2632
2633   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2634     return st_none;
2635   return se->type;
2636 }
2637
2638 \f
2639 /*
2640  * Ignoring __attribute__ ((list)).  consider_token sets this flag to
      * TRUE when it meets the __attribute__ keyword.
2641  */
2642 static bool inattribute;        /* looking at an __attribute__ construct */
2643
2644 /*
2645  * C functions and variables are recognized using a simple
2646  * finite automaton.  fvdef is its state variable.
2647  */
2648 static enum
2649 {
2650   fvnone,                       /* nothing seen */
2651   fdefunkey,                    /* GNU macro keyword (DEFUN, SYSCALL...) seen */
2652   fdefunname,                   /* name following a GNU macro keyword seen */
2653   foperator,                    /* func: operator keyword seen (cplpl) */
2654   fvnameseen,                   /* function or variable name seen */
2655   fstartlist,                   /* func: just after open parenthesis */
2656   finlist,                      /* func: in parameter list */
2657   flistseen,                    /* func: after parameter list */
2658   fignore,                      /* func: before open brace */
2659   vignore                       /* var-like: ignore until ';' */
2660 } fvdef;
2661
2662 static bool fvextern;           /* func or var: extern keyword seen */
2663
2664 /*
2665  * typedefs are recognized using a simple finite automaton.
2666  * typdef is its state variable.
2667  */
2668 static enum
2669 {
2670   tnone,                        /* nothing seen */
2671   tkeyseen,                     /* typedef keyword seen */
2672   ttypeseen,                    /* defined type seen */
2673   tinbody,                      /* inside typedef body */
2674   tend,                         /* just before typedef tag */
2675   tignore                       /* junk after typedef tag */
2676 } typdef;
2677
2678 /*
2679  * struct-like structures (enum, struct and union) are recognized
2680  * using another simple finite automaton.  `structdef' is its state
2681  * variable.
2682  */
2683 static enum
2684 {
2685   snone,                        /* nothing seen yet,
2686                                    or in struct body if bracelev > 0 */
2687   skeyseen,                     /* struct-like keyword seen */
2688   stagseen,                     /* struct-like tag seen */
2689   scolonseen                    /* colon, or a Java extends/implements
                                        keyword, seen after struct-like tag */
2690 } structdef;
2691
2692 /*
2693  * When objdef is different from onone, objtag is the name of the class.
      * consider_token stores a copy made with savenstr here; that copy is
      * never released (see the note in its oignore case).
2694  */
2695 static char *objtag = "<uninited>";
2696
2697 /*
2698  * Yet another little state machine to deal with preprocessor lines.
2699  */
2700 static enum
2701 {
2702   dnone,                        /* nothing seen */
2703   dsharpseen,                   /* '#' seen as first char on line */
2704   ddefineseen,                  /* '#' and 'define' seen */
2705   dignorerest                   /* ignore rest of line */
2706 } definedef;
2707
2708 /*
2709  * State machine for Objective C protocols and implementations.
2710  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2711  */
2712 static enum
2713 {
2714   onone,                        /* nothing seen */
2715   oprotocol,                    /* @interface or @protocol seen */
2716   oimplementation,              /* @implementation seen */
2717   otagseen,                     /* class name seen */
2718   oparenseen,                   /* parenthesis before category seen */
2719   ocatseen,                     /* category name seen */
2720   oinbody,                      /* in @implementation body */
2721   omethodsign,                  /* in @implementation body, after +/- */
2722   omethodtag,                   /* after method name */
2723   omethodcolon,                 /* after method colon */
2724   omethodparm,                  /* after method parameter */
2725   oignore                       /* wait for @end */
2726 } objdef;
2727
2728
2729 /*
2730  * Use this structure to keep info about the token read, and how it
2731  * should be tagged.  Used by the make_C_tag function to build a tag.
2732  */
2733 static struct tok
2734 {
2735   char *line;                   /* string containing the token */
2736   int offset;                   /* where the token starts in LINE */
2737   int length;                   /* token length */
2738   /*
2739     The previous members can be used to pass strings around for generic
2740     purposes.  The following ones specifically refer to creating tags.  In this
2741     case the token contained here is the pattern that will be used to create a
2742     tag.
2743   */
2744   bool valid;                   /* when FALSE, do not create a tag; the token
2745                                    should be invalidated whenever a state
2746                                    machine is reset prematurely.  make_C_tag
                                        clears it after writing a tag */
2747   bool named;                   /* create a named tag */
2748   int lineno;                   /* source line number of tag */
2749   long linepos;                 /* source char number of tag */
2750 } token;                        /* latest token read */
2751
2752 /*
2753  * Variables and functions for dealing with nested structures.
2754  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2755  */
2756 static void pushclass_above __P((int, char *, int));
2757 static void popclass_above __P((int));
2758 static void write_classname __P((linebuffer *, char *qualifier));
2759
     /* cname and bracelev are parallel arrays of SIZE elements; the first
        NL of them are in use.  */
2760 static struct {
2761   char **cname;                 /* nested class names; NULL for a level
                                        pushed without a name */
2762   int *bracelev;                /* nested class brace level */
2763   int nl;                       /* class nesting level (elements used) */
2764   int size;                     /* length of the array */
2765 } cstack;                       /* stack for nested declaration tags */
2766 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2767 #define nestlev         (cstack.nl)
2768 /* After struct keyword or in struct body, not inside a nested function.
        The expansion reads a variable named `bracelev', which must be
        visible where the macro is used.  */
2769 #define instruct        (structdef == snone && nestlev > 0                      \
2770                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2771
2772 static void
2773 pushclass_above (bracelev, str, len)
2774      int bracelev;
2775      char *str;
2776      int len;
2777 {
2778   int nl;
2779
2780   popclass_above (bracelev);
2781   nl = cstack.nl;
2782   if (nl >= cstack.size)
2783     {
2784       int size = cstack.size *= 2;
2785       xrnew (cstack.cname, size, char *);
2786       xrnew (cstack.bracelev, size, int);
2787     }
2788   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2789   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2790   cstack.bracelev[nl] = bracelev;
2791   cstack.nl = nl + 1;
2792 }
2793
2794 static void
2795 popclass_above (bracelev)
2796      int bracelev;
2797 {
2798   int nl;
2799
2800   for (nl = cstack.nl - 1;
2801        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2802        nl--)
2803     {
2804       if (cstack.cname[nl] != NULL)
2805         free (cstack.cname[nl]);
2806       cstack.nl = nl;
2807     }
2808 }
2809
2810 static void
2811 write_classname (cn, qualifier)
2812      linebuffer *cn;
2813      char *qualifier;
2814 {
2815   int i, len;
2816   int qlen = strlen (qualifier);
2817
2818   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2819     {
2820       len = 0;
2821       cn->len = 0;
2822       cn->buffer[0] = '\0';
2823     }
2824   else
2825     {
2826       len = strlen (cstack.cname[0]);
2827       linebuffer_setlen (cn, len);
2828       strcpy (cn->buffer, cstack.cname[0]);
2829     }
2830   for (i = 1; i < cstack.nl; i++)
2831     {
2832       char *s;
2833       int slen;
2834
2835       s = cstack.cname[i];
2836       if (s == NULL)
2837         continue;
2838       slen = strlen (s);
2839       len += slen + qlen;
2840       linebuffer_setlen (cn, len);
2841       strncat (cn->buffer, qualifier, qlen);
2842       strncat (cn->buffer, s, slen);
2843     }
2844 }
2845
2846 \f
2847 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2848 static void make_C_tag __P((bool));
2849
2850 /*
2851  * consider_token ()
2852  *      checks to see if the current token is at the start of a
2853  *      function or variable, or corresponds to a typedef, or
2854  *      is a struct/union/enum tag, or #define, or an enum constant.
      *      Returns TRUE when the token is a candidate tag name of one
      *      of these kinds (or an Objective C class, category or method
      *      name), FALSE when it is to be ignored.  Every call may also
      *      advance the state machines listed under Globals.
2855  *
2856  *      *IS_FUNC_OR_VAR is written only on some paths: it gets
2857  *      (c == '(') for a #define name, and TRUE for function or
      *      variable names, GNU macro names, `operator', and Objective C
      *      class and category names.  Elsewhere it is left untouched.
      *      *C_EXTP is the language mask; when it has C_AUTO set, seeing
      *      a C++ construct here replaces C_AUTO with C_PLPL.
2858  *
2859  * Globals
2860  *      fvdef                   IN OUT
2861  *      structdef               IN OUT
2862  *      definedef               IN OUT
2863  *      typdef                  IN OUT
2864  *      objdef                  IN OUT
      *      fvextern                OUT
      *      inattribute             OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2865  */
2866
2867 static bool
2868 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2869      register char *str;        /* IN: token pointer */
2870      register int len;          /* IN: token length */
2871      register int c;            /* IN: first char after the token */
2872      int *c_extp;               /* IN, OUT: C extensions mask */
2873      int bracelev;              /* IN: brace level */
2874      int parlev;                /* IN: parenthesis level */
2875      bool *is_func_or_var;      /* OUT: function or variable found */
2876 {
2877   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2878      structtype is the type of the preceding struct-like keyword, and
2879      structbracelev is the brace level where it has been seen. */
2880   static enum sym_type structtype;
2881   static int structbracelev;
2882   static enum sym_type toktype;
2883
2884
       /* Keyword class of the token, or st_none for anything that is not
          a keyword in the current language.  */
2885   toktype = C_symtype (str, len, *c_extp);
2886
2887   /*
2888    * Skip __attribute__
2889    */
2890   if (toktype == st_C_attribute)
2891     {
2892       inattribute = TRUE;
2893       return FALSE;
2894      }
2895
2896    /*
2897     * Advance the definedef state machine.
2898     */
2899    switch (definedef)
2900      {
2901      case dnone:
2902        /* We're not on a preprocessor line. */
2903        if (toktype == st_C_gnumacro)
2904          {
2905            fvdef = fdefunkey;
2906            return FALSE;
2907          }
2908        break;
2909      case dsharpseen:
            /* First token after '#': only `define' and `undef' (both
               st_C_define) lead to a tag; anything else makes the rest
               of the line ignored.  */
2910        if (toktype == st_C_define)
2911          {
2912            definedef = ddefineseen;
2913          }
2914        else
2915          {
2916            definedef = dignorerest;
2917          }
2918        return FALSE;
2919      case ddefineseen:
2920        /*
2921         * Make a tag for any macro, unless it is a constant
2922         * and constantypedefs is FALSE.
2923         */
2924        definedef = dignorerest;
2925        *is_func_or_var = (c == '(');
2926        if (!*is_func_or_var && !constantypedefs)
2927          return FALSE;
2928        else
2929          return TRUE;
2930      case dignorerest:
2931        return FALSE;
2932      default:
2933        error ("internal error: definedef value.", (char *)NULL);
2934      }
2935
2936    /*
2937     * Now typedefs
2938     */
2939    switch (typdef)
2940      {
2941      case tnone:
2942        if (toktype == st_C_typedef)
2943          {
                /* The typedef machine is started only when typedefs are
                   being tagged; the function/variable state is reset
                   either way.  */
2944            if (typedefs)
2945              typdef = tkeyseen;
2946            fvextern = FALSE;
2947            fvdef = fvnone;
2948            return FALSE;
2949          }
2950        break;
2951      case tkeyseen:
            /* A non-keyword token or a class/struct/enum keyword after
               `typedef' moves the machine to ttypeseen; other keywords
               leave the state unchanged.  */
2952        switch (toktype)
2953          {
2954          case st_none:
2955          case st_C_class:
2956          case st_C_struct:
2957          case st_C_enum:
2958            typdef = ttypeseen;
2959          }
2960        break;
2961      case ttypeseen:
2962        if (structdef == snone && fvdef == fvnone)
2963          {
2964            fvdef = fvnameseen;
2965            return TRUE;
2966          }
2967        break;
2968      case tend:
2969        switch (toktype)
2970          {
2971          case st_C_class:
2972          case st_C_struct:
2973          case st_C_enum:
2974            return FALSE;
2975          }
2976        return TRUE;
2977      }
2978
2979    /*
2980     * This structdef business is NOT invoked when we are ctags and the
2981     * file is plain C.  This is because a struct tag may have the same
2982     * name as another tag, and this loses with ctags.
2983     */
2984    switch (toktype)
2985      {
2986      case st_C_javastruct:
            /* Java `extends' or `implements' after a struct-like tag.  */
2987        if (structdef == stagseen)
2988          structdef = scolonseen;
2989        return FALSE;
2990      case st_C_template:
2991      case st_C_class:
2992        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2993            && bracelev == 0
2994            && definedef == dnone && structdef == snone
2995            && typdef == tnone && fvdef == fvnone)
2996          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2997        if (toktype == st_C_template)
2998          break;
2999        /* FALLTHRU */
3000      case st_C_struct:
3001      case st_C_enum:
            /* Remember which struct-like keyword was seen and at what
               brace level, so that the next token can be taken as its
               tag.  */
3002        if (parlev == 0
3003            && fvdef != vignore
3004            && (typdef == tkeyseen
3005                || (typedefs_or_cplusplus && structdef == snone)))
3006          {
3007            structdef = skeyseen;
3008            structtype = toktype;
3009            structbracelev = bracelev;
3010            if (fvdef == fvnameseen)
3011              fvdef = fvnone;
3012          }
3013        return FALSE;
3014      }
3015
        /* The token right after a struct-like keyword is its tag.  */
3016    if (structdef == skeyseen)
3017      {
3018        structdef = stagseen;
3019        return TRUE;
3020      }
3021
3022    if (typdef != tnone)
3023      definedef = dnone;
3024
3025    /* Detect Objective C constructs. */
3026    switch (objdef)
3027      {
3028      case onone:
3029        switch (toktype)
3030          {
3031          case st_C_objprot:
3032            objdef = oprotocol;
3033            return FALSE;
3034          case st_C_objimpl:
3035            objdef = oimplementation;
3036            return FALSE;
3037          }
3038        break;
3039      case oimplementation:
3040        /* Save the class tag for functions or variables defined inside. */
3041        objtag = savenstr (str, len);
3042        objdef = oinbody;
3043        return FALSE;
3044      case oprotocol:
3045        /* Save the class tag for categories. */
3046        objtag = savenstr (str, len);
3047        objdef = otagseen;
3048        *is_func_or_var = TRUE;
3049        return TRUE;
3050      case oparenseen:
3051        objdef = ocatseen;
3052        *is_func_or_var = TRUE;
3053        return TRUE;
3054      case oinbody:
3055        break;
3056      case omethodsign:
            /* First part of a method name: start token_name with a
               NUL-terminated copy of the token.  */
3057        if (parlev == 0)
3058          {
3059            fvdef = fvnone;
3060            objdef = omethodtag;
3061            linebuffer_setlen (&token_name, len);
3062            strncpy (token_name.buffer, str, len);
3063            token_name.buffer[len] = '\0';
3064            return TRUE;
3065          }
3066        return FALSE;
3067      case omethodcolon:
3068        if (parlev == 0)
3069          objdef = omethodparm;
3070        return FALSE;
3071      case omethodparm:
            /* A further part of the method name: append it to what
               token_name already holds.  */
3072        if (parlev == 0)
3073          {
3074            fvdef = fvnone;
3075            objdef = omethodtag;
3076            linebuffer_setlen (&token_name, token_name.len + len);
3077            strncat (token_name.buffer, str, len);
3078            return TRUE;
3079          }
3080        return FALSE;
3081      case oignore:
3082        if (toktype == st_C_objend)
3083          {
3084            /* Memory leakage here: the string pointed by objtag is
3085               never released, because many tests would be needed to
3086               avoid breaking on incorrect input code.  The amount of
3087               memory leaked here is the sum of the lengths of the
3088               class tags.
3089            free (objtag); */
3090            objdef = onone;
3091          }
3092        return FALSE;
3093      }
3094
3095    /* A function, variable or enum constant? */
3096    switch (toktype)
3097      {
3098      case st_C_extern:
3099        fvextern = TRUE;
3100        switch  (fvdef)
3101          {
3102          case finlist:
3103          case flistseen:
3104          case fignore:
3105          case vignore:
3106            break;
3107          default:
3108            fvdef = fvnone;
3109          }
3110        return FALSE;
3111      case st_C_ignore:
3112        fvextern = FALSE;
3113        fvdef = vignore;
3114        return FALSE;
3115      case st_C_operator:
3116        fvdef = foperator;
3117        *is_func_or_var = TRUE;
3118        return TRUE;
3119      case st_none:
3120        if (constantypedefs
3121            && structdef == snone
3122            && structtype == st_C_enum && bracelev > structbracelev)
3123          return TRUE;           /* enum constant */
3124        switch (fvdef)
3125          {
3126          case fdefunkey:
3127            if (bracelev > 0)
3128              break;
3129            fvdef = fdefunname;  /* GNU macro */
3130            *is_func_or_var = TRUE;
3131            return TRUE;
3132          case fvnone:
3133            switch (typdef)
3134              {
3135              case ttypeseen:
3136                return FALSE;
3137              case tnone:
                    /* `asm' and `__asm__' set vignore instead of being
                       tagged.  */
3138                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3139                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3140                  {
3141                    fvdef = vignore;
3142                    return FALSE;
3143                  }
3144                break;
3145              }
3146           /* FALLTHRU */
3147           case fvnameseen:
               /* A token ending in "::operator" starts a C++ operator
                  definition.  */
3148           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3149             {
3150               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3151                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3152               fvdef = foperator;
3153               *is_func_or_var = TRUE;
3154               return TRUE;
3155             }
               /* Inside braces, only names directly in a struct body
                  are considered.  */
3156           if (bracelev > 0 && !instruct)
3157             break;
3158           fvdef = fvnameseen;   /* function or variable */
3159           *is_func_or_var = TRUE;
3160           return TRUE;
3161         }
3162       break;
3163     }
3164
3165   return FALSE;
3166 }
3167
3168 \f
3169 /*
3170  * C_entries often keeps pointers to tokens or lines which are older than
3171  * the line currently read.  By keeping two line buffers, and switching
3172  * them at end of line, it is possible to use those pointers.
      *
      * The macros below are not self-contained: they expand to code that
      * uses names such as curndx, newndx, c_ext, lp, inf, quotednl and
      * savetoken, which must be visible where the macros are used.
3173  */
3174 static struct
3175 {
3176   long linepos;                 /* value of charno saved just before the
                                        line was read */
3177   linebuffer lb;                /* the line itself */
3178 } lbs[2];
3179
3180 #define current_lb_is_new (newndx == curndx)
3181 #define switch_line_buffers() (curndx = 1 - curndx)
3182
3183 #define curlb (lbs[curndx].lb)
3184 #define newlb (lbs[newndx].lb)
3185 #define curlinepos (lbs[curndx].linepos)
3186 #define newlinepos (lbs[newndx].linepos)
3187
     /* Tests on the language mask c_ext.  */
3188 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3189 #define cplpl (c_ext & C_PLPL)
3190 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3191
     /* Start a new line without touching definedef: save charno as the
        position of the current buffer, read the next line from inf into
        it, point lp at its start, clear quotednl, and make the new-buffer
        index equal to the current one.  */
3192 #define CNL_SAVE_DEFINEDEF()                                            \
3193 do {                                                                    \
3194   curlinepos = charno;                                                  \
3195   readline (&curlb, inf);                                               \
3196   lp = curlb.buffer;                                                    \
3197   quotednl = FALSE;                                                     \
3198   newndx = curndx;                                                      \
3199 } while (0)
3200
     /* Like CNL_SAVE_DEFINEDEF, but also restore `token' from savetoken
        when the latter is valid, and reset definedef to dnone.  */
3201 #define CNL()                                                           \
3202 do {                                                                    \
3203   CNL_SAVE_DEFINEDEF();                                                 \
3204   if (savetoken.valid)                                                  \
3205     {                                                                   \
3206       token = savetoken;                                                \
3207       savetoken.valid = FALSE;                                          \
3208     }                                                                   \
3209   definedef = dnone;                                                    \
3210 } while (0)
3211
3212
3213 static void
3214 make_C_tag (isfun)
3215      bool isfun;
3216 {
3217   /* This function is never called when token.valid is FALSE, but
3218      we must protect against invalid input or internal errors. */
3219   if (!DEBUG && !token.valid)
3220     return;
3221
3222   if (token.valid)
3223     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3224               token.offset+token.length+1, token.lineno, token.linepos);
3225   else                          /* this case is optimised away if !DEBUG */
3226     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3227               token_name.len + 17, isfun, token.line,
3228               token.offset+token.length+1, token.lineno, token.linepos);
3229
3230   token.valid = FALSE;
3231 }
3232
3233
3234 /*
3235  * C_entries ()
3236  *      This routine finds functions, variables, typedefs,
3237  *      #define's, enum constants and struct/union/enum definitions in
3238  *      C syntax and adds them to the list.
3239  */
3240 static void
3241 C_entries (c_ext, inf)
3242      int c_ext;                 /* extension of C */
3243      FILE *inf;                 /* input file */
3244 {
3245   register char c;              /* latest char read; '\0' for end of line */
3246   register char *lp;            /* pointer one beyond the character `c' */
3247   int curndx, newndx;           /* indices for current and new lb */
3248   register int tokoff;          /* offset in line of start of current token */
3249   register int toklen;          /* length of current token */
3250   char *qualifier;              /* string used to qualify names */
3251   int qlen;                     /* length of qualifier */
3252   int bracelev;                 /* current brace level */
3253   int bracketlev;               /* current bracket level */
3254   int parlev;                   /* current parenthesis level */
3255   int attrparlev;               /* __attribute__ parenthesis level */
3256   int templatelev;              /* current template level */
3257   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3258   bool incomm, inquote, inchar, quotednl, midtoken;
3259   bool yacc_rules;              /* in the rules part of a yacc file */
3260   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3261
3262
3263   linebuffer_init (&lbs[0].lb);
3264   linebuffer_init (&lbs[1].lb);
3265   if (cstack.size == 0)
3266     {
3267       cstack.size = (DEBUG) ? 1 : 4;
3268       cstack.nl = 0;
3269       cstack.cname = xnew (cstack.size, char *);
3270       cstack.bracelev = xnew (cstack.size, int);
3271     }
3272
3273   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3274   curndx = newndx = 0;
3275   lp = curlb.buffer;
3276   *lp = 0;
3277
3278   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3279   structdef = snone; definedef = dnone; objdef = onone;
3280   yacc_rules = FALSE;
3281   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3282   token.valid = savetoken.valid = FALSE;
3283   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3284   if (cjava)
3285     { qualifier = "."; qlen = 1; }
3286   else
3287     { qualifier = "::"; qlen = 2; }
3288
3289
3290   while (!feof (inf))
3291     {
3292       c = *lp++;
3293       if (c == '\\')
3294         {
3295           /* If we are at the end of the line, the next character is a
3296              '\0'; do not skip it, because it is what tells us
3297              to read the next line.  */
3298           if (*lp == '\0')
3299             {
3300               quotednl = TRUE;
3301               continue;
3302             }
3303           lp++;
3304           c = ' ';
3305         }
3306       else if (incomm)
3307         {
3308           switch (c)
3309             {
3310             case '*':
3311               if (*lp == '/')
3312                 {
3313                   c = *lp++;
3314                   incomm = FALSE;
3315                 }
3316               break;
3317             case '\0':
3318               /* Newlines inside comments do not end macro definitions in
3319                  traditional cpp. */
3320               CNL_SAVE_DEFINEDEF ();
3321               break;
3322             }
3323           continue;
3324         }
3325       else if (inquote)
3326         {
3327           switch (c)
3328             {
3329             case '"':
3330               inquote = FALSE;
3331               break;
3332             case '\0':
3333               /* Newlines inside strings do not end macro definitions
3334                  in traditional cpp, even though compilers don't
3335                  usually accept them. */
3336               CNL_SAVE_DEFINEDEF ();
3337               break;
3338             }
3339           continue;
3340         }
3341       else if (inchar)
3342         {
3343           switch (c)
3344             {
3345             case '\0':
3346               /* Hmmm, something went wrong. */
3347               CNL ();
3348               /* FALLTHRU */
3349             case '\'':
3350               inchar = FALSE;
3351               break;
3352             }
3353           continue;
3354         }
3355       else if (bracketlev > 0)
3356         {
3357           switch (c)
3358             {
3359             case ']':
3360               if (--bracketlev > 0)
3361                 continue;
3362               break;
3363             case '\0':
3364               CNL_SAVE_DEFINEDEF ();
3365               break;
3366             }
3367           continue;
3368         }
3369       else switch (c)
3370         {
3371         case '"':
3372           inquote = TRUE;
3373           if (inattribute)
3374             break;
3375           switch (fvdef)
3376             {
3377             case fdefunkey:
3378             case fstartlist:
3379             case finlist:
3380             case fignore:
3381             case vignore:
3382               break;
3383             default:
3384               fvextern = FALSE;
3385               fvdef = fvnone;
3386             }
3387           continue;
3388         case '\'':
3389           inchar = TRUE;
3390           if (inattribute)
3391             break;
3392           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3393             {
3394               fvextern = FALSE;
3395               fvdef = fvnone;
3396             }
3397           continue;
3398         case '/':
3399           if (*lp == '*')
3400             {
3401               lp++;
3402               incomm = TRUE;
3403               continue;
3404             }
3405           else if (/* cplpl && */ *lp == '/')
3406             {
3407               c = '\0';
3408               break;
3409             }
3410           else
3411             break;
3412         case '%':
3413           if ((c_ext & YACC) && *lp == '%')
3414             {
3415               /* Entering or exiting rules section in yacc file. */
3416               lp++;
3417               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3418               typdef = tnone; structdef = snone;
3419               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3420               bracelev = 0;
3421               yacc_rules = !yacc_rules;
3422               continue;
3423             }
3424           else
3425             break;
3426         case '#':
3427           if (definedef == dnone)
3428             {
3429               char *cp;
3430               bool cpptoken = TRUE;
3431
3432               /* Look back on this line.  If all blanks, or nonblanks
3433                  followed by an end of comment, this is a preprocessor
3434                  token. */
3435               for (cp = newlb.buffer; cp < lp-1; cp++)
3436                 if (!iswhite (*cp))
3437                   {
3438                     if (*cp == '*' && *(cp+1) == '/')
3439                       {
3440                         cp++;
3441                         cpptoken = TRUE;
3442                       }
3443                     else
3444                       cpptoken = FALSE;
3445                   }
3446               if (cpptoken)
3447                 definedef = dsharpseen;
3448             } /* if (definedef == dnone) */
3449           continue;
3450         case '[':
3451           bracketlev++;
3452             continue;
3453         } /* switch (c) */
3454
3455
3456       /* Consider token only if some involved conditions are satisfied. */
3457       if (typdef != tignore
3458           && definedef != dignorerest
3459           && fvdef != finlist
3460           && templatelev == 0
3461           && (definedef != dnone
3462               || structdef != scolonseen)
3463           && !inattribute)
3464         {
3465           if (midtoken)
3466             {
3467               if (endtoken (c))
3468                 {
3469                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3470                     /* This handles :: in the middle,
3471                        but not at the beginning of an identifier.
3472                        Also, space-separated :: is not recognised. */
3473                     {
3474                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3475                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3476                       lp += 2;
3477                       toklen += 2;
3478                       c = lp[-1];
3479                       goto still_in_token;
3480                     }
3481                   else
3482                     {
3483                       bool funorvar = FALSE;
3484
3485                       if (yacc_rules
3486                           || consider_token (newlb.buffer + tokoff, toklen, c,
3487                                              &c_ext, bracelev, parlev,
3488                                              &funorvar))
3489                         {
3490                           if (fvdef == foperator)
3491                             {
3492                               char *oldlp = lp;
3493                               lp = skip_spaces (lp-1);
3494                               if (*lp != '\0')
3495                                 lp += 1;
3496                               while (*lp != '\0'
3497                                      && !iswhite (*lp) && *lp != '(')
3498                                 lp += 1;
3499                               c = *lp++;
3500                               toklen += lp - oldlp;
3501                             }
3502                           token.named = FALSE;
3503                           if (!plainc
3504                               && nestlev > 0 && definedef == dnone)
3505                             /* in struct body */
3506                             {
3507                               write_classname (&token_name, qualifier);
3508                               linebuffer_setlen (&token_name,
3509                                                  token_name.len+qlen+toklen);
3510                               strcat (token_name.buffer, qualifier);
3511                               strncat (token_name.buffer,
3512                                        newlb.buffer + tokoff, toklen);
3513                               token.named = TRUE;
3514                             }
3515                           else if (objdef == ocatseen)
3516                             /* Objective C category */
3517                             {
3518                               int len = strlen (objtag) + 2 + toklen;
3519                               linebuffer_setlen (&token_name, len);
3520                               strcpy (token_name.buffer, objtag);
3521                               strcat (token_name.buffer, "(");
3522                               strncat (token_name.buffer,
3523                                        newlb.buffer + tokoff, toklen);
3524                               strcat (token_name.buffer, ")");
3525                               token.named = TRUE;
3526                             }
3527                           else if (objdef == omethodtag
3528                                    || objdef == omethodparm)
3529                             /* Objective C method */
3530                             {
3531                               token.named = TRUE;
3532                             }
3533                           else if (fvdef == fdefunname)
3534                             /* GNU DEFUN and similar macros */
3535                             {
3536                               bool defun = (newlb.buffer[tokoff] == 'F');
3537                               int off = tokoff;
3538                               int len = toklen;
3539
3540                               /* Rewrite the tag so that emacs lisp DEFUNs
3541                                  can be found by their elisp name */
3542                               if (defun)
3543                                 {
3544                                   off += 1;
3545                                   len -= 1;
3546                                 }
3547                               linebuffer_setlen (&token_name, len);
3548                               strncpy (token_name.buffer,
3549                                        newlb.buffer + off, len);
3550                               token_name.buffer[len] = '\0';
3551                               if (defun)
3552                                 while (--len >= 0)
3553                                   if (token_name.buffer[len] == '_')
3554                                     token_name.buffer[len] = '-';
3555                               token.named = defun;
3556                             }
3557                           else
3558                             {
3559                               linebuffer_setlen (&token_name, toklen);
3560                               strncpy (token_name.buffer,
3561                                        newlb.buffer + tokoff, toklen);
3562                               token_name.buffer[toklen] = '\0';
3563                               /* Name macros and members. */
3564                               token.named = (structdef == stagseen
3565                                              || typdef == ttypeseen
3566                                              || typdef == tend
3567                                              || (funorvar
3568                                                  && definedef == dignorerest)
3569                                              || (funorvar
3570                                                  && definedef == dnone
3571                                                  && structdef == snone
3572                                                  && bracelev > 0));
3573                             }
3574                           token.lineno = lineno;
3575                           token.offset = tokoff;
3576                           token.length = toklen;
3577                           token.line = newlb.buffer;
3578                           token.linepos = newlinepos;
3579                           token.valid = TRUE;
3580
3581                           if (definedef == dnone
3582                               && (fvdef == fvnameseen
3583                                   || fvdef == foperator
3584                                   || structdef == stagseen
3585                                   || typdef == tend
3586                                   || typdef == ttypeseen
3587                                   || objdef != onone))
3588                             {
3589                               if (current_lb_is_new)
3590                                 switch_line_buffers ();
3591                             }
3592                           else if (definedef != dnone
3593                                    || fvdef == fdefunname
3594                                    || instruct)
3595                             make_C_tag (funorvar);
3596                         }
3597                       else /* not yacc and consider_token failed */
3598                         {
3599                           if (inattribute && fvdef == fignore)
3600                             {
3601                               /* We have just met __attribute__ after a
3602                                  function parameter list: do not tag the
3603                                  function again. */
3604                               fvdef = fvnone;
3605                             }
3606                         }
3607                       midtoken = FALSE;
3608                     }
3609                 } /* if (endtoken (c)) */
3610               else if (intoken (c))
3611                 still_in_token:
3612                 {
3613                   toklen++;
3614                   continue;
3615                 }
3616             } /* if (midtoken) */
3617           else if (begtoken (c))
3618             {
3619               switch (definedef)
3620                 {
3621                 case dnone:
3622                   switch (fvdef)
3623                     {
3624                     case fstartlist:
3625                       /* This prevents tagging fb in
3626                          void (__attribute__((noreturn)) *fb) (void);
3627                          Fixing this is not easy and not very important. */
3628                       fvdef = finlist;
3629                       continue;
3630                     case flistseen:
3631                       if (plainc || declarations)
3632                         {
3633                           make_C_tag (TRUE); /* a function */
3634                           fvdef = fignore;
3635                         }
3636                       break;
3637                     }
3638                   if (structdef == stagseen && !cjava)
3639                     {
3640                       popclass_above (bracelev);
3641                       structdef = snone;
3642                     }
3643                   break;
3644                 case dsharpseen:
3645                   savetoken = token;
3646                   break;
3647                 }
3648               if (!yacc_rules || lp == newlb.buffer + 1)
3649                 {
3650                   tokoff = lp - 1 - newlb.buffer;
3651                   toklen = 1;
3652                   midtoken = TRUE;
3653                 }
3654               continue;
3655             } /* if (begtoken) */
3656         } /* if must look at token */
3657
3658
3659       /* Detect end of line, colon, comma, semicolon and various braces
3660          after having handled a token.*/
3661       switch (c)
3662         {
3663         case ':':
3664           if (inattribute)
3665             break;
3666           if (yacc_rules && token.offset == 0 && token.valid)
3667             {
3668               make_C_tag (FALSE); /* a yacc function */
3669               break;
3670             }
3671           if (definedef != dnone)
3672             break;
3673           switch (objdef)
3674             {
3675             case  otagseen:
3676               objdef = oignore;
3677               make_C_tag (TRUE); /* an Objective C class */
3678               break;
3679             case omethodtag:
3680             case omethodparm:
3681               objdef = omethodcolon;
3682               linebuffer_setlen (&token_name, token_name.len + 1);
3683               strcat (token_name.buffer, ":");
3684               break;
3685             }
3686           if (structdef == stagseen)
3687             {
3688               structdef = scolonseen;
3689               break;
3690             }
3691           /* Should be useless, but may be work as a safety net. */
3692           if (cplpl && fvdef == flistseen)
3693             {
3694               make_C_tag (TRUE); /* a function */
3695               fvdef = fignore;
3696               break;
3697             }
3698           break;
3699         case ';':
3700           if (definedef != dnone || inattribute)
3701             break;
3702           switch (typdef)
3703             {
3704             case tend:
3705             case ttypeseen:
3706               make_C_tag (FALSE); /* a typedef */
3707               typdef = tnone;
3708               fvdef = fvnone;
3709               break;
3710             case tnone:
3711             case tinbody:
3712             case tignore:
3713               switch (fvdef)
3714                 {
3715                 case fignore:
3716                   if (typdef == tignore || cplpl)
3717                     fvdef = fvnone;
3718                   break;
3719                 case fvnameseen:
3720                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3721                       || (members && instruct))
3722                     make_C_tag (FALSE); /* a variable */
3723                   fvextern = FALSE;
3724                   fvdef = fvnone;
3725                   token.valid = FALSE;
3726                   break;
3727                 case flistseen:
3728                   if ((declarations
3729                        && (cplpl || !instruct)
3730                        && (typdef == tnone || (typdef != tignore && instruct)))
3731                       || (members
3732                           && plainc && instruct))
3733                     make_C_tag (TRUE);  /* a function */
3734                   /* FALLTHRU */
3735                 default:
3736                   fvextern = FALSE;
3737                   fvdef = fvnone;
3738                   if (declarations
3739                        && cplpl && structdef == stagseen)
3740                     make_C_tag (FALSE); /* forward declaration */
3741                   else
3742                     token.valid = FALSE;
3743                 } /* switch (fvdef) */
3744               /* FALLTHRU */
3745             default:
3746               if (!instruct)
3747                 typdef = tnone;
3748             }
3749           if (structdef == stagseen)
3750             structdef = snone;
3751           break;
3752         case ',':
3753           if (definedef != dnone || inattribute)
3754             break;
3755           switch (objdef)
3756             {
3757             case omethodtag:
3758             case omethodparm:
3759               make_C_tag (TRUE); /* an Objective C method */
3760               objdef = oinbody;
3761               break;
3762             }
3763           switch (fvdef)
3764             {
3765             case fdefunkey:
3766             case foperator:
3767             case fstartlist:
3768             case finlist:
3769             case fignore:
3770             case vignore:
3771               break;
3772             case fdefunname:
3773               fvdef = fignore;
3774               break;
3775             case fvnameseen:
3776               if (parlev == 0
3777                   && ((globals
3778                        && bracelev == 0
3779                        && templatelev == 0
3780                        && (!fvextern || declarations))
3781                       || (members && instruct)))
3782                   make_C_tag (FALSE); /* a variable */
3783               break;
3784             case flistseen:
3785               if ((declarations && typdef == tnone && !instruct)
3786                   || (members && typdef != tignore && instruct))
3787                 {
3788                   make_C_tag (TRUE); /* a function */
3789                   fvdef = fvnameseen;
3790                 }
3791               else if (!declarations)
3792                 fvdef = fvnone;
3793               token.valid = FALSE;
3794               break;
3795             default:
3796               fvdef = fvnone;
3797             }
3798           if (structdef == stagseen)
3799             structdef = snone;
3800           break;
3801         case ']':
3802           if (definedef != dnone || inattribute)
3803             break;
3804           if (structdef == stagseen)
3805             structdef = snone;
3806           switch (typdef)
3807             {
3808             case ttypeseen:
3809             case tend:
3810               typdef = tignore;
3811               make_C_tag (FALSE);       /* a typedef */
3812               break;
3813             case tnone:
3814             case tinbody:
3815               switch (fvdef)
3816                 {
3817                 case foperator:
3818                 case finlist:
3819                 case fignore:
3820                 case vignore:
3821                   break;
3822                 case fvnameseen:
3823                   if ((members && bracelev == 1)
3824                       || (globals && bracelev == 0
3825                           && (!fvextern || declarations)))
3826                     make_C_tag (FALSE); /* a variable */
3827                   /* FALLTHRU */
3828                 default:
3829                   fvdef = fvnone;
3830                 }
3831               break;
3832             }
3833           break;
3834         case '(':
3835           if (inattribute)
3836             {
3837               attrparlev++;
3838               break;
3839             }
3840           if (definedef != dnone)
3841             break;
3842           if (objdef == otagseen && parlev == 0)
3843             objdef = oparenseen;
3844           switch (fvdef)
3845             {
3846             case fvnameseen:
3847               if (typdef == ttypeseen
3848                   && *lp != '*'
3849                   && !instruct)
3850                 {
3851                   /* This handles constructs like:
3852                      typedef void OperatorFun (int fun); */
3853                   make_C_tag (FALSE);
3854                   typdef = tignore;
3855                   fvdef = fignore;
3856                   break;
3857                 }
3858               /* FALLTHRU */
3859             case foperator:
3860               fvdef = fstartlist;
3861               break;
3862             case flistseen:
3863               fvdef = finlist;
3864               break;
3865             }
3866           parlev++;
3867           break;
3868         case ')':
3869           if (inattribute)
3870             {
3871               if (--attrparlev == 0)
3872                 inattribute = FALSE;
3873               break;
3874             }
3875           if (definedef != dnone)
3876             break;
3877           if (objdef == ocatseen && parlev == 1)
3878             {
3879               make_C_tag (TRUE); /* an Objective C category */
3880               objdef = oignore;
3881             }
3882           if (--parlev == 0)
3883             {
3884               switch (fvdef)
3885                 {
3886                 case fstartlist:
3887                 case finlist:
3888                   fvdef = flistseen;
3889                   break;
3890                 }
3891               if (!instruct
3892                   && (typdef == tend
3893                       || typdef == ttypeseen))
3894                 {
3895                   typdef = tignore;
3896                   make_C_tag (FALSE); /* a typedef */
3897                 }
3898             }
3899           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3900             parlev = 0;
3901           break;
3902         case '{':
3903           if (definedef != dnone)
3904             break;
3905           if (typdef == ttypeseen)
3906             {
3907               /* Whenever typdef is set to tinbody (currently only
3908                  here), typdefbracelev should be set to bracelev. */
3909               typdef = tinbody;
3910               typdefbracelev = bracelev;
3911             }
3912           switch (fvdef)
3913             {
3914             case flistseen:
3915               make_C_tag (TRUE);    /* a function */
3916               /* FALLTHRU */
3917             case fignore:
3918               fvdef = fvnone;
3919               break;
3920             case fvnone:
3921               switch (objdef)
3922                 {
3923                 case otagseen:
3924                   make_C_tag (TRUE); /* an Objective C class */
3925                   objdef = oignore;
3926                   break;
3927                 case omethodtag:
3928                 case omethodparm:
3929                   make_C_tag (TRUE); /* an Objective C method */
3930                   objdef = oinbody;
3931                   break;
3932                 default:
3933                   /* Neutralize `extern "C" {' grot. */
3934                   if (bracelev == 0 && structdef == snone && nestlev == 0
3935                       && typdef == tnone)
3936                     bracelev = -1;
3937                 }
3938               break;
3939             }
3940           switch (structdef)
3941             {
3942             case skeyseen:         /* unnamed struct */
3943               pushclass_above (bracelev, NULL, 0);
3944               structdef = snone;
3945               break;
3946             case stagseen:         /* named struct or enum */
3947             case scolonseen:       /* a class */
3948               pushclass_above (bracelev,token.line+token.offset, token.length);
3949               structdef = snone;
3950               make_C_tag (FALSE);  /* a struct or enum */
3951               break;
3952             }
3953           bracelev++;
3954           break;
3955         case '*':
3956           if (definedef != dnone)
3957             break;
3958           if (fvdef == fstartlist)
3959             {
3960               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3961               token.valid = FALSE;
3962             }
3963           break;
3964         case '}':
3965           if (definedef != dnone)
3966             break;
3967           if (!ignoreindent && lp == newlb.buffer + 1)
3968             {
3969               if (bracelev != 0)
3970                 token.valid = FALSE;
3971               bracelev = 0;     /* reset brace level if first column */
3972               parlev = 0;       /* also reset paren level, just in case... */
3973             }
3974           else if (bracelev > 0)
3975             bracelev--;
3976           else
3977             token.valid = FALSE; /* something gone amiss, token unreliable */
3978           popclass_above (bracelev);
3979           structdef = snone;
3980           /* Only if typdef == tinbody is typdefbracelev significant. */
3981           if (typdef == tinbody && bracelev <= typdefbracelev)
3982             {
3983               assert (bracelev == typdefbracelev);
3984               typdef = tend;
3985             }
3986           break;
3987         case '=':
3988           if (definedef != dnone)
3989             break;
3990           switch (fvdef)
3991             {
3992             case foperator:
3993             case finlist:
3994             case fignore:
3995             case vignore:
3996               break;
3997             case fvnameseen:
3998               if ((members && bracelev == 1)
3999                   || (globals && bracelev == 0 && (!fvextern || declarations)))
4000                 make_C_tag (FALSE); /* a variable */
4001               /* FALLTHRU */
4002             default:
4003               fvdef = vignore;
4004             }
4005           break;
4006         case '<':
4007           if (cplpl
4008               && (structdef == stagseen || fvdef == fvnameseen))
4009             {
4010               templatelev++;
4011               break;
4012             }
4013           goto resetfvdef;
4014         case '>':
4015           if (templatelev > 0)
4016             {
4017               templatelev--;
4018               break;
4019             }
4020           goto resetfvdef;
4021         case '+':
4022         case '-':
4023           if (objdef == oinbody && bracelev == 0)
4024             {
4025               objdef = omethodsign;
4026               break;
4027             }
4028           /* FALLTHRU */
4029         resetfvdef:
4030         case '#': case '~': case '&': case '%': case '/':
4031         case '|': case '^': case '!': case '.': case '?':
4032           if (definedef != dnone)
4033             break;
4034           /* These surely cannot follow a function tag in C. */
4035           switch (fvdef)
4036             {
4037             case foperator:
4038             case finlist:
4039             case fignore:
4040             case vignore:
4041               break;
4042             default:
4043               fvdef = fvnone;
4044             }
4045           break;
4046         case '\0':
4047           if (objdef == otagseen)
4048             {
4049               make_C_tag (TRUE); /* an Objective C class */
4050               objdef = oignore;
4051             }
4052           /* If a macro spans multiple lines don't reset its state. */
4053           if (quotednl)
4054             CNL_SAVE_DEFINEDEF ();
4055           else
4056             CNL ();
4057           break;
4058         } /* switch (c) */
4059
4060     } /* while not eof */
4061
4062   free (lbs[0].lb.buffer);
4063   free (lbs[1].lb.buffer);
4064 }
4065
4066 /*
4067  * Process either a C++ file or a C file depending on the setting
4068  * of a global flag.
      *
      * INF is the input stream, passed on unchanged to C_entries.  If the
      * `cplusplus' flag is nonzero the language argument is C_PLPL (C++);
      * otherwise it is C_AUTO, the flag that the C parser's own comments
      * describe as "automatic detection of C++".
4069  */
4070 static void
4071 default_C_entries (inf)
4072      FILE *inf;
4073 {
4074   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4075 }
4076
4077 /* Always do plain C: pass the input stream INF to C_entries with 0 as
        the language argument, i.e. without naming any of the variants
        (C_PLPL, C_JAVA, C_STAR, YACC, C_AUTO) the sibling wrappers use.  */
4078 static void
4079 plain_C_entries (inf)
4080      FILE *inf;
4081 {
4082   C_entries (0, inf);
4083 }
4084
4085 /* Always do C++: pass the input stream INF to C_entries with C_PLPL
        as the language argument, regardless of the `cplusplus' flag that
        default_C_entries consults.  */
4086 static void
4087 Cplusplus_entries (inf)
4088      FILE *inf;
4089 {
4090   C_entries (C_PLPL, inf);
4091 }
4092
4093 /* Always do Java: pass the input stream INF to C_entries with C_JAVA
        as the language argument.  */
4094 static void
4095 Cjava_entries (inf)
4096      FILE *inf;
4097 {
4098   C_entries (C_JAVA, inf);
4099 }
4100
4101 /* Always do C*: pass the input stream INF to C_entries with C_STAR
        as the language argument.  */
4102 static void
4103 Cstar_entries (inf)
4104      FILE *inf;
4105 {
4106   C_entries (C_STAR, inf);
4107 }
4108
4109 /* Always do Yacc: pass the input stream INF to C_entries with YACC as
        the language argument.  YACC is the flag the C parser tests before
        treating `%%' as the start or end of the rules section.  */
4110 static void
4111 Yacc_entries (inf)
4112      FILE *inf;
4113 {
4114   C_entries (YACC, inf);
4115 }
4116
4117 \f
4118 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to a `for' header only; the statement
        written after the macro call is the loop body.  Before each pass,
        and only while feof (file_pointer) is false, it calls readline to
        fill line_buffer from file_pointer and then sets char_pointer to
        line_buffer.buffer.  A `continue' in the body therefore moves on
        to the next input line.  */
4119 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4120   for (;                        /* loop initialization */               \
4121        !feof (file_pointer)     /* loop test */                         \
4122        &&                       /* instructions at start of loop */     \
4123           (readline (&line_buffer, file_pointer),                       \
4124            char_pointer = line_buffer.buffer,                           \
4125            TRUE);                                                       \
4126       )
4127
     /* LOOKING_AT is true when the text at cp begins with the literal
        string kw (compared with strneq) and the character right after it
        satisfies notinname.  On success cp is assigned the position that
        skip_spaces returns for the text after the keyword, so cp must be
        a modifiable lvalue; note that cp is evaluated more than once.  */
4128 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
4129   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
4130    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
4131    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
4132    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4133
4134 /* Similar to LOOKING_AT, but compares with strncaseeq instead of
        strneq, does not use notinname to check what follows the keyword,
        and does not skip spaces: on success cp is advanced by the length
        of kw only.  As above, cp must be a modifiable lvalue.  */
4135 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4136   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
4137    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
4138    && ((cp) += sizeof(kw)-1))                   /* step past kw only */
4139
4140 /*
4141  * Read a file line by line, but do no processing of the lines.  This
4142  * is used to do regexp matching on files that have no language defined.
4143  */
4144 static void
4145 just_read_file (inf)
4146      FILE *inf;
4147 {
4148   /* Open-coded LOOP_ON_INPUT_LINES with an empty body: the line
4149      pointer that macro would set is not needed here.  */
4150   while (!feof (inf))
4151     readline (&lb, inf);
4152 }
4153
4154 \f
4155 /* Fortran parsing */
4156
4157 static void F_takeprec __P((void));
4158 static void F_getit __P((FILE *));
4159
     /* Skip an optional size suffix after a Fortran type keyword.
        Works on dbp, the file-level scan pointer: blanks at dbp are
        always skipped; then, if a `*' follows, the blanks after it and
        either the text `(*)' or a run of digits are skipped as well.
        When the `*' is followed by anything else, dbp is left one
        character before that text, which the original code describes
        as forcing a failure (presumably of the caller's next keyword
        match -- confirm against nocase_tail).  */
4160 static void
4161 F_takeprec ()
4162 {
4163   register char *cp = skip_spaces (dbp);
4164
4165   if (*cp == '*')
4166     {
4167       cp = skip_spaces (cp + 1);
4168       if (strneq (cp, "(*)", 3))
4169         cp += 3;                /* `*(*)' */
4170       else if (ISDIGIT (*cp))
4171         {
4172           /* `*' followed by a run of digits.  */
4173           for (cp++; ISDIGIT (*cp); cp++)
4174             continue;
4175         }
4176       else
4177         cp--;                   /* force failure */
4178     }
4179
4180   dbp = cp;
4181 }
4182
     /* Make a tag for the name found at dbp, the file-level scan pointer.
        Blanks at dbp are skipped first.  If the current line ends there,
        the next line is read from INF and is used only when its sixth
        character is `&' (apparently a continuation mark); scanning then
        resumes after that character.  The name must begin with a letter,
        `_' or `$' and extends over the following characters accepted by
        intoken.  Nothing is tagged when no such name is found.  */
4183 static void
4184 F_getit (inf)
4185      FILE *inf;
4186 {
4187   register char *cp;
4188
4189   dbp = skip_spaces (dbp);
4190   if (*dbp == '\0')
4191     {
4192       readline (&lb, inf);
4193       dbp = lb.buffer;
           /* A line shorter than six characters has no sixth column:
              looking at dbp[5] would read past its terminating NUL, so
              check the length before indexing.  */
4194       if (strlen (dbp) < 6 || dbp[5] != '&')
4195         return;
4196       dbp += 6;
4197       dbp = skip_spaces (dbp);
4198     }
4199   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4200     return;
       /* Find the end of the name; cp stops on the first character that
          is not part of a token, or on the end of the line.  */
4201   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4202     continue;
4203   make_tag (dbp, cp-dbp, TRUE,
4204             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4205 }
4206
4207
     /* Scan the Fortran (or Ratfor) source INF line by line and tag the
        names introduced by `function', `subroutine', `entry' and
        `block data'.  A type specification in front of the keyword
        (integer, real, logical, complex, character, each optionally
        followed by a size that F_takeprec skips, or double precision)
        is stepped over first.  A block data unit with no name after
        the keyword is tagged as "blockdata".  All keyword matching is
        delegated to nocase_tail, the names are tagged by F_getit.  */
4208 static void
4209 Fortran_functions (inf)
4210      FILE *inf;
4211 {
4212   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4213     {
4214       int first;                /* lower-cased first char of a keyword */
4215
4216       if (*dbp == '%')
4217         dbp++;                  /* Ratfor escape to fortran */
4218       dbp = skip_spaces (dbp);
4219       if (*dbp == '\0')
4220         continue;
4221
4222       /* Step over an optional leading type specification.  The test
4223          on FIRST keeps nocase_tail from being tried on keywords that
4224          cannot match, exactly as a switch on the character would.  */
4225       first = lowcase (*dbp);
4226       if ((first == 'i' && nocase_tail ("integer"))
4227           || (first == 'r' && nocase_tail ("real"))
4228           || (first == 'l' && nocase_tail ("logical"))
4229           || (first == 'c'
4230               && (nocase_tail ("complex") || nocase_tail ("character"))))
4231         F_takeprec ();
4232       else if (first == 'd' && nocase_tail ("double"))
4233         {
4234           /* `double' only counts when `precision' follows it;
4235              otherwise give up on this line.  */
4236           dbp = skip_spaces (dbp);
4237           if (*dbp == '\0' || !nocase_tail ("precision"))
4238             continue;
4239         }
4240
4241       dbp = skip_spaces (dbp);
4242       if (*dbp == '\0')
4243         continue;
4244
4245       /* Look for a keyword that introduces a program unit and tag
4246          the name that follows it.  */
4247       first = lowcase (*dbp);
4248       if ((first == 'f' && nocase_tail ("function"))
4249           || (first == 's' && nocase_tail ("subroutine"))
4250           || (first == 'e' && nocase_tail ("entry")))
4251         F_getit (inf);
4252       else if (first == 'b'
4253                && (nocase_tail ("blockdata") || nocase_tail ("block data")))
4254         {
4255           dbp = skip_spaces (dbp);
4256           if (*dbp != '\0')
4257             F_getit (inf);      /* look for name */
4258           else                  /* assume un-named */
4259             make_tag ("blockdata", 9, TRUE,
4260                       lb.buffer, dbp - lb.buffer, lineno, linecharno);
4261         }
4262     }
4263 }
4280
4281 \f
4282 /*
4283  * Ada parsing
4284  * Original code by
4285  * Philippe Waroquiers (1998)
4286  */
4287
4288 static void Ada_getit __P((FILE *, char *));
4289
4290 /* Once we are positioned after an "interesting" keyword, let's get
4291    the real tag value necessary. */
4292 static void
4293 Ada_getit (inf, name_qualifier)
4294      FILE *inf;
4295      char *name_qualifier;
4296 {
4297   register char *cp;
4298   char *name;
4299   char c;
4300
4301   while (!feof (inf))
4302     {
4303       dbp = skip_spaces (dbp);
4304       if (*dbp == '\0'
4305           || (dbp[0] == '-' && dbp[1] == '-'))
4306         {
4307           readline (&lb, inf);
4308           dbp = lb.buffer;
4309         }
4310       switch (lowcase(*dbp))
4311         {
4312         case 'b':
4313           if (nocase_tail ("body"))
4314             {
4315               /* Skipping body of   procedure body   or   package body or ....
4316                  resetting qualifier to body instead of spec. */
4317               name_qualifier = "/b";
4318               continue;
4319             }
4320           break;
4321         case 't':
4322           /* Skipping type of   task type   or   protected type ... */
4323           if (nocase_tail ("type"))
4324             continue;
4325           break;
4326         }
4327       if (*dbp == '"')
4328         {
4329           dbp += 1;
4330           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4331             continue;
4332         }
4333       else
4334         {
4335           dbp = skip_spaces (dbp);
4336           for (cp = dbp;
4337                (*cp != '\0'
4338                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4339                cp++)
4340             continue;
4341           if (cp == dbp)
4342             return;
4343         }
4344       c = *cp;
4345       *cp = '\0';
4346       name = concat (dbp, name_qualifier, "");
4347       *cp = c;
4348       make_tag (name, strlen (name), TRUE,
4349                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4350       free (name);
4351       if (c == '"')
4352         dbp = cp + 1;
4353       return;
4354     }
4355 }
4356
/* Scan the Ada source INF character by character and hand every
   interesting keyword to Ada_getit together with a qualifier suffix:
   "/f" function, "/p" procedure, "/s" package, "/t" protected or type,
   "/k" task.  Strings, "--" comments and quote characters are stepped
   over.  Functions, procedures and tasks are looked for only when
   packages_only is false; "type" and "use" only when typedefs is also
   set.  The scan works on the global pointer dbp. */
static void
Ada_funcs (inf)
     FILE *inf;
{
  bool inquote = FALSE;         /* inside a "..." string left open on an
                                   earlier line */
  bool skip_till_semicolumn = FALSE; /* discarding a "use" clause up to
                                        its ';' */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* When already inside a string, search from dbp itself;
                 otherwise start after the opening quote. */
              dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  /* No closing quote on this line.  dbp is NULL here, so
                     the inner loop must be left without testing it. */
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              /* Step over the quote and, unless the line ends there,
                 over the character that follows it. */
              dbp++ ;
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token.  In each case below a
             `break' leaves the switch so that the token is skipped,
             while `continue' resumes scanning at the current dbp. */
          switch (lowcase(*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }
              /* No break here: control falls through into case 't'.
                 NOTE(review): presumably harmless, since neither "task"
                 nor "type" should match a token starting with 'u' --
                 confirm against nocase_tail. */

            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore whatever follows the type name on this line. */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4464
4465 \f
4466 /*
4467  * Unix and microcontroller assembly tag handling
 * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4469  * Idea by Bob Weiner, Motorola Inc. (1994)
4470  */
4471 static void
4472 Asm_labels (inf)
4473      FILE *inf;
4474 {
4475   register char *cp;
4476
4477   LOOP_ON_INPUT_LINES (inf, lb, cp)
4478     {
4479       /* If first char is alphabetic or one of [_.$], test for colon
4480          following identifier. */
4481       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4482         {
4483           /* Read past label. */
4484           cp++;
4485           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4486             cp++;
4487           if (*cp == ':' || iswhite (*cp))
4488             /* Found end of label, so copy it and add it to the table. */
4489             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4490                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4491         }
4492     }
4493 }
4494
4495 \f
4496 /*
4497  * Perl support
4498  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4499  * Perl variable names: /^(my|local).../
4500  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4501  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4502  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4503  */
4504 static void
4505 Perl_functions (inf)
4506      FILE *inf;
4507 {
4508   char *package = savestr ("main"); /* current package name */
4509   register char *cp;
4510
4511   LOOP_ON_INPUT_LINES (inf, lb, cp)
4512     {
4513       skip_spaces(cp);
4514
4515       if (LOOKING_AT (cp, "package"))
4516         {
4517           free (package);
4518           get_tag (cp, &package);
4519         }
4520       else if (LOOKING_AT (cp, "sub"))
4521         {
4522           char *pos;
4523           char *sp = cp;
4524
4525           while (!notinname (*cp))
4526             cp++;
4527           if (cp == sp)
4528             continue;           /* nothing found */
4529           if ((pos = etags_strchr (sp, ':')) != NULL
4530               && pos < cp && pos[1] == ':')
4531             /* The name is already qualified. */
4532             make_tag (sp, cp - sp, TRUE,
4533                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4534           else
4535             /* Qualify it. */
4536             {
4537               char savechar, *name;
4538
4539               savechar = *cp;
4540               *cp = '\0';
4541               name = concat (package, "::", sp);
4542               *cp = savechar;
4543               make_tag (name, strlen(name), TRUE,
4544                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4545               free (name);
4546             }
4547         }
4548        else if (globals)        /* only if we are tagging global vars */
4549         {
4550           /* Skip a qualifier, if any. */
4551           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4552           /* After "my" or "local", but before any following paren or space. */
4553           char *varstart = cp;
4554
4555           if (qual              /* should this be removed?  If yes, how? */
4556               && (*cp == '$' || *cp == '@' || *cp == '%'))
4557             {
4558               varstart += 1;
4559               do
4560                 cp++;
4561               while (ISALNUM (*cp) || *cp == '_');
4562             }
4563           else if (qual)
4564             {
4565               /* Should be examining a variable list at this point;
4566                  could insist on seeing an open parenthesis. */
4567               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4568                 cp++;
4569             }
4570           else
4571             continue;
4572
4573           make_tag (varstart, cp - varstart, FALSE,
4574                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4575         }
4576     }
4577   free (package);
4578 }
4579
4580
4581 /*
4582  * Python support
4583  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4584  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4585  * More ideas by seb bacon <seb@jamkit.com> (2002)
4586  */
4587 static void
4588 Python_functions (inf)
4589      FILE *inf;
4590 {
4591   register char *cp;
4592
4593   LOOP_ON_INPUT_LINES (inf, lb, cp)
4594     {
4595       cp = skip_spaces (cp);
4596       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4597         {
4598           char *name = cp;
4599           while (!notinname (*cp) && *cp != ':')
4600             cp++;
4601           make_tag (name, cp - name, TRUE,
4602                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4603         }
4604     }
4605 }
4606
4607 \f
4608 /*
4609  * PHP support
4610  * Look for:
4611  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4612  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4613  *  - /^[ \t]*define\(\"[^\"]+/
4614  * Only with --members:
4615  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4616  * Idea by Diez B. Roggisch (2001)
4617  */
4618 static void
4619 PHP_functions (inf)
4620      FILE *inf;
4621 {
4622   register char *cp, *name;
4623   bool search_identifier = FALSE;
4624
4625   LOOP_ON_INPUT_LINES (inf, lb, cp)
4626     {
4627       cp = skip_spaces (cp);
4628       name = cp;
4629       if (search_identifier
4630           && *cp != '\0')
4631         {
4632           while (!notinname (*cp))
4633             cp++;
4634           make_tag (name, cp - name, TRUE,
4635                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4636           search_identifier = FALSE;
4637         }
4638       else if (LOOKING_AT (cp, "function"))
4639         {
4640           if(*cp == '&')
4641             cp = skip_spaces (cp+1);
4642           if(*cp != '\0')
4643             {
4644               name = cp;
4645               while (!notinname (*cp))
4646                 cp++;
4647               make_tag (name, cp - name, TRUE,
4648                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4649             }
4650           else
4651             search_identifier = TRUE;
4652         }
4653       else if (LOOKING_AT (cp, "class"))
4654         {
4655           if (*cp != '\0')
4656             {
4657               name = cp;
4658               while (*cp != '\0' && !iswhite (*cp))
4659                 cp++;
4660               make_tag (name, cp - name, FALSE,
4661                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4662             }
4663           else
4664             search_identifier = TRUE;
4665         }
4666       else if (strneq (cp, "define", 6)
4667                && (cp = skip_spaces (cp+6))
4668                && *cp++ == '('
4669                && (*cp == '"' || *cp == '\''))
4670         {
4671           char quote = *cp++;
4672           name = cp;
4673           while (*cp != quote && *cp != '\0')
4674             cp++;
4675           make_tag (name, cp - name, FALSE,
4676                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4677         }
4678       else if (members
4679                && LOOKING_AT (cp, "var")
4680                && *cp == '$')
4681         {
4682           name = cp;
4683           while (!notinname(*cp))
4684             cp++;
4685           make_tag (name, cp - name, FALSE,
4686                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4687         }
4688     }
4689 }
4690
4691 \f
4692 /*
4693  * Cobol tag functions
4694  * We could look for anything that could be a paragraph name.
4695  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4696  * Idea by Corny de Souza (1993)
4697  */
4698 static void
4699 Cobol_paragraphs (inf)
4700      FILE *inf;
4701 {
4702   register char *bp, *ep;
4703
4704   LOOP_ON_INPUT_LINES (inf, lb, bp)
4705     {
4706       if (lb.len < 9)
4707         continue;
4708       bp += 8;
4709
4710       /* If eoln, compiler option or comment ignore whole line. */
4711       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4712         continue;
4713
4714       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4715         continue;
4716       if (*ep++ == '.')
4717         make_tag (bp, ep - bp, TRUE,
4718                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4719     }
4720 }
4721
4722 \f
4723 /*
4724  * Makefile support
4725  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4726  */
4727 static void
4728 Makefile_targets (inf)
4729      FILE *inf;
4730 {
4731   register char *bp;
4732
4733   LOOP_ON_INPUT_LINES (inf, lb, bp)
4734     {
4735       if (*bp == '\t' || *bp == '#')
4736         continue;
4737       while (*bp != '\0' && *bp != '=' && *bp != ':')
4738         bp++;
4739       if (*bp == ':' || (globals && *bp == '='))
4740         {
4741           /* We should detect if there is more than one tag, but we do not.
4742              We just skip initial and final spaces. */
4743           char * namestart = skip_spaces (lb.buffer);
4744           while (--bp > namestart)
4745             if (!notinname (*bp))
4746               break;
4747           make_tag (namestart, bp - namestart + 1, TRUE,
4748                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4749         }
4750     }
4751 }
4752
4753 \f
4754 /*
4755  * Pascal parsing
4756  * Original code by Mosur K. Mohan (1989)
4757  *
4758  *  Locates tags for procedures & functions.  Doesn't do any type- or
4759  *  var-definitions.  It does look for the keyword "extern" or
4760  *  "forward" immediately following the procedure statement; if found,
4761  *  the tag is skipped.
4762  */
4763 static void
4764 Pascal_functions (inf)
4765      FILE *inf;
4766 {
4767   linebuffer tline;             /* mostly copied from C_entries */
4768   long save_lcno;
4769   int save_lineno, namelen, taglen;
4770   char c, *name;
4771
4772   bool                          /* each of these flags is TRUE iff: */
4773     incomment,                  /* point is inside a comment */
4774     inquote,                    /* point is inside '..' string */
4775     get_tagname,                /* point is after PROCEDURE/FUNCTION
4776                                    keyword, so next item = potential tag */
4777     found_tag,                  /* point is after a potential tag */
4778     inparms,                    /* point is within parameter-list */
4779     verify_tag;                 /* point has passed the parm-list, so the
4780                                    next token will determine whether this
4781                                    is a FORWARD/EXTERN to be ignored, or
4782                                    whether it is a real tag */
4783
4784   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4785   name = NULL;                  /* keep compiler quiet */
4786   dbp = lb.buffer;
4787   *dbp = '\0';
4788   linebuffer_init (&tline);
4789
4790   incomment = inquote = FALSE;
4791   found_tag = FALSE;            /* have a proc name; check if extern */
4792   get_tagname = FALSE;          /* found "procedure" keyword         */
4793   inparms = FALSE;              /* found '(' after "proc"            */
4794   verify_tag = FALSE;           /* check if "extern" is ahead        */
4795
4796
4797   while (!feof (inf))           /* long main loop to get next char */
4798     {
4799       c = *dbp++;
4800       if (c == '\0')            /* if end of line */
4801         {
4802           readline (&lb, inf);
4803           dbp = lb.buffer;
4804           if (*dbp == '\0')
4805             continue;
4806           if (!((found_tag && verify_tag)
4807                 || get_tagname))
4808             c = *dbp++;         /* only if don't need *dbp pointing
4809                                    to the beginning of the name of
4810                                    the procedure or function */
4811         }
4812       if (incomment)
4813         {
4814           if (c == '}')         /* within { } comments */
4815             incomment = FALSE;
4816           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4817             {
4818               dbp++;
4819               incomment = FALSE;
4820             }
4821           continue;
4822         }
4823       else if (inquote)
4824         {
4825           if (c == '\'')
4826             inquote = FALSE;
4827           continue;
4828         }
4829       else
4830         switch (c)
4831           {
4832           case '\'':
4833             inquote = TRUE;     /* found first quote */
4834             continue;
4835           case '{':             /* found open { comment */
4836             incomment = TRUE;
4837             continue;
4838           case '(':
4839             if (*dbp == '*')    /* found open (* comment */
4840               {
4841                 incomment = TRUE;
4842                 dbp++;
4843               }
4844             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4845               inparms = TRUE;
4846             continue;
4847           case ')':             /* end of parms list */
4848             if (inparms)
4849               inparms = FALSE;
4850             continue;
4851           case ';':
4852             if (found_tag && !inparms) /* end of proc or fn stmt */
4853               {
4854                 verify_tag = TRUE;
4855                 break;
4856               }
4857             continue;
4858           }
4859       if (found_tag && verify_tag && (*dbp != ' '))
4860         {
4861           /* Check if this is an "extern" declaration. */
4862           if (*dbp == '\0')
4863             continue;
4864           if (lowcase (*dbp == 'e'))
4865             {
4866               if (nocase_tail ("extern")) /* superfluous, really! */
4867                 {
4868                   found_tag = FALSE;
4869                   verify_tag = FALSE;
4870                 }
4871             }
4872           else if (lowcase (*dbp) == 'f')
4873             {
4874               if (nocase_tail ("forward")) /* check for forward reference */
4875                 {
4876                   found_tag = FALSE;
4877                   verify_tag = FALSE;
4878                 }
4879             }
4880           if (found_tag && verify_tag) /* not external proc, so make tag */
4881             {
4882               found_tag = FALSE;
4883               verify_tag = FALSE;
4884               make_tag (name, namelen, TRUE,
4885                         tline.buffer, taglen, save_lineno, save_lcno);
4886               continue;
4887             }
4888         }
4889       if (get_tagname)          /* grab name of proc or fn */
4890         {
4891           char *cp;
4892
4893           if (*dbp == '\0')
4894             continue;
4895
4896           /* Find block name. */
4897           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4898             continue;
4899
4900           /* Save all values for later tagging. */
4901           linebuffer_setlen (&tline, lb.len);
4902           strcpy (tline.buffer, lb.buffer);
4903           save_lineno = lineno;
4904           save_lcno = linecharno;
4905           name = tline.buffer + (dbp - lb.buffer);
4906           namelen = cp - dbp;
4907           taglen = cp - lb.buffer + 1;
4908
4909           dbp = cp;             /* set dbp to e-o-token */
4910           get_tagname = FALSE;
4911           found_tag = TRUE;
4912           continue;
4913
4914           /* And proceed to check for "extern". */
4915         }
4916       else if (!incomment && !inquote && !found_tag)
4917         {
4918           /* Check for proc/fn keywords. */
4919           switch (lowcase (c))
4920             {
4921             case 'p':
4922               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4923                 get_tagname = TRUE;
4924               continue;
4925             case 'f':
4926               if (nocase_tail ("unction"))
4927                 get_tagname = TRUE;
4928               continue;
4929             }
4930         }
4931     } /* while not eof */
4932
4933   free (tline.buffer);
4934 }
4935
4936 \f
4937 /*
4938  * Lisp tag functions
4939  *  look for (def or (DEF, quote or QUOTE
4940  */
4941
4942 static void L_getit __P((void));
4943
4944 static void
4945 L_getit ()
4946 {
4947   if (*dbp == '\'')             /* Skip prefix quote */
4948     dbp++;
4949   else if (*dbp == '(')
4950   {
4951     dbp++;
4952     /* Try to skip "(quote " */
4953     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4954       /* Ok, then skip "(" before name in (defstruct (foo)) */
4955       dbp = skip_spaces (dbp);
4956   }
4957   get_tag (dbp, NULL);
4958 }
4959
4960 static void
4961 Lisp_functions (inf)
4962      FILE *inf;
4963 {
4964   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4965     {
4966       if (dbp[0] != '(')
4967         continue;
4968
4969       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4970         {
4971           dbp = skip_non_spaces (dbp);
4972           dbp = skip_spaces (dbp);
4973           L_getit ();
4974         }
4975       else
4976         {
4977           /* Check for (foo::defmumble name-defined ... */
4978           do
4979             dbp++;
4980           while (!notinname (*dbp) && *dbp != ':');
4981           if (*dbp == ':')
4982             {
4983               do
4984                 dbp++;
4985               while (*dbp == ':');
4986
4987               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4988                 {
4989                   dbp = skip_non_spaces (dbp);
4990                   dbp = skip_spaces (dbp);
4991                   L_getit ();
4992                 }
4993             }
4994         }
4995     }
4996 }
4997
4998 \f
4999 /*
5000  * Lua script language parsing
5001  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5002  *
5003  *  "function" and "local function" are tags if they start at column 1.
5004  */
5005 static void
5006 Lua_functions (inf)
5007      FILE *inf;
5008 {
5009   register char *bp;
5010
5011   LOOP_ON_INPUT_LINES (inf, lb, bp)
5012     {
5013       if (bp[0] != 'f' && bp[0] != 'l')
5014         continue;
5015
5016       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5017
5018       if (LOOKING_AT (bp, "function"))
5019         get_tag (bp, NULL);
5020     }
5021 }
5022
5023 \f
5024 /*
5025  * Postscript tags
5026  * Just look for lines where the first character is '/'
5027  * Also look at "defineps" for PSWrap
5028  * Ideas by:
5029  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5030  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5031  */
5032 static void
5033 PS_functions (inf)
5034      FILE *inf;
5035 {
5036   register char *bp, *ep;
5037
5038   LOOP_ON_INPUT_LINES (inf, lb, bp)
5039     {
5040       if (bp[0] == '/')
5041         {
5042           for (ep = bp+1;
5043                *ep != '\0' && *ep != ' ' && *ep != '{';
5044                ep++)
5045             continue;
5046           make_tag (bp, ep - bp, TRUE,
5047                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5048         }
5049       else if (LOOKING_AT (bp, "defineps"))
5050         get_tag (bp, NULL);
5051     }
5052 }
5053
5054 \f
5055 /*
5056  * Forth tags
5057  * Ignore anything after \ followed by space or in ( )
5058  * Look for words defined by :
5059  * Look for constant, code, create, defer, value, and variable
5060  * OBP extensions:  Look for buffer:, field,
5061  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5062  */
5063 static void
5064 Forth_words (inf)
5065      FILE *inf;
5066 {
5067   register char *bp;
5068
5069   LOOP_ON_INPUT_LINES (inf, lb, bp)
5070     while ((bp = skip_spaces (bp))[0] != '\0')
5071       if (bp[0] == '\\' && iswhite(bp[1]))
5072         break;                  /* read next line */
5073       else if (bp[0] == '(' && iswhite(bp[1]))
5074         do                      /* skip to ) or eol */
5075           bp++;
5076         while (*bp != ')' && *bp != '\0');
5077       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5078                || LOOKING_AT_NOCASE (bp, "constant")
5079                || LOOKING_AT_NOCASE (bp, "code")
5080                || LOOKING_AT_NOCASE (bp, "create")
5081                || LOOKING_AT_NOCASE (bp, "defer")
5082                || LOOKING_AT_NOCASE (bp, "value")
5083                || LOOKING_AT_NOCASE (bp, "variable")
5084                || LOOKING_AT_NOCASE (bp, "buffer:")
5085                || LOOKING_AT_NOCASE (bp, "field"))
5086         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5087       else
5088         bp = skip_non_spaces (bp);
5089 }
5090
5091 \f
5092 /*
5093  * Scheme tag functions
5094  * look for (def... xyzzy
5095  *          (def... (xyzzy
5096  *          (def ... ((...(xyzzy ....
5097  *          (set! xyzzy
5098  * Original code by Ken Haase (1985?)
5099  */
5100 static void
5101 Scheme_functions (inf)
5102      FILE *inf;
5103 {
5104   register char *bp;
5105
5106   LOOP_ON_INPUT_LINES (inf, lb, bp)
5107     {
5108       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5109         {
5110           bp = skip_non_spaces (bp+4);
5111           /* Skip over open parens and white space */
5112           while (notinname (*bp))
5113             bp++;
5114           get_tag (bp, NULL);
5115         }
5116       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5117         get_tag (bp, NULL);
5118     }
5119 }
5120
5121 \f
5122 /* Find tags in TeX and LaTeX input files.  */
5123
5124 /* TEX_toktab is a table of TeX control sequences that define tags.
5125  * Each entry records one such control sequence.
5126  *
5127  * Original code from who knows whom.
5128  * Ideas by:
5129  *   Stefan Monnier (2002)
5130  */
5131
5132 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5133
5134 /* Default set of control sequences to put into TEX_toktab.
5135    The value of environment var TEXTAGS is prepended to this.  */
5136 static char *TEX_defenv = "\
5137 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5138 :part:appendix:entry:index:def\
5139 :newcommand:renewcommand:newenvironment:renewenvironment";
5140
5141 static void TEX_mode __P((FILE *));
5142 static void TEX_decode_env __P((char *, char *));
5143
5144 static char TEX_esc = '\\';
5145 static char TEX_opgrp = '{';
5146 static char TEX_clgrp = '}';
5147
5148 /*
5149  * TeX/LaTeX scanning loop.
5150  */
5151 static void
5152 TeX_commands (inf)
5153      FILE *inf;
5154 {
5155   char *cp;
5156   linebuffer *key;
5157
5158   /* Select either \ or ! as escape character.  */
5159   TEX_mode (inf);
5160
5161   /* Initialize token table once from environment. */
5162   if (TEX_toktab == NULL)
5163     TEX_decode_env ("TEXTAGS", TEX_defenv);
5164
5165   LOOP_ON_INPUT_LINES (inf, lb, cp)
5166     {
5167       /* Look at each TEX keyword in line. */
5168       for (;;)
5169         {
5170           /* Look for a TEX escape. */
5171           while (*cp++ != TEX_esc)
5172             if (cp[-1] == '\0' || cp[-1] == '%')
5173               goto tex_next_line;
5174
5175           for (key = TEX_toktab; key->buffer != NULL; key++)
5176             if (strneq (cp, key->buffer, key->len))
5177               {
5178                 register char *p;
5179                 int namelen, linelen;
5180                 bool opgrp = FALSE;
5181
5182                 cp = skip_spaces (cp + key->len);
5183                 if (*cp == TEX_opgrp)
5184                   {
5185                     opgrp = TRUE;
5186                     cp++;
5187                   }
5188                 for (p = cp;
5189                      (!iswhite (*p) && *p != '#' &&
5190                       *p != TEX_opgrp && *p != TEX_clgrp);
5191                      p++)
5192                   continue;
5193                 namelen = p - cp;
5194                 linelen = lb.len;
5195                 if (!opgrp || *p == TEX_clgrp)
5196                   {
5197                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5198                       p++;
5199                     linelen = p - lb.buffer + 1;
5200                   }
5201                 make_tag (cp, namelen, TRUE,
5202                           lb.buffer, linelen, lineno, linecharno);
5203                 goto tex_next_line; /* We only tag a line once */
5204               }
5205         }
5206     tex_next_line:
5207       ;
5208     }
5209 }
5210
5211 #define TEX_LESC '\\'
5212 #define TEX_SESC '!'
5213
5214 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5215    chars accordingly. */
5216 static void
5217 TEX_mode (inf)
5218      FILE *inf;
5219 {
5220   int c;
5221
5222   while ((c = getc (inf)) != EOF)
5223     {
5224       /* Skip to next line if we hit the TeX comment char. */
5225       if (c == '%')
5226         while (c != '\n' && c != EOF)
5227           c = getc (inf);
5228       else if (c == TEX_LESC || c == TEX_SESC )
5229         break;
5230     }
5231
5232   if (c == TEX_LESC)
5233     {
5234       TEX_esc = TEX_LESC;
5235       TEX_opgrp = '{';
5236       TEX_clgrp = '}';
5237     }
5238   else
5239     {
5240       TEX_esc = TEX_SESC;
5241       TEX_opgrp = '<';
5242       TEX_clgrp = '>';
5243     }
5244   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5245      No attempt is made to correct the situation. */
5246   rewind (inf);
5247 }
5248
5249 /* Read environment and prepend it to the default string.
5250    Build token table. */
5251 static void
5252 TEX_decode_env (evarname, defenv)
5253      char *evarname;
5254      char *defenv;
5255 {
5256   register char *env, *p;
5257   int i, len;
5258
5259   /* Append default string to environment. */
5260   env = getenv (evarname);
5261   if (!env)
5262     env = defenv;
5263   else
5264     {
5265       char *oldenv = env;
5266       env = concat (oldenv, defenv, "");
5267     }
5268
5269   /* Allocate a token table */
5270   for (len = 1, p = env; p;)
5271     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5272       len++;
5273   TEX_toktab = xnew (len, linebuffer);
5274
5275   /* Unpack environment string into token table. Be careful about */
5276   /* zero-length strings (leading ':', "::" and trailing ':') */
5277   for (i = 0; *env != '\0';)
5278     {
5279       p = etags_strchr (env, ':');
5280       if (!p)                   /* End of environment string. */
5281         p = env + strlen (env);
5282       if (p - env > 0)
5283         {                       /* Only non-zero strings. */
5284           TEX_toktab[i].buffer = savenstr (env, p - env);
5285           TEX_toktab[i].len = p - env;
5286           i++;
5287         }
5288       if (*p)
5289         env = p + 1;
5290       else
5291         {
5292           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5293           TEX_toktab[i].len = 0;
5294           break;
5295         }
5296     }
5297 }
5298
5299 \f
5300 /* Texinfo support.  Dave Love, Mar. 2000.  */
5301 static void
5302 Texinfo_nodes (inf)
5303      FILE * inf;
5304 {
5305   char *cp, *start;
5306   LOOP_ON_INPUT_LINES (inf, lb, cp)
5307     if (LOOKING_AT (cp, "@node"))
5308       {
5309         start = cp;
5310         while (*cp != '\0' && *cp != ',')
5311           cp++;
5312         make_tag (start, cp - start, TRUE,
5313                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5314       }
5315 }
5316
5317 \f
5318 /*
5319  * HTML support.
5320  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5321  * Contents of <a name=xxx> are tags with name xxx.
5322  *
5323  * Francesco Potortì, 2002.
5324  */
5325 static void
5326 HTML_labels (inf)
5327      FILE * inf;
5328 {
5329   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5330   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5331   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5332   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5333   char *end;
5334
5335
5336   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5337
5338   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5339     for (;;)                    /* loop on the same line */
5340       {
5341         if (skiptag)            /* skip HTML tag */
5342           {
5343             while (*dbp != '\0' && *dbp != '>')
5344               dbp++;
5345             if (*dbp == '>')
5346               {
5347                 dbp += 1;
5348                 skiptag = FALSE;
5349                 continue;       /* look on the same line */
5350               }
5351             break;              /* go to next line */
5352           }
5353
5354         else if (intag) /* look for "name=" or "id=" */
5355           {
5356             while (*dbp != '\0' && *dbp != '>'
5357                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5358               dbp++;
5359             if (*dbp == '\0')
5360               break;            /* go to next line */
5361             if (*dbp == '>')
5362               {
5363                 dbp += 1;
5364                 intag = FALSE;
5365                 continue;       /* look on the same line */
5366               }
5367             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5368                 || LOOKING_AT_NOCASE (dbp, "id="))
5369               {
5370                 bool quoted = (dbp[0] == '"');
5371
5372                 if (quoted)
5373                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5374                     continue;
5375                 else
5376                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5377                     continue;
5378                 linebuffer_setlen (&token_name, end - dbp);
5379                 strncpy (token_name.buffer, dbp, end - dbp);
5380                 token_name.buffer[end - dbp] = '\0';
5381
5382                 dbp = end;
5383                 intag = FALSE;  /* we found what we looked for */
5384                 skiptag = TRUE; /* skip to the end of the tag */
5385                 getnext = TRUE; /* then grab the text */
5386                 continue;       /* look on the same line */
5387               }
5388             dbp += 1;
5389           }
5390
5391         else if (getnext)       /* grab next tokens and tag them */
5392           {
5393             dbp = skip_spaces (dbp);
5394             if (*dbp == '\0')
5395               break;            /* go to next line */
5396             if (*dbp == '<')
5397               {
5398                 intag = TRUE;
5399                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5400                 continue;       /* look on the same line */
5401               }
5402
5403             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5404               continue;
5405             make_tag (token_name.buffer, token_name.len, TRUE,
5406                       dbp, end - dbp, lineno, linecharno);
5407             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5408             getnext = FALSE;
5409             break;              /* go to next line */
5410           }
5411
5412         else                    /* look for an interesting HTML tag */
5413           {
5414             while (*dbp != '\0' && *dbp != '<')
5415               dbp++;
5416             if (*dbp == '\0')
5417               break;            /* go to next line */
5418             intag = TRUE;
5419             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5420               {
5421                 inanchor = TRUE;
5422                 continue;       /* look on the same line */
5423               }
5424             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5425                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5426                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5427                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5428               {
5429                 intag = FALSE;
5430                 getnext = TRUE;
5431                 continue;       /* look on the same line */
5432               }
5433             dbp += 1;
5434           }
5435       }
5436 }
5437
5438 \f
5439 /*
5440  * Prolog support
5441  *
5442  * Assumes that the predicate or rule starts at column 0.
5443  * Only the first clause of a predicate or rule is added.
5444  * Original code by Sunichirou Sugou (1989)
5445  * Rewritten by Anders Lindgren (1996)
5446  */
5447 static int prolog_pr __P((char *, char *));
5448 static void prolog_skip_comment __P((linebuffer *, FILE *));
5449 static int prolog_atom __P((char *, int));
5450
5451 static void
5452 Prolog_functions (inf)
5453      FILE *inf;
5454 {
5455   char *cp, *last;
5456   int len;
5457   int allocated;
5458
5459   allocated = 0;
5460   len = 0;
5461   last = NULL;
5462
5463   LOOP_ON_INPUT_LINES (inf, lb, cp)
5464     {
5465       if (cp[0] == '\0')        /* Empty line */
5466         continue;
5467       else if (iswhite (cp[0])) /* Not a predicate */
5468         continue;
5469       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5470         prolog_skip_comment (&lb, inf);
5471       else if ((len = prolog_pr (cp, last)) > 0)
5472         {
5473           /* Predicate or rule.  Store the function name so that we
5474              only generate a tag for the first clause.  */
5475           if (last == NULL)
5476             last = xnew(len + 1, char);
5477           else if (len + 1 > allocated)
5478             xrnew (last, len + 1, char);
5479           allocated = len + 1;
5480           strncpy (last, cp, len);
5481           last[len] = '\0';
5482         }
5483     }
5484   if (last != NULL)
5485     free (last);
5486 }
5487
5488
5489 static void
5490 prolog_skip_comment (plb, inf)
5491      linebuffer *plb;
5492      FILE *inf;
5493 {
5494   char *cp;
5495
5496   do
5497     {
5498       for (cp = plb->buffer; *cp != '\0'; cp++)
5499         if (cp[0] == '*' && cp[1] == '/')
5500           return;
5501       readline (plb, inf);
5502     }
5503   while (!feof(inf));
5504 }
5505
5506 /*
5507  * A predicate or rule definition is added if it matches:
5508  *     <beginning of line><Prolog Atom><whitespace>(
5509  * or  <beginning of line><Prolog Atom><whitespace>:-
5510  *
5511  * It is added to the tags database if it doesn't match the
5512  * name of the previous clause header.
5513  *
5514  * Return the size of the name of the predicate or rule, or 0 if no
5515  * header was found.
5516  */
5517 static int
5518 prolog_pr (s, last)
5519      char *s;
5520      char *last;                /* Name of last clause. */
5521 {
5522   int pos;
5523   int len;
5524
5525   pos = prolog_atom (s, 0);
5526   if (pos < 1)
5527     return 0;
5528
5529   len = pos;
5530   pos = skip_spaces (s + pos) - s;
5531
5532   if ((s[pos] == '.'
5533        || (s[pos] == '(' && (pos += 1))
5534        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5535       && (last == NULL          /* save only the first clause */
5536           || len != (int)strlen (last)
5537           || !strneq (s, last, len)))
5538         {
5539           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5540           return len;
5541         }
5542   else
5543     return 0;
5544 }
5545
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int i = pos;

  if (ISLOWER (s[i]) || s[i] == '_')
    {
      /* The atom is unquoted. */
      do
        i++;
      while (ISALNUM (s[i]) || s[i] == '_');
      return i - pos;
    }

  if (s[i] != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  i++;
  for (;;)
    switch (s[i])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (s[i + 1] == '\0')
          return -1;
        i += 2;                 /* skip the escaped character too */
        break;

      case '\'':
        if (s[i + 1] != '\'')
          return i + 1 - pos;   /* closing quote is part of the atom */
        i += 2;                 /* a doubled quote */
        break;

      default:
        i++;
        break;
      }
}
5604
5605 \f
5606 /*
5607  * Support for Erlang
5608  *
5609  * Generates tags for functions, defines, and records.
5610  * Assumes that Erlang functions start at column 0.
5611  * Original code by Anders Lindgren (1996)
5612  */
5613 static int erlang_func __P((char *, char *));
5614 static void erlang_attribute __P((char *));
5615 static int erlang_atom __P((char *));
5616
5617 static void
5618 Erlang_functions (inf)
5619      FILE *inf;
5620 {
5621   char *cp, *last;
5622   int len;
5623   int allocated;
5624
5625   allocated = 0;
5626   len = 0;
5627   last = NULL;
5628
5629   LOOP_ON_INPUT_LINES (inf, lb, cp)
5630     {
5631       if (cp[0] == '\0')        /* Empty line */
5632         continue;
5633       else if (iswhite (cp[0])) /* Not function nor attribute */
5634         continue;
5635       else if (cp[0] == '%')    /* comment */
5636         continue;
5637       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5638         continue;
5639       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5640         {
5641           erlang_attribute (cp);
5642           if (last != NULL)
5643             {
5644               free (last);
5645               last = NULL;
5646             }
5647         }
5648       else if ((len = erlang_func (cp, last)) > 0)
5649         {
5650           /*
5651            * Function.  Store the function name so that we only
5652            * generates a tag for the first clause.
5653            */
5654           if (last == NULL)
5655             last = xnew (len + 1, char);
5656           else if (len + 1 > allocated)
5657             xrnew (last, len + 1, char);
5658           allocated = len + 1;
5659           strncpy (last, cp, len);
5660           last[len] = '\0';
5661         }
5662     }
5663   if (last != NULL)
5664     free (last);
5665 }
5666
5667
5668 /*
5669  * A function definition is added if it matches:
5670  *     <beginning of line><Erlang Atom><whitespace>(
5671  *
5672  * It is added to the tags database if it doesn't match the
5673  * name of the previous clause header.
5674  *
5675  * Return the size of the name of the function, or 0 if no function
5676  * was found.
5677  */
5678 static int
5679 erlang_func (s, last)
5680      char *s;
5681      char *last;                /* Name of last clause. */
5682 {
5683   int pos;
5684   int len;
5685
5686   pos = erlang_atom (s);
5687   if (pos < 1)
5688     return 0;
5689
5690   len = pos;
5691   pos = skip_spaces (s + pos) - s;
5692
5693   /* Save only the first clause. */
5694   if (s[pos++] == '('
5695       && (last == NULL
5696           || len != (int)strlen (last)
5697           || !strneq (s, last, len)))
5698         {
5699           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5700           return len;
5701         }
5702
5703   return 0;
5704 }
5705
5706
5707 /*
5708  * Handle attributes.  Currently, tags are generated for defines
5709  * and records.
5710  *
5711  * They are on the form:
5712  * -define(foo, bar).
5713  * -define(Foo(M, N), M+N).
5714  * -record(graph, {vtab = notable, cyclic = true}).
5715  */
5716 static void
5717 erlang_attribute (s)
5718      char *s;
5719 {
5720   char *cp = s;
5721
5722   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5723       && *cp++ == '(')
5724     {
5725       int len = erlang_atom (skip_spaces (cp));
5726       if (len > 0)
5727         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5728     }
5729   return;
5730 }
5731
5732
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * An unquoted atom starts with a letter or an underscore and goes on
 * with alphanumeric characters and underscores.  A quoted atom goes
 * from a single quote to the next single quote that is not preceded
 * by a backslash.
 * Return the number of bytes consumed.  The result is 0 if S does not
 * start with an atom, or if a quoted atom is not closed before the
 * end of S.
 */
static int
erlang_atom (s)
     char *s;
{
  int n = 0;

  if (s[0] == '\'')
    {
      n = 1;
      while (s[n] != '\'')
        {
          if (s[n] == '\\')
            n++;                /* step onto the escaped character */
          if (s[n] == '\0')     /* multiline quoted atoms are ignored */
            return 0;
          n++;
        }
      return n + 1;             /* count the closing quote */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      n = 1;
      while (ISALNUM (s[n]) || s[n] == '_')
        n++;
    }

  return n;
}
5761
5762 \f
5763 static char *scan_separators __P((char *));
5764 static void add_regex __P((char *, language *));
5765 static char *substitute __P((char *, char *, struct re_registers *));
5766
5767 /*
5768  * Take a string like "/blah/" and turn it into "blah", verifying
5769  * that the first and last characters are the same, and handling
5770  * quoted separator characters.  Actually, stops on the occurrence of
5771  * an unquoted separator.  Also process \t, \n, etc. and turn into
5772  * appropriate characters. Works in place.  Null terminates name string.
5773  * Returns pointer to terminating separator, or NULL for
5774  * unterminated regexps.
5775  */
5776 static char *
5777 scan_separators (name)
5778      char *name;
5779 {
5780   char sep = name[0];
5781   char *copyto = name;
5782   bool quoted = FALSE;
5783
5784   for (++name; *name != '\0'; ++name)
5785     {
5786       if (quoted)
5787         {
5788           switch (*name)
5789             {
5790             case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
5791             case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
5792             case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
5793             case 'e': *copyto++ = 033; break;    /* ESC (delete)         */
5794             case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
5795             case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
5796             case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
5797             case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
5798             case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
5799             default:
5800               if (*name == sep)
5801                 *copyto++ = sep;
5802               else
5803                 {
5804                   /* Something else is quoted, so preserve the quote. */
5805                   *copyto++ = '\\';
5806                   *copyto++ = *name;
5807                 }
5808               break;
5809             }
5810           quoted = FALSE;
5811         }
5812       else if (*name == '\\')
5813         quoted = TRUE;
5814       else if (*name == sep)
5815         break;
5816       else
5817         *copyto++ = *name;
5818     }
5819   if (*name != sep)
5820     name = NULL;                /* signal unterminated regexp */
5821
5822   /* Terminate copied string. */
5823   *copyto = '\0';
5824   return name;
5825 }
5826
5827 /* Look at the argument of --regex or --no-regex and do the right
5828    thing.  Same for each line of a regexp file. */
5829 static void
5830 analyse_regex (regex_arg)
5831      char *regex_arg;
5832 {
5833   if (regex_arg == NULL)
5834     {
5835       free_regexps ();          /* --no-regex: remove existing regexps */
5836       return;
5837     }
5838
5839   /* A real --regexp option or a line in a regexp file. */
5840   switch (regex_arg[0])
5841     {
5842       /* Comments in regexp file or null arg to --regex. */
5843     case '\0':
5844     case ' ':
5845     case '\t':
5846       break;
5847
5848       /* Read a regex file.  This is recursive and may result in a
5849          loop, which will stop when the file descriptors are exhausted. */
5850     case '@':
5851       {
5852         FILE *regexfp;
5853         linebuffer regexbuf;
5854         char *regexfile = regex_arg + 1;
5855
5856         /* regexfile is a file containing regexps, one per line. */
5857         regexfp = fopen (regexfile, "r");
5858         if (regexfp == NULL)
5859           {
5860             pfatal (regexfile);
5861             return;
5862           }
5863         linebuffer_init (&regexbuf);
5864         while (readline_internal (&regexbuf, regexfp) > 0)
5865           analyse_regex (regexbuf.buffer);
5866         free (regexbuf.buffer);
5867         fclose (regexfp);
5868       }
5869       break;
5870
5871       /* Regexp to be used for a specific language only. */
5872     case '{':
5873       {
5874         language *lang;
5875         char *lang_name = regex_arg + 1;
5876         char *cp;
5877
5878         for (cp = lang_name; *cp != '}'; cp++)
5879           if (*cp == '\0')
5880             {
5881               error ("unterminated language name in regex: %s", regex_arg);
5882               return;
5883             }
5884         *cp++ = '\0';
5885         lang = get_language_from_langname (lang_name);
5886         if (lang == NULL)
5887           return;
5888         add_regex (cp, lang);
5889       }
5890       break;
5891
5892       /* Regexp to be used for any language. */
5893     default:
5894       add_regex (regex_arg, NULL);
5895       break;
5896     }
5897 }
5898
5899 /* Separate the regexp pattern, compile it,
5900    and care for optional name and modifiers. */
5901 static void
5902 add_regex (regexp_pattern, lang)
5903      char *regexp_pattern;
5904      language *lang;
5905 {
5906   static struct re_pattern_buffer zeropattern;
5907   char sep, *pat, *name, *modifiers;
5908   const char *err;
5909   struct re_pattern_buffer *patbuf;
5910   regexp *rp;
5911   bool
5912     force_explicit_name = TRUE, /* do not use implicit tag names */
5913     ignore_case = FALSE,        /* case is significant */
5914     multi_line = FALSE,         /* matches are done one line at a time */
5915     single_line = FALSE;        /* dot does not match newline */
5916
5917
5918   if (strlen(regexp_pattern) < 3)
5919     {
5920       error ("null regexp", (char *)NULL);
5921       return;
5922     }
5923   sep = regexp_pattern[0];
5924   name = scan_separators (regexp_pattern);
5925   if (name == NULL)
5926     {
5927       error ("%s: unterminated regexp", regexp_pattern);
5928       return;
5929     }
5930   if (name[1] == sep)
5931     {
5932       error ("null name for regexp \"%s\"", regexp_pattern);
5933       return;
5934     }
5935   modifiers = scan_separators (name);
5936   if (modifiers == NULL)        /* no terminating separator --> no name */
5937     {
5938       modifiers = name;
5939       name = "";
5940     }
5941   else
5942     modifiers += 1;             /* skip separator */
5943
5944   /* Parse regex modifiers. */
5945   for (; modifiers[0] != '\0'; modifiers++)
5946     switch (modifiers[0])
5947       {
5948       case 'N':
5949         if (modifiers == name)
5950           error ("forcing explicit tag name but no name, ignoring", NULL);
5951         force_explicit_name = TRUE;
5952         break;
5953       case 'i':
5954         ignore_case = TRUE;
5955         break;
5956       case 's':
5957         single_line = TRUE;
5958         /* FALLTHRU */
5959       case 'm':
5960         multi_line = TRUE;
5961         need_filebuf = TRUE;
5962         break;
5963       default:
5964         {
5965           char wrongmod [2];
5966           wrongmod[0] = modifiers[0];
5967           wrongmod[1] = '\0';
5968           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5969         }
5970         break;
5971       }
5972
5973   patbuf = xnew (1, struct re_pattern_buffer);
5974   *patbuf = zeropattern;
5975   if (ignore_case)
5976     {
5977       static char lc_trans[CHARS];
5978       int i;
5979       for (i = 0; i < CHARS; i++)
5980         lc_trans[i] = lowcase (i);
5981       patbuf->translate = lc_trans;     /* translation table to fold case  */
5982     }
5983
5984   if (multi_line)
5985     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5986   else
5987     pat = regexp_pattern;
5988
5989   if (single_line)
5990     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5991   else
5992     re_set_syntax (RE_SYNTAX_EMACS);
5993
5994   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5995   if (multi_line)
5996     free (pat);
5997   if (err != NULL)
5998     {
5999       error ("%s while compiling pattern", err);
6000       return;
6001     }
6002
6003   rp = p_head;
6004   p_head = xnew (1, regexp);
6005   p_head->pattern = savestr (regexp_pattern);
6006   p_head->p_next = rp;
6007   p_head->lang = lang;
6008   p_head->pat = patbuf;
6009   p_head->name = savestr (name);
6010   p_head->error_signaled = FALSE;
6011   p_head->force_explicit_name = force_explicit_name;
6012   p_head->ignore_case = ignore_case;
6013   p_head->multi_line = multi_line;
6014 }
6015
6016 /*
6017  * Do the substitutions indicated by the regular expression and
6018  * arguments.
6019  */
6020 static char *
6021 substitute (in, out, regs)
6022      char *in, *out;
6023      struct re_registers *regs;
6024 {
6025   char *result, *t;
6026   int size, dig, diglen;
6027
6028   result = NULL;
6029   size = strlen (out);
6030
6031   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6032   if (out[size - 1] == '\\')
6033     fatal ("pattern error in \"%s\"", out);
6034   for (t = etags_strchr (out, '\\');
6035        t != NULL;
6036        t = etags_strchr (t + 2, '\\'))
6037     if (ISDIGIT (t[1]))
6038       {
6039         dig = t[1] - '0';
6040         diglen = regs->end[dig] - regs->start[dig];
6041         size += diglen - 2;
6042       }
6043     else
6044       size -= 1;
6045
6046   /* Allocate space and do the substitutions. */
6047   assert (size >= 0);
6048   result = xnew (size + 1, char);
6049
6050   for (t = result; *out != '\0'; out++)
6051     if (*out == '\\' && ISDIGIT (*++out))
6052       {
6053         dig = *out - '0';
6054         diglen = regs->end[dig] - regs->start[dig];
6055         strncpy (t, in + regs->start[dig], diglen);
6056         t += diglen;
6057       }
6058     else
6059       *t++ = *out;
6060   *t = '\0';
6061
6062   assert (t <= result + size);
6063   assert (t - result == (int)strlen (result));
6064
6065   return result;
6066 }
6067
6068 /* Deallocate all regexps. */
6069 static void
6070 free_regexps ()
6071 {
6072   regexp *rp;
6073   while (p_head != NULL)
6074     {
6075       rp = p_head->p_next;
6076       free (p_head->pattern);
6077       free (p_head->name);
6078       free (p_head);
6079       p_head = rp;
6080     }
6081   return;
6082 }
6083
6084 /*
6085  * Reads the whole file as a single string from `filebuf' and looks for
6086  * multi-line regular expressions, creating tags on matches.
6087  * readline already dealt with normal regexps.
6088  *
6089  * Idea by Ben Wing <ben@666.com> (2002).
6090  */
6091 static void
6092 regex_tag_multiline ()
6093 {
6094   char *buffer = filebuf.buffer;
6095   regexp *rp;
6096   char *name;
6097
6098   for (rp = p_head; rp != NULL; rp = rp->p_next)
6099     {
6100       int match = 0;
6101
6102       if (!rp->multi_line)
6103         continue;               /* skip normal regexps */
6104
6105       /* Generic initialisations before parsing file from memory. */
6106       lineno = 1;               /* reset global line number */
6107       charno = 0;               /* reset global char number */
6108       linecharno = 0;           /* reset global char number of line start */
6109
6110       /* Only use generic regexps or those for the current language. */
6111       if (rp->lang != NULL && rp->lang != curfdp->lang)
6112         continue;
6113
6114       while (match >= 0 && match < filebuf.len)
6115         {
6116           match = re_search (rp->pat, buffer, filebuf.len, charno,
6117                              filebuf.len - match, &rp->regs);
6118           switch (match)
6119             {
6120             case -2:
6121               /* Some error. */
6122               if (!rp->error_signaled)
6123                 {
6124                   error ("regexp stack overflow while matching \"%s\"",
6125                          rp->pattern);
6126                   rp->error_signaled = TRUE;
6127                 }
6128               break;
6129             case -1:
6130               /* No match. */
6131               break;
6132             default:
6133               if (match == rp->regs.end[0])
6134                 {
6135                   if (!rp->error_signaled)
6136                     {
6137                       error ("regexp matches the empty string: \"%s\"",
6138                              rp->pattern);
6139                       rp->error_signaled = TRUE;
6140                     }
6141                   match = -3;   /* exit from while loop */
6142                   break;
6143                 }
6144
6145               /* Match occurred.  Construct a tag. */
6146               while (charno < rp->regs.end[0])
6147                 if (buffer[charno++] == '\n')
6148                   lineno++, linecharno = charno;
6149               name = rp->name;
6150               if (name[0] == '\0')
6151                 name = NULL;
6152               else /* make a named tag */
6153                 name = substitute (buffer, rp->name, &rp->regs);
6154               if (rp->force_explicit_name)
6155                 /* Force explicit tag name, if a name is there. */
6156                 pfnote (name, TRUE, buffer + linecharno,
6157                         charno - linecharno + 1, lineno, linecharno);
6158               else
6159                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6160                           charno - linecharno + 1, lineno, linecharno);
6161               break;
6162             }
6163         }
6164     }
6165 }
6166
6167 \f
6168 static bool
6169 nocase_tail (cp)
6170      char *cp;
6171 {
6172   register int len = 0;
6173
6174   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6175     cp++, len++;
6176   if (*cp == '\0' && !intoken (dbp[len]))
6177     {
6178       dbp += len;
6179       return TRUE;
6180     }
6181   return FALSE;
6182 }
6183
6184 static void
6185 get_tag (bp, namepp)
6186      register char *bp;
6187      char **namepp;
6188 {
6189   register char *cp = bp;
6190
6191   if (*bp != '\0')
6192     {
6193       /* Go till you get to white space or a syntactic break */
6194       for (cp = bp + 1; !notinname (*cp); cp++)
6195         continue;
6196       make_tag (bp, cp - bp, TRUE,
6197                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6198     }
6199
6200   if (namepp != NULL)
6201     *namepp = savenstr (bp, cp - bp);
6202 }
6203
6204 /*
6205  * Read a line of text from `stream' into `lbp', excluding the
6206  * newline or CR-NL, if any.  Return the number of characters read from
6207  * `stream', which is the length of the line including the newline.
6208  *
6209  * On DOS or Windows we do not count the CR character, if any before the
6210  * NL, in the returned length; this mirrors the behavior of Emacs on those
6211  * platforms (for text files, it translates CR-NL to NL as it reads in the
6212  * file).
6213  *
6214  * If multi-line regular expressions are requested, each line read is
6215  * appended to `filebuf'.
6216  */
6217 static long
6218 readline_internal (lbp, stream)
6219      linebuffer *lbp;
6220      register FILE *stream;
6221 {
6222   char *buffer = lbp->buffer;
6223   register char *p = lbp->buffer;
6224   register char *pend;
6225   int chars_deleted;
6226
6227   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6228
6229   for (;;)
6230     {
6231       register int c = getc (stream);
6232       if (p == pend)
6233         {
6234           /* We're at the end of linebuffer: expand it. */
6235           lbp->size *= 2;
6236           xrnew (buffer, lbp->size, char);
6237           p += buffer - lbp->buffer;
6238           pend = buffer + lbp->size;
6239           lbp->buffer = buffer;
6240         }
6241       if (c == EOF)
6242         {
6243           *p = '\0';
6244           chars_deleted = 0;
6245           break;
6246         }
6247       if (c == '\n')
6248         {
6249           if (p > buffer && p[-1] == '\r')
6250             {
6251               p -= 1;
6252 #ifdef DOS_NT
6253              /* Assume CRLF->LF translation will be performed by Emacs
6254                 when loading this file, so CRs won't appear in the buffer.
6255                 It would be cleaner to compensate within Emacs;
6256                 however, Emacs does not know how many CRs were deleted
6257                 before any given point in the file.  */
6258               chars_deleted = 1;
6259 #else
6260               chars_deleted = 2;
6261 #endif
6262             }
6263           else
6264             {
6265               chars_deleted = 1;
6266             }
6267           *p = '\0';
6268           break;
6269         }
6270       *p++ = c;
6271     }
6272   lbp->len = p - buffer;
6273
6274   if (need_filebuf              /* we need filebuf for multi-line regexps */
6275       && chars_deleted > 0)     /* not at EOF */
6276     {
6277       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6278         {
6279           /* Expand filebuf. */
6280           filebuf.size *= 2;
6281           xrnew (filebuf.buffer, filebuf.size, char);
6282         }
6283       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6284       filebuf.len += lbp->len;
6285       filebuf.buffer[filebuf.len++] = '\n';
6286       filebuf.buffer[filebuf.len] = '\0';
6287     }
6288
6289   return lbp->len + chars_deleted;
6290 }
6291
6292 /*
6293  * Like readline_internal, above, but in addition try to match the
6294  * input line against relevant regular expressions and manage #line
6295  * directives.
6296  */
6297 static void
6298 readline (lbp, stream)
6299      linebuffer *lbp;
6300      FILE *stream;
6301 {
6302   long result;
6303
6304   linecharno = charno;          /* update global char number of line start */
6305   result = readline_internal (lbp, stream); /* read line */
6306   lineno += 1;                  /* increment global line number */
6307   charno += result;             /* increment global char number */
6308
6309   /* Honour #line directives. */
6310   if (!no_line_directive)
6311     {
6312       static bool discard_until_line_directive;
6313
6314       /* Check whether this is a #line directive. */
6315       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6316         {
6317           unsigned int lno;
6318           int start = 0;
6319
6320           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6321               && start > 0)     /* double quote character found */
6322             {
6323               char *endp = lbp->buffer + start;
6324
6325               while ((endp = etags_strchr (endp, '"')) != NULL
6326                      && endp[-1] == '\\')
6327                 endp++;
6328               if (endp != NULL)
6329                 /* Ok, this is a real #line directive.  Let's deal with it. */
6330                 {
6331                   char *taggedabsname;  /* absolute name of original file */
6332                   char *taggedfname;    /* name of original file as given */
6333                   char *name;           /* temp var */
6334
6335                   discard_until_line_directive = FALSE; /* found it */
6336                   name = lbp->buffer + start;
6337                   *endp = '\0';
6338                   canonicalize_filename (name); /* for DOS */
6339                   taggedabsname = absolute_filename (name, tagfiledir);
6340                   if (filename_is_absolute (name)
6341                       || filename_is_absolute (curfdp->infname))
6342                     taggedfname = savestr (taggedabsname);
6343                   else
6344                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6345
6346                   if (streq (curfdp->taggedfname, taggedfname))
6347                     /* The #line directive is only a line number change.  We
6348                        deal with this afterwards. */
6349                     free (taggedfname);
6350                   else
6351                     /* The tags following this #line directive should be
6352                        attributed to taggedfname.  In order to do this, set
6353                        curfdp accordingly. */
6354                     {
6355                       fdesc *fdp; /* file description pointer */
6356
6357                       /* Go look for a file description already set up for the
6358                          file indicated in the #line directive.  If there is
6359                          one, use it from now until the next #line
6360                          directive. */
6361                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6362                         if (streq (fdp->infname, curfdp->infname)
6363                             && streq (fdp->taggedfname, taggedfname))
6364                           /* If we remove the second test above (after the &&)
6365                              then all entries pertaining to the same file are
6366                              coalesced in the tags file.  If we use it, then
6367                              entries pertaining to the same file but generated
6368                              from different files (via #line directives) will
6369                              go into separate sections in the tags file.  These
6370                              alternatives look equivalent.  The first one
6371                              destroys some apparently useless information. */
6372                           {
6373                             curfdp = fdp;
6374                             free (taggedfname);
6375                             break;
6376                           }
6377                       /* Else, if we already tagged the real file, skip all
6378                          input lines until the next #line directive. */
6379                       if (fdp == NULL) /* not found */
6380                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6381                           if (streq (fdp->infabsname, taggedabsname))
6382                             {
6383                               discard_until_line_directive = TRUE;
6384                               free (taggedfname);
6385                               break;
6386                             }
6387                       /* Else create a new file description and use that from
6388                          now on, until the next #line directive. */
6389                       if (fdp == NULL) /* not found */
6390                         {
6391                           fdp = fdhead;
6392                           fdhead = xnew (1, fdesc);
6393                           *fdhead = *curfdp; /* copy curr. file description */
6394                           fdhead->next = fdp;
6395                           fdhead->infname = savestr (curfdp->infname);
6396                           fdhead->infabsname = savestr (curfdp->infabsname);
6397                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6398                           fdhead->taggedfname = taggedfname;
6399                           fdhead->usecharno = FALSE;
6400                           fdhead->prop = NULL;
6401                           fdhead->written = FALSE;
6402                           curfdp = fdhead;
6403                         }
6404                     }
6405                   free (taggedabsname);
6406                   lineno = lno - 1;
6407                   readline (lbp, stream);
6408                   return;
6409                 } /* if a real #line directive */
6410             } /* if #line is followed by a a number */
6411         } /* if line begins with "#line " */
6412
6413       /* If we are here, no #line directive was found. */
6414       if (discard_until_line_directive)
6415         {
6416           if (result > 0)
6417             {
6418               /* Do a tail recursion on ourselves, thus discarding the contents
6419                  of the line buffer. */
6420               readline (lbp, stream);
6421               return;
6422             }
6423           /* End of file. */
6424           discard_until_line_directive = FALSE;
6425           return;
6426         }
6427     } /* if #line directives should be considered */
6428
6429   {
6430     int match;
6431     regexp *rp;
6432     char *name;
6433
6434     /* Match against relevant regexps. */
6435     if (lbp->len > 0)
6436       for (rp = p_head; rp != NULL; rp = rp->p_next)
6437         {
6438           /* Only use generic regexps or those for the current language.
6439              Also do not use multiline regexps, which is the job of
6440              regex_tag_multiline. */
6441           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6442               || rp->multi_line)
6443             continue;
6444
6445           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6446           switch (match)
6447             {
6448             case -2:
6449               /* Some error. */
6450               if (!rp->error_signaled)
6451                 {
6452                   error ("regexp stack overflow while matching \"%s\"",
6453                          rp->pattern);
6454                   rp->error_signaled = TRUE;
6455                 }
6456               break;
6457             case -1:
6458               /* No match. */
6459               break;
6460             case 0:
6461               /* Empty string matched. */
6462               if (!rp->error_signaled)
6463                 {
6464                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6465                   rp->error_signaled = TRUE;
6466                 }
6467               break;
6468             default:
6469               /* Match occurred.  Construct a tag. */
6470               name = rp->name;
6471               if (name[0] == '\0')
6472                 name = NULL;
6473               else /* make a named tag */
6474                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6475               if (rp->force_explicit_name)
6476                 /* Force explicit tag name, if a name is there. */
6477                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6478               else
6479                 make_tag (name, strlen (name), TRUE,
6480                           lbp->buffer, match, lineno, linecharno);
6481               break;
6482             }
6483         }
6484   }
6485 }
6486
6487 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy occupies
 * strlen(cp)+1 bytes and is produced by savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6498
6499 /*
6500  * Return a pointer to a space of size LEN+1 allocated with xnew where
6501  * the string CP has been copied for at most the first LEN characters.
6502  */
6503 static char *
6504 savenstr (cp, len)
6505      char *cp;
6506      int len;
6507 {
6508   register char *dp;
6509
6510   dp = xnew (len + 1, char);
6511   strncpy (dp, cp, len);
6512   dp[len] = '\0';
6513   return dp;
6514 }
6515
/*
 * Find the last occurrence of the character c in the string sp, the
 * terminating NUL included.  Return a pointer to it, or NULL if c
 * does not occur.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')          /* the terminator itself was examined */
        break;
    }
  return (char *) last;
}
6537
/*
 * Find the first occurrence of the character c in the string sp, the
 * terminating NUL included.  Return a pointer to it, or NULL if c
 * does not occur.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')          /* end of string, and it was not c */
        return NULL;
    }
}
6556
/*
 * Compare the strings s1 and s2, treating upper and lower case
 * letters as equal.  Return zero if they match; otherwise return the
 * difference between the first pair of characters that differ
 * (lower-cased first when both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6577
/*
 * Compare at most the first n characters of the strings s1 and s2,
 * treating upper and lower case letters as equal.  Return zero if
 * they match over that length (in particular whenever n <= 0);
 * otherwise return the difference between the first pair of
 * characters that differ (lower-cased first when both are alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N is decremented only for characters that compared equal, so
     after the loop N <= 0 means exactly "the first N characters
     matched", even when S1 ends right at that point. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6603
/* Return a pointer to the first character of CP, at or after its
   start, for which iswhite is false (end of string is not space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6613
/* Return a pointer to the first character of CP, at or after its
   start, that either ends the string or satisfies iswhite. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6623
/* Report an error through `error' (`s1' is the printf control string,
   `s2' the argument for it), then terminate with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6632
/* Let perror describe the current errno value, prefixed by `s1', then
   terminate with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6640
6641 static void
6642 suggest_asking_for_help ()
6643 {
6644   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6645            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6646   exit (EXIT_FAILURE);
6647 }
6648
6649 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6650 static void
6651 error (s1, s2)
6652      const char *s1, *s2;
6653 {
6654   fprintf (stderr, "%s: ", progname);
6655   fprintf (stderr, s1, s2);
6656   fprintf (stderr, "\n");
6657 }
6658
6659 /* Return a newly-allocated string whose contents
6660    concatenate those of s1, s2, s3.  */
6661 static char *
6662 concat (s1, s2, s3)
6663      char *s1, *s2, *s3;
6664 {
6665   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6666   char *result = xnew (len1 + len2 + len3 + 1, char);
6667
6668   strcpy (result, s1);
6669   strcpy (result + len1, s2);
6670   strcpy (result + len1 + len2, s3);
6671   result[len1 + len2 + len3] = '\0';
6672
6673   return result;
6674 }
6675
6676 \f
6677 /* Does the same work as the system V getcwd, but does not need to
6678    guess the buffer size in advance. */
6679 static char *
6680 etags_getcwd ()
6681 {
6682 #ifdef HAVE_GETCWD
6683   int bufsize = 200;
6684   char *path = xnew (bufsize, char);
6685
6686   while (getcwd (path, bufsize) == NULL)
6687     {
6688       if (errno != ERANGE)
6689         pfatal ("getcwd");
6690       bufsize *= 2;
6691       free (path);
6692       path = xnew (bufsize, char);
6693     }
6694
6695   canonicalize_filename (path);
6696   return path;
6697
6698 #else /* not HAVE_GETCWD */
6699 #if MSDOS
6700
6701   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6702
6703   getwd (path);
6704
6705   for (p = path; *p != '\0'; p++)
6706     if (*p == '\\')
6707       *p = '/';
6708     else
6709       *p = lowcase (*p);
6710
6711   return strdup (path);
6712 #else /* not MSDOS */
6713   linebuffer path;
6714   FILE *pipe;
6715
6716   linebuffer_init (&path);
6717   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6718   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6719     pfatal ("pwd");
6720   pclose (pipe);
6721
6722   return path.buffer;
6723 #endif /* not MSDOS */
6724 #endif /* not HAVE_GETCWD */
6725 }
6726
6727 /* Return a newly allocated string containing the file name of FILE
6728    relative to the absolute directory DIR (which should end with a slash). */
6729 static char *
6730 relative_filename (file, dir)
6731      char *file, *dir;
6732 {
6733   char *fp, *dp, *afn, *res;
6734   int i;
6735
6736   /* Find the common root of file and dir (with a trailing slash). */
6737   afn = absolute_filename (file, cwd);
6738   fp = afn;
6739   dp = dir;
6740   while (*fp++ == *dp++)
6741     continue;
6742   fp--, dp--;                   /* back to the first differing char */
6743 #ifdef DOS_NT
6744   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6745     return afn;
6746 #endif
6747   do                            /* look at the equal chars until '/' */
6748     fp--, dp--;
6749   while (*fp != '/');
6750
6751   /* Build a sequence of "../" strings for the resulting relative file name. */
6752   i = 0;
6753   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6754     i += 1;
6755   res = xnew (3*i + strlen (fp + 1) + 1, char);
6756   res[0] = '\0';
6757   while (i-- > 0)
6758     strcat (res, "../");
6759
6760   /* Add the file name relative to the common root of file and dir. */
6761   strcat (res, fp + 1);
6762   free (afn);
6763
6764   return res;
6765 }
6766
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   absolute already, it is appended to DIR.  Every "/dirname/.." and
   "/." component is then removed from the result. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the slash that starts the previous
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination lie in the same buffer, so the
                 tail (terminator included) must be moved with memmove;
                 strcpy is undefined for overlapping strings. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Same overlap consideration as above. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6830
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  If FILE contains no slash, the result is a copy of DIR.
   FILE is canonicalized in place. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of the
         call, then put the overwritten character back. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      res = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    res = savestr (dir);

  return res;
}
6852
/* Return whether the string FN is an absolute file name, that is one
   starting with a slash or, under DOS_NT, with a drive letter, a
   colon and a slash.  FN must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6865
/* Put the file name FN in canonical form.  Works in place.  Under
   DOS_NT this makes a lower case drive letter upper case and turns
   backslashes into slashes; elsewhere FN is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *cp;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (cp = fn; *cp != '\0'; cp++)
    {
      if (*cp == '\\')
        *cp = '/';
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6884
6885 \f
6886 /* Initialize a linebuffer for use */
6887 static void
6888 linebuffer_init (lbp)
6889      linebuffer *lbp;
6890 {
6891   lbp->size = (DEBUG) ? 3 : 200;
6892   lbp->buffer = xnew (lbp->size, char);
6893   lbp->buffer[0] = '\0';
6894   lbp->len = 0;
6895 }
6896
6897 /* Set the minimum size of a string contained in a linebuffer. */
6898 static void
6899 linebuffer_setlen (lbp, toksize)
6900      linebuffer *lbp;
6901      int toksize;
6902 {
6903   while (lbp->size <= toksize)
6904     {
6905       lbp->size *= 2;
6906       xrnew (lbp->buffer, lbp->size, char);
6907     }
6908   lbp->len = toksize;
6909 }
6910
6911 /* Like malloc but get fatal error if memory is exhausted. */
6912 static PTR
6913 xmalloc (size)
6914      unsigned int size;
6915 {
6916   PTR result = (PTR) malloc (size);
6917   if (result == NULL)
6918     fatal ("virtual memory exhausted", (char *)NULL);
6919   return result;
6920 }
6921
6922 static PTR
6923 xrealloc (ptr, size)
6924      char *ptr;
6925      unsigned int size;
6926 {
6927   PTR result = (PTR) realloc (ptr, size);
6928   if (result == NULL)
6929     fatal ("virtual memory exhausted", (char *)NULL);
6930   return result;
6931 }
6932
6933 /*
6934  * Local Variables:
6935  * indent-tabs-mode: t
6936  * tab-width: 8
6937  * fill-column: 79
6938  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6939  * c-file-style: "gnu"
6940  * End:
6941  */
6942
6943 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6944    (do not change this comment) */
6945
6946 /* etags.c ends here */