lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software; you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation; either version 3, or (at your option)
  40 any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program; see the file COPYING.  If not, write to the
  49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  50 Boston, MA 02110-1301, USA. */
  51
  52
  53 /* NB To comply with the above BSD license, copyright information is
  54 reproduced in etc/ETAGS.README.  That file should be updated when the
  55 above notices are.
  56
  57 To the best of our knowledge, this code was originally based on the
  58 ctags.c distributed with BSD4.2, which was copyrighted by the
  59 University of California, as described above. */
  60
  61
  62 /*
  63  * Authors:
  64  * 1983 Ctags originally by Ken Arnold.
  65  * 1984 Fortran added by Jim Kleckner.
  66  * 1984 Ed Pelegri-Llopart added C typedefs.
  67  * 1985 Emacs TAGS format by Richard Stallman.
  68  * 1989 Sam Kendall added C++.
  69  * 1992 Joseph B. Wells improved C and C++ parsing.
  70  * 1993 Francesco Potortì reorganised C and C++.
  71  * 1994 Line-by-line regexp tags by Tom Tromey.
  72  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  73  * 2002 #line directives by Francesco Potortì.
  74  *
  75  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  76  */
  77
  78 /*
  79  * If you want to add support for a new language, start by looking at the LUA
  80  * language, which is the simplest.  Alternatively, consider shipping a
  81  * configuration file containing regexp definitions for etags.
  82  */
  83
  84 char pot_etags_version[] = "@(#) pot revision number is 17.26";
  85
  86 #define TRUE    1
  87 #define FALSE   0
  88
  89 #ifdef DEBUG
  90 #  undef DEBUG
  91 #  define DEBUG TRUE
  92 #else
  93 #  define DEBUG  FALSE
  94 #  define NDEBUG                /* disable assert */
  95 #endif
  96
  97 #ifdef HAVE_CONFIG_H
  98 # include <config.h>
  99   /* On some systems, Emacs defines static as nothing for the sake
 100      of unexec.  We don't want that here since we don't use unexec. */
 101 # undef static
 102 # ifndef PTR                    /* for XEmacs */
 103 #   define PTR void *
 104 # endif
 105 # ifndef __P                    /* for XEmacs */
 106 #   define __P(args) args
 107 # endif
 108 #else  /* no config.h */
 109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 110 #   define __P(args) args       /* use prototypes */
 111 #   define PTR void *           /* for generic pointers */
 112 # else /* not standard C */
 113 #   define __P(args) ()         /* no prototypes */
 114 #   define const                /* remove const for old compilers' sake */
 115 #   define PTR long *           /* don't use void* */
 116 # endif
 117 #endif /* !HAVE_CONFIG_H */
 118
 119 #ifndef _GNU_SOURCE
 120 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 121 #endif
 122
 123 /* WIN32_NATIVE is for XEmacs.
 124    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 125 #ifdef WIN32_NATIVE
 126 # undef MSDOS
 127 # undef  WINDOWSNT
 128 # define WINDOWSNT
 129 #endif /* WIN32_NATIVE */
 130
 131 #ifdef MSDOS
 132 # undef MSDOS
 133 # define MSDOS TRUE
 134 # include <fcntl.h>
 135 # include <sys/param.h>
 136 # include <io.h>
 137 # ifndef HAVE_CONFIG_H
 138 #   define DOS_NT
 139 #   include <sys/config.h>
 140 # endif
 141 #else
 142 # define MSDOS FALSE
 143 #endif /* MSDOS */
 144
 145 #ifdef WINDOWSNT
 146 # include <stdlib.h>
 147 # include <fcntl.h>
 148 # include <string.h>
 149 # include <direct.h>
 150 # include <io.h>
 151 # define MAXPATHLEN _MAX_PATH
 152 # undef HAVE_NTGUI
 153 # undef  DOS_NT
 154 # define DOS_NT
 155 # ifndef HAVE_GETCWD
 156 #   define HAVE_GETCWD
 157 # endif /* undef HAVE_GETCWD */
 158 #else /* not WINDOWSNT */
 159 # ifdef STDC_HEADERS
 160 #  include <stdlib.h>
 161 #  include <string.h>
 162 # else /* no standard C headers */
 163     extern char *getenv ();
 164 #  ifdef VMS
 165 #   define EXIT_SUCCESS 1
 166 #   define EXIT_FAILURE 0
 167 #  else /* no VMS */
 168 #   define EXIT_SUCCESS 0
 169 #   define EXIT_FAILURE 1
 170 #  endif
 171 # endif
 172 #endif /* !WINDOWSNT */
 173
 174 #ifdef HAVE_UNISTD_H
 175 # include <unistd.h>
 176 #else
 177 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 178     extern char *getcwd (char *buf, size_t size);
 179 # endif
 180 #endif /* HAVE_UNISTD_H */
 181
 182 #include <stdio.h>
 183 #include <ctype.h>
 184 #include <errno.h>
 185 #ifndef errno
 186   extern int errno;
 187 #endif
 188 #include <sys/types.h>
 189 #include <sys/stat.h>
 190
 191 #include <assert.h>
 192 #ifdef NDEBUG
 193 # undef  assert                 /* some systems have a buggy assert.h */
 194 # define assert(x) ((void) 0)
 195 #endif
 196
 197 #if !defined (S_ISREG) && defined (S_IFREG)
 198 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 199 #endif
 200
 201 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 202 # define NO_LONG_OPTIONS TRUE
 203 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 204   extern char *optarg;
 205   extern int optind, opterr;
 206 #else
 207 # define NO_LONG_OPTIONS FALSE
 208 # include <getopt.h>
 209 #endif /* NO_LONG_OPTIONS */
 210
 211 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 212 # ifdef __CYGWIN__              /* compiling on Cygwin */
 213                              !!! NOTICE !!!
 214  the regex.h distributed with Cygwin is not compatible with etags, alas!
 215 If you want regular expression support, you should delete this notice and
 216               arrange to use the GNU regex.h and regex.c.
 217 # endif
 218 #endif
 219 #include <regex.h>
 220
 221 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 222  Leave it undefined to make the program "etags", which makes emacs-style
 223  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 224 #ifdef CTAGS
 225 # undef  CTAGS
 226 # define CTAGS TRUE
 227 #else
 228 # define CTAGS FALSE
 229 #endif
 230
 231 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 232 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 233 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 234 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 235
 236 #define CHARS 256               /* 2^sizeof(char) */
 237 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 238 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 239 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 240 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 241 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 242 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 243
 244 #define ISALNUM(c)      isalnum (CHAR(c))
 245 #define ISALPHA(c)      isalpha (CHAR(c))
 246 #define ISDIGIT(c)      isdigit (CHAR(c))
 247 #define ISLOWER(c)      islower (CHAR(c))
 248
 249 #define lowcase(c)      tolower (CHAR(c))
 250 #define upcase(c)       toupper (CHAR(c))
 251
 252
 253 /*
 254  *      xnew, xrnew -- allocate, reallocate storage
 255  *
 256  * SYNOPSIS:    Type *xnew (int n, Type);
 257  *              void xrnew (OldPointer, int n, Type);
 258  */
 259 #if DEBUG
 260 # include "chkmalloc.h"
 261 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 262                                                   (n) * sizeof (Type)))
 263 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 264                                         (char *) (op), (n) * sizeof (Type)))
 265 #else
 266 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 267 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 268                                         (char *) (op), (n) * sizeof (Type)))
 269 #endif
 270
 271 #define bool int
 272
 273 typedef void Lang_function __P((FILE *));
 274
 275 typedef struct
 276 {
 277   char *suffix;                 /* file name suffix for this compressor */
 278   char *command;                /* takes one arg and decompresses to stdout */
 279 } compressor;
 280
 281 typedef struct
 282 {
 283   char *name;                   /* language name */
 284   char *help;                   /* detailed help for the language */
 285   Lang_function *function;      /* parse function */
 286   char **suffixes;              /* name suffixes of this language's files */
 287   char **filenames;             /* names of this language's files */
 288   char **interpreters;          /* interpreters for this language */
 289   bool metasource;              /* source used to generate other sources */
 290 } language;
 291
 292 typedef struct fdesc
 293 {
 294   struct fdesc *next;           /* for the linked list */
 295   char *infname;                /* uncompressed input file name */
 296   char *infabsname;             /* absolute uncompressed input file name */
 297   char *infabsdir;              /* absolute dir of input file */
 298   char *taggedfname;            /* file name to write in tagfile */
 299   language *lang;               /* language of file */
 300   char *prop;                   /* file properties to write in tagfile */
 301   bool usecharno;               /* etags tags shall contain char number */
 302   bool written;                 /* entry written in the tags file */
 303 } fdesc;
 304
 305 typedef struct node_st
 306 {                               /* sorting structure */
 307   struct node_st *left, *right; /* left and right sons */
 308   fdesc *fdp;                   /* description of file to whom tag belongs */
 309   char *name;                   /* tag name */
 310   char *regex;                  /* search regexp */
 311   bool valid;                   /* write this tag on the tag file */
 312   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 313   bool been_warned;             /* warning already given for duplicated tag */
 314   int lno;                      /* line number tag is on */
 315   long cno;                     /* character number line starts on */
 316 } node;
 317
 318 /*
 319  * A `linebuffer' is a structure which holds a line of text.
 320  * `readline_internal' reads a line from a stream into a linebuffer
 321  * and works regardless of the length of the line.
 322  * SIZE is the size of BUFFER, LEN is the length of the string in
 323  * BUFFER after readline reads it.
 324  */
 325 typedef struct
 326 {
 327   long size;
 328   int len;
 329   char *buffer;
 330 } linebuffer;
 331
 332 /* Used to support mixing of --lang and file names. */
 333 typedef struct
 334 {
 335   enum {
 336     at_language,                /* a language specification */
 337     at_regexp,                  /* a regular expression */
 338     at_filename,                /* a file name */
 339     at_stdin,                   /* read from stdin here */
 340     at_end                      /* stop parsing the list */
 341   } arg_type;                   /* argument type */
 342   language *lang;               /* language associated with the argument */
 343   char *what;                   /* the argument itself */
 344 } argument;
 345
 346 /* Structure defining a regular expression. */
 347 typedef struct regexp
 348 {
 349   struct regexp *p_next;        /* pointer to next in list */
 350   language *lang;               /* if set, use only for this language */
 351   char *pattern;                /* the regexp pattern */
 352   char *name;                   /* tag name */
 353   struct re_pattern_buffer *pat; /* the compiled pattern */
 354   struct re_registers regs;     /* re registers */
 355   bool error_signaled;          /* already signaled for this regexp */
 356   bool force_explicit_name;     /* do not allow implict tag name */
 357   bool ignore_case;             /* ignore case when matching */
 358   bool multi_line;              /* do a multi-line match on the whole file */
 359 } regexp;
 360
 361
 362 /* Many compilers barf on this:
 363         Lang_function Ada_funcs;
 364    so let's write it this way */
 365 static void Ada_funcs __P((FILE *));
 366 static void Asm_labels __P((FILE *));
 367 static void C_entries __P((int c_ext, FILE *));
 368 static void default_C_entries __P((FILE *));
 369 static void plain_C_entries __P((FILE *));
 370 static void Cjava_entries __P((FILE *));
 371 static void Cobol_paragraphs __P((FILE *));
 372 static void Cplusplus_entries __P((FILE *));
 373 static void Cstar_entries __P((FILE *));
 374 static void Erlang_functions __P((FILE *));
 375 static void Forth_words __P((FILE *));
 376 static void Fortran_functions __P((FILE *));
 377 static void HTML_labels __P((FILE *));
 378 static void Lisp_functions __P((FILE *));
 379 static void Lua_functions __P((FILE *));
 380 static void Makefile_targets __P((FILE *));
 381 static void Pascal_functions __P((FILE *));
 382 static void Perl_functions __P((FILE *));
 383 static void PHP_functions __P((FILE *));
 384 static void PS_functions __P((FILE *));
 385 static void Prolog_functions __P((FILE *));
 386 static void Python_functions __P((FILE *));
 387 static void Scheme_functions __P((FILE *));
 388 static void TeX_commands __P((FILE *));
 389 static void Texinfo_nodes __P((FILE *));
 390 static void Yacc_entries __P((FILE *));
 391 static void just_read_file __P((FILE *));
 392
 393 static void print_language_names __P((void));
 394 static void print_version __P((void));
 395 static void print_help __P((argument *));
 396 int main __P((int, char **));
 397
 398 static compressor *get_compressor_from_suffix __P((char *, char **));
 399 static language *get_language_from_langname __P((const char *));
 400 static language *get_language_from_interpreter __P((char *));
 401 static language *get_language_from_filename __P((char *, bool));
 402 static void readline __P((linebuffer *, FILE *));
 403 static long readline_internal __P((linebuffer *, FILE *));
 404 static bool nocase_tail __P((char *));
 405 static void get_tag __P((char *, char **));
 406
 407 static void analyse_regex __P((char *));
 408 static void free_regexps __P((void));
 409 static void regex_tag_multiline __P((void));
 410 static void error __P((const char *, const char *));
 411 static void suggest_asking_for_help __P((void));
 412 void fatal __P((char *, char *));
 413 static void pfatal __P((char *));
 414 static void add_node __P((node *, node **));
 415
 416 static void init __P((void));
 417 static void process_file_name __P((char *, language *));
 418 static void process_file __P((FILE *, char *, language *));
 419 static void find_entries __P((FILE *));
 420 static void free_tree __P((node *));
 421 static void free_fdesc __P((fdesc *));
 422 static void pfnote __P((char *, bool, char *, int, int, long));
 423 static void make_tag __P((char *, int, bool, char *, int, int, long));
 424 static void invalidate_nodes __P((fdesc *, node **));
 425 static void put_entries __P((node *));
 426
 427 static char *concat __P((char *, char *, char *));
 428 static char *skip_spaces __P((char *));
 429 static char *skip_non_spaces __P((char *));
 430 static char *savenstr __P((char *, int));
 431 static char *savestr __P((char *));
 432 static char *etags_strchr __P((const char *, int));
 433 static char *etags_strrchr __P((const char *, int));
 434 static int etags_strcasecmp __P((const char *, const char *));
 435 static int etags_strncasecmp __P((const char *, const char *, int));
 436 static char *etags_getcwd __P((void));
 437 static char *relative_filename __P((char *, char *));
 438 static char *absolute_filename __P((char *, char *));
 439 static char *absolute_dirname __P((char *, char *));
 440 static bool filename_is_absolute __P((char *f));
 441 static void canonicalize_filename __P((char *));
 442 static void linebuffer_init __P((linebuffer *));
 443 static void linebuffer_setlen __P((linebuffer *, int));
 444 static PTR xmalloc __P((unsigned int));
 445 static PTR xrealloc __P((char *, unsigned int));
 446
 447 \f
 448 static char searchar = '/';     /* use /.../ searches */
 449
 450 static char *tagfile;           /* output file */
 451 static char *progname;          /* name this program was invoked with */
 452 static char *cwd;               /* current working directory */
 453 static char *tagfiledir;        /* directory of tagfile */
 454 static FILE *tagf;              /* ioptr for tags file */
 455
 456 static fdesc *fdhead;           /* head of file description list */
 457 static fdesc *curfdp;           /* current file description */
 458 static int lineno;              /* line number of current line */
 459 static long charno;             /* current character number */
 460 static long linecharno;         /* charno of start of current line */
 461 static char *dbp;               /* pointer to start of current tag */
 462
 463 static const int invalidcharno = -1;
 464
 465 static node *nodehead;          /* the head of the binary tree of tags */
 466 static node *last_node;         /* the last node created */
 467
 468 static linebuffer lb;           /* the current line */
 469 static linebuffer filebuf;      /* a buffer containing the whole file */
 470 static linebuffer token_name;   /* a buffer containing a tag name */
 471
 472 /* boolean "functions" (see init)       */
 473 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 474 static char
 475   /* white chars */
 476   *white = " \f\t\n\r\v",
 477   /* not in a name */
 478   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 479   /* token ending chars */
 480   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 481   /* token starting chars */
 482   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 483   /* valid in-token chars */
 484   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 485
 486 static bool append_to_tagfile;  /* -a: append to tags */
 487 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 488 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 489 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 490                                 /* 0 struct/enum/union decls, and C++ */
 491                                 /* member functions. */
 492 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 493                                 /* constants and variables. */
 494                                 /* -D: opposite of -d.  Default under ctags. */
 495 static bool globals;            /* create tags for global variables */
 496 static bool members;            /* create tags for C member variables */
 497 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 498 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 499 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 500 static bool update;             /* -u: update tags */
 501 static bool vgrind_style;       /* -v: create vgrind style index output */
 502 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 503 static bool cxref_style;        /* -x: create cxref style output */
 504 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 505 static bool ignoreindent;       /* -I: ignore indentation in C */
 506 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 507
 508 /* STDIN is defined in LynxOS system headers */
 509 #ifdef STDIN
 510 # undef STDIN
 511 #endif
 512
 513 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 514 static bool parsing_stdin;      /* --parse-stdin used */
 515
 516 static regexp *p_head;          /* list of all regexps */
 517 static bool need_filebuf;       /* some regexes are multi-line */
 518
 519 static struct option longopts[] =
 520 {
 521   { "append",             no_argument,       NULL,               'a'   },
 522   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 523   { "c++",                no_argument,       NULL,               'C'   },
 524   { "declarations",       no_argument,       &declarations,      TRUE  },
 525   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 526   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 527   { "help",               no_argument,       NULL,               'h'   },
 528   { "help",               no_argument,       NULL,               'H'   },
 529   { "ignore-indentation", no_argument,       NULL,               'I'   },
 530   { "language",           required_argument, NULL,               'l'   },
 531   { "members",            no_argument,       &members,           TRUE  },
 532   { "no-members",         no_argument,       &members,           FALSE },
 533   { "output",             required_argument, NULL,               'o'   },
 534   { "regex",              required_argument, NULL,               'r'   },
 535   { "no-regex",           no_argument,       NULL,               'R'   },
 536   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 537   { "parse-stdin",        required_argument, NULL,               STDIN },
 538   { "version",            no_argument,       NULL,               'V'   },
 539
 540 #if CTAGS /* Ctags options */
 541   { "backward-search",    no_argument,       NULL,               'B'   },
 542   { "cxref",              no_argument,       NULL,               'x'   },
 543   { "defines",            no_argument,       NULL,               'd'   },
 544   { "globals",            no_argument,       &globals,           TRUE  },
 545   { "typedefs",           no_argument,       NULL,               't'   },
 546   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 547   { "update",             no_argument,       NULL,               'u'   },
 548   { "vgrind",             no_argument,       NULL,               'v'   },
 549   { "no-warn",            no_argument,       NULL,               'w'   },
 550
 551 #else /* Etags options */
 552   { "no-defines",         no_argument,       NULL,               'D'   },
 553   { "no-globals",         no_argument,       &globals,           FALSE },
 554   { "include",            required_argument, NULL,               'i'   },
 555 #endif
 556   { NULL }
 557 };
 558
 559 static compressor compressors[] =
 560 {
 561   { "z", "gzip -d -c"},
 562   { "Z", "gzip -d -c"},
 563   { "gz", "gzip -d -c"},
 564   { "GZ", "gzip -d -c"},
 565   { "bz2", "bzip2 -d -c" },
 566   { NULL }
 567 };
 568
 569 /*
 570  * Language stuff.
 571  */
 572
 573 /* Ada code */
 574 static char *Ada_suffixes [] =
 575   { "ads", "adb", "ada", NULL };
 576 static char Ada_help [] =
 577 "In Ada code, functions, procedures, packages, tasks and types are\n\
 578 tags.  Use the `--packages-only' option to create tags for\n\
 579 packages only.\n\
 580 Ada tag names have suffixes indicating the type of entity:\n\
 581         Entity type:    Qualifier:\n\
 582         ------------    ----------\n\
 583         function        /f\n\
 584         procedure       /p\n\
 585         package spec    /s\n\
 586         package body    /b\n\
 587         type            /t\n\
 588         task            /k\n\
 589 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 590 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 591 will just search for any tag `bidule'.";
 592
 593 /* Assembly code */
 594 static char *Asm_suffixes [] =
 595   { "a",        /* Unix assembler */
 596     "asm", /* Microcontroller assembly */
 597     "def", /* BSO/Tasking definition includes  */
 598     "inc", /* Microcontroller include files */
 599     "ins", /* Microcontroller include files */
 600     "s", "sa", /* Unix assembler */
 601     "S",   /* cpp-processed Unix assembler */
 602     "src", /* BSO/Tasking C compiler output */
 603     NULL
 604   };
 605 static char Asm_help [] =
 606 "In assembler code, labels appearing at the beginning of a line,\n\
 607 followed by a colon, are tags.";
 608
 609
 610 /* Note that .c and .h can be considered C++, if the --c++ flag was
 611    given, or if the `class' or `template' keywords are met inside the file.
 612    That is why default_C_entries is called for these. */
 613 static char *default_C_suffixes [] =
 614   { "c", "h", NULL };
 615 #if CTAGS                               /* C help for Ctags */
 616 static char default_C_help [] =
 617 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 618 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 619 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 620 Use --globals to tag global variables.\n\
 621 You can tag function declarations and external variables by\n\
 622 using `--declarations', and struct members by using `--members'.";
 623 #else                                   /* C help for Etags */
 624 static char default_C_help [] =
 625 "In C code, any C function or typedef is a tag, and so are\n\
 626 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 627 definitions and `enum' constants are tags unless you specify\n\
 628 `--no-defines'.  Global variables are tags unless you specify\n\
 629 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 630 can make the tags table file much smaller.\n\
 631 You can tag function declarations and external variables by\n\
 632 using `--declarations', and struct members by using `--members'.";
 633 #endif  /* C help for Ctags and Etags */
 634
 635 static char *Cplusplus_suffixes [] =
 636   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 637     "M",                        /* Objective C++ */
 638     "pdb",                      /* Postscript with C syntax */
 639     NULL };
 640 static char Cplusplus_help [] =
 641 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 642 --help --lang=c --lang=c++ for full help.)\n\
 643 In addition to C tags, member functions are also recognized.  Member\n\
 644 variables are also recognized if you use the `--members' option.\n\
 645 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 646 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 647 `operator+'.";
 648
 649 static char *Cjava_suffixes [] =
 650   { "java", NULL };
 651 static char Cjava_help [] =
 652 "In Java code, all the tags constructs of C and C++ code are\n\
 653 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 654
 655
 656 static char *Cobol_suffixes [] =
 657   { "COB", "cob", NULL };
 658 static char Cobol_help [] =
 659 "In Cobol code, tags are paragraph names; that is, any word\n\
 660 starting in column 8 and followed by a period.";
 661
 662 static char *Cstar_suffixes [] =
 663   { "cs", "hs", NULL };
 664
 665 static char *Erlang_suffixes [] =
 666   { "erl", "hrl", NULL };
 667 static char Erlang_help [] =
 668 "In Erlang code, the tags are the functions, records and macros\n\
 669 defined in the file.";
 670
 671 char *Forth_suffixes [] =
 672   { "fth", "tok", NULL };
 673 static char Forth_help [] =
 674 "In Forth code, tags are words defined by `:',\n\
 675 constant, code, create, defer, value, variable, buffer:, field.";
 676
 677 static char *Fortran_suffixes [] =
 678   { "F", "f", "f90", "for", NULL };
 679 static char Fortran_help [] =
 680 "In Fortran code, functions, subroutines and block data are tags.";
 681
 682 static char *HTML_suffixes [] =
 683   { "htm", "html", "shtml", NULL };
 684 static char HTML_help [] =
 685 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 686 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 687 occurrences of `id='.";
 688
 689 static char *Lisp_suffixes [] =
 690   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 691 static char Lisp_help [] =
 692 "In Lisp code, any function defined with `defun', any variable\n\
 693 defined with `defvar' or `defconst', and in general the first\n\
 694 argument of any expression that starts with `(def' in column zero\n\
 695 is a tag.";
 696
 697 static char *Lua_suffixes [] =
 698   { "lua", "LUA", NULL };
 699 static char Lua_help [] =
 700 "In Lua scripts, all functions are tags.";
 701
 702 static char *Makefile_filenames [] =
 703   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 704 static char Makefile_help [] =
 705 "In makefiles, targets are tags; additionally, variables are tags\n\
 706 unless you specify `--no-globals'.";
 707
 708 static char *Objc_suffixes [] =
 709   { "lm",                       /* Objective lex file */
 710     "m",                        /* Objective C file */
 711      NULL };
 712 static char Objc_help [] =
 713 "In Objective C code, tags include Objective C definitions for classes,\n\
 714 class categories, methods and protocols.  Tags for variables and\n\
 715 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 716 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 717
 718 static char *Pascal_suffixes [] =
 719   { "p", "pas", NULL };
 720 static char Pascal_help [] =
 721 "In Pascal code, the tags are the functions and procedures defined\n\
 722 in the file.";
 723 /* " // this is for working around an Emacs highlighting bug... */
 724
 725 static char *Perl_suffixes [] =
 726   { "pl", "pm", NULL };
 727 static char *Perl_interpreters [] =
 728   { "perl", "@PERL@", NULL };
 729 static char Perl_help [] =
 730 "In Perl code, the tags are the packages, subroutines and variables\n\
 731 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 732 `--globals' if you want to tag global variables.  Tags for\n\
 733 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 734 defined in the default package is `main::SUB'.";
 735
 736 static char *PHP_suffixes [] =
 737   { "php", "php3", "php4", NULL };
 738 static char PHP_help [] =
 739 "In PHP code, tags are functions, classes and defines.  When using\n\
 740 the `--members' option, vars are tags too.";
 741
 742 static char *plain_C_suffixes [] =
 743   { "pc",                       /* Pro*C file */
 744      NULL };
 745
 746 static char *PS_suffixes [] =
 747   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 748 static char PS_help [] =
 749 "In PostScript code, the tags are the functions.";
 750
 751 static char *Prolog_suffixes [] =
 752   { "prolog", NULL };
 753 static char Prolog_help [] =
 754 "In Prolog code, tags are predicates and rules at the beginning of\n\
 755 line.";
 756
 757 static char *Python_suffixes [] =
 758   { "py", NULL };
 759 static char Python_help [] =
 760 "In Python code, `def' or `class' at the beginning of a line\n\
 761 generate a tag.";
 762
 763 /* Can't do the `SCM' or `scm' prefix with a version number. */
 764 static char *Scheme_suffixes [] =
 765   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 766 static char Scheme_help [] =
 767 "In Scheme code, tags include anything defined with `def' or with a\n\
 768 construct whose name starts with `def'.  They also include\n\
 769 variables set with `set!' at top level in the file.";
 770
 771 static char *TeX_suffixes [] =
 772   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 773 static char TeX_help [] =
 774 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 775 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 776 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 777 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 778 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 779 \n\
 780 Other commands can be specified by setting the environment variable\n\
 781 `TEXTAGS' to a colon-separated list like, for example,\n\
 782      TEXTAGS=\"mycommand:myothercommand\".";
 783
 784
 785 static char *Texinfo_suffixes [] =
 786   { "texi", "texinfo", "txi", NULL };
 787 static char Texinfo_help [] =
 788 "for texinfo files, lines starting with @node are tagged.";
 789
 790 static char *Yacc_suffixes [] =
 791   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 792 static char Yacc_help [] =
 793 "In Bison or Yacc input files, each rule defines as a tag the\n\
 794 nonterminal it constructs.  The portions of the file that contain\n\
 795 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 796 for full help).";
 797
 798 static char auto_help [] =
 799 "`auto' is not a real language, it indicates to use\n\
 800 a default language for files base on file name suffix and file contents.";
 801
 802 static char none_help [] =
 803 "`none' is not a real language, it indicates to only do\n\
 804 regexp processing on files.";
 805
 806 static char no_lang_help [] =
 807 "No detailed help available for this language.";
 808
 809
 810 /*
 811  * Table of languages.
 812  *
 813  * It is ok for a given function to be listed under more than one
 814  * name.  I just didn't.
 815  */
 816
 817 static language lang_names [] =
 818 {
 819   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 820   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 821   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 822   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 823   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 824   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 825   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 826   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 827   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 828   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 829   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 830   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 831   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 832   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 833   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 834   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 835   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 836   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 837   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 838   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 839   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 840   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 841   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 842   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 843   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 844   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 845   { "auto",      auto_help },                      /* default guessing scheme */
 846   { "none",      none_help,      just_read_file }, /* regexp matching only */
 847   { NULL }                /* end of list */
 848 };
 849
 850 \f
 851 static void
 852 print_language_names ()
 853 {
 854   language *lang;
 855   char **name, **ext;
 856
 857   puts ("\nThese are the currently supported languages, along with the\n\
 858 default file names and dot suffixes:");
 859   for (lang = lang_names; lang->name != NULL; lang++)
 860     {
 861       printf ("  %-*s", 10, lang->name);
 862       if (lang->filenames != NULL)
 863         for (name = lang->filenames; *name != NULL; name++)
 864           printf (" %s", *name);
 865       if (lang->suffixes != NULL)
 866         for (ext = lang->suffixes; *ext != NULL; ext++)
 867           printf (" .%s", *ext);
 868       puts ("");
 869     }
 870   puts ("where `auto' means use default language for files based on file\n\
 871 name suffix, and `none' means only do regexp processing on files.\n\
 872 If no language is specified and no matching suffix is found,\n\
 873 the first line of the file is read for a sharp-bang (#!) sequence\n\
 874 followed by the name of an interpreter.  If no such sequence is found,\n\
 875 Fortran is tried first; if no tags are found, C is tried next.\n\
 876 When parsing any C file, a \"class\" or \"template\" keyword\n\
 877 switches to C++.");
 878   puts ("Compressed files are supported using gzip and bzip2.\n\
 879 \n\
 880 For detailed help on a given language use, for example,\n\
 881 etags --help --lang=ada.");
 882 }
 883
 884 #ifndef EMACS_NAME
 885 # define EMACS_NAME "standalone"
 886 #endif
 887 #ifndef VERSION
 888 # define VERSION "17.26"
 889 #endif
 890 static void
 891 print_version ()
 892 {
 893   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 894   puts ("Copyright (C) 2008 Free Software Foundation, Inc.");
 895   puts ("This program is distributed under the terms in ETAGS.README");
 896
 897   exit (EXIT_SUCCESS);
 898 }
 899
 900 static void
 901 print_help (argbuffer)
 902      argument *argbuffer;
 903 {
 904   bool help_for_lang = FALSE;
 905
 906   for (; argbuffer->arg_type != at_end; argbuffer++)
 907     if (argbuffer->arg_type == at_language)
 908       {
 909         if (help_for_lang)
 910           puts ("");
 911         puts (argbuffer->lang->help);
 912         help_for_lang = TRUE;
 913       }
 914
 915   if (help_for_lang)
 916     exit (EXIT_SUCCESS);
 917
 918   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 919 \n\
 920 These are the options accepted by %s.\n", progname, progname);
 921   if (NO_LONG_OPTIONS)
 922     puts ("WARNING: long option names do not work with this executable,\n\
 923 as it is not linked with GNU getopt.");
 924   else
 925     puts ("You may use unambiguous abbreviations for the long option names.");
 926   puts ("  A - as file name means read names from stdin (one per line).\n\
 927 Absolute names are stored in the output file as they are.\n\
 928 Relative ones are stored relative to the output file's directory.\n");
 929
 930   puts ("-a, --append\n\
 931         Append tag entries to existing tags file.");
 932
 933   puts ("--packages-only\n\
 934         For Ada files, only generate tags for packages.");
 935
 936   if (CTAGS)
 937     puts ("-B, --backward-search\n\
 938         Write the search commands for the tag entries using '?', the\n\
 939         backward-search command instead of '/', the forward-search command.");
 940
 941   /* This option is mostly obsolete, because etags can now automatically
 942      detect C++.  Retained for backward compatibility and for debugging and
 943      experimentation.  In principle, we could want to tag as C++ even
 944      before any "class" or "template" keyword.
 945   puts ("-C, --c++\n\
 946         Treat files whose name suffix defaults to C language as C++ files.");
 947   */
 948
 949   puts ("--declarations\n\
 950         In C and derived languages, create tags for function declarations,");
 951   if (CTAGS)
 952     puts ("\tand create tags for extern variables if --globals is used.");
 953   else
 954     puts
 955       ("\tand create tags for extern variables unless --no-globals is used.");
 956
 957   if (CTAGS)
 958     puts ("-d, --defines\n\
 959         Create tag entries for C #define constants and enum constants, too.");
 960   else
 961     puts ("-D, --no-defines\n\
 962         Don't create tag entries for C #define constants and enum constants.\n\
 963         This makes the tags file smaller.");
 964
 965   if (!CTAGS)
 966     puts ("-i FILE, --include=FILE\n\
 967         Include a note in tag file indicating that, when searching for\n\
 968         a tag, one should also consult the tags file FILE after\n\
 969         checking the current file.");
 970
 971   puts ("-l LANG, --language=LANG\n\
 972         Force the following files to be considered as written in the\n\
 973         named language up to the next --language=LANG option.");
 974
 975   if (CTAGS)
 976     puts ("--globals\n\
 977         Create tag entries for global variables in some languages.");
 978   else
 979     puts ("--no-globals\n\
 980         Do not create tag entries for global variables in some\n\
 981         languages.  This makes the tags file smaller.");
 982   puts ("--members\n\
 983         Create tag entries for members of structures in some languages.");
 984
 985   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 986         Make a tag for each line matching a regular expression pattern\n\
 987         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 988         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 989         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 990         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 991   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 992         For example Tcl named tags can be created with:\n\
 993           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 994         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 995         `m' means to allow multi-line matches, `s' implies `m' and\n\
 996         causes dot to match any character, including newline.");
 997   puts ("-R, --no-regex\n\
 998         Don't create tags from regexps for the following files.");
 999   puts ("-I, --ignore-indentation\n\
1000         In C and C++ do not assume that a closing brace in the first\n\
1001         column is the final brace of a function or structure definition.");
1002   puts ("-o FILE, --output=FILE\n\
1003         Write the tags to FILE.");
1004   puts ("--parse-stdin=NAME\n\
1005         Read from standard input and record tags as belonging to file NAME.");
1006
1007   if (CTAGS)
1008     {
1009       puts ("-t, --typedefs\n\
1010         Generate tag entries for C and Ada typedefs.");
1011       puts ("-T, --typedefs-and-c++\n\
1012         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1013         and C++ member functions.");
1014     }
1015
1016   if (CTAGS)
1017     puts ("-u, --update\n\
1018         Update the tag entries for the given files, leaving tag\n\
1019         entries for other files in place.  Currently, this is\n\
1020         implemented by deleting the existing entries for the given\n\
1021         files and then rewriting the new entries at the end of the\n\
1022         tags file.  It is often faster to simply rebuild the entire\n\
1023         tag file than to use this.");
1024
1025   if (CTAGS)
1026     {
1027       puts ("-v, --vgrind\n\
1028         Print on the standard output an index of items intended for\n\
1029         human consumption, similar to the output of vgrind.  The index\n\
1030         is sorted, and gives the page number of each item.");
1031 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
1032       puts ("-w, --no-duplicates\n\
1033         Do not create duplicate tag entries, for compatibility with\n\
1034         traditional ctags.");
1035       puts ("-w, --no-warn\n\
1036         Suppress warning messages about duplicate tag entries.");
1037 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
1038       puts ("-x, --cxref\n\
1039         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1040         The output uses line numbers instead of page numbers, but\n\
1041         beyond that the differences are cosmetic; try both to see\n\
1042         which you like.");
1043     }
1044
1045   puts ("-V, --version\n\
1046         Print the version of the program.\n\
1047 -h, --help\n\
1048         Print this help message.\n\
1049         Followed by one or more `--language' options prints detailed\n\
1050         help about tag generation for the specified languages.");
1051
1052   print_language_names ();
1053
1054   puts ("");
1055   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1056
1057   exit (EXIT_SUCCESS);
1058 }
1059
1060 \f
1061 #ifdef VMS                      /* VMS specific functions */
1062
/* End-of-string character. */
#define EOS     '\0'

/* Maximum number of characters in a file specification.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A file specification: a length followed by the characters.  BODY
   has one element more than MAX_FILE_SPEC_LEN, leaving room for a
   terminating EOS after a specification of maximum length.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1072
1073 /*
1074  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1075  returning in each successive call the next file name matching the input
1076  spec. The function expects that each in_spec passed
1077  to it will be processed to completion; in particular, up to and
1078  including the call following that in which the last matching name
1079  is returned, the function ignores the value of in_spec, and will
1080  only start processing a new spec with the following call.
1081  If an error occurs, on return out_spec contains the value
1082  of in_spec when the error occurred.
1083
1084  With each successive file name returned in out_spec, the
1085  function's return value is one. When there are no more matching
1086  names the function returns zero. If on the first call no file
1087  matches in_spec, or there is any other error, -1 is returned.
1088 */
1089
1090 #include        <rmsdef.h>
1091 #include        <descrip.h>
1092 #define         OUTSIZE MAX_FILE_SPEC_LEN
1093 static short
1094 fn_exp (out, in)
1095      vspec *out;
1096      char *in;
1097 {
1098   static long context = 0;
1099   static struct dsc$descriptor_s o;
1100   static struct dsc$descriptor_s i;
1101   static bool pass1 = TRUE;
1102   long status;
1103   short retval;
1104
1105   if (pass1)
1106     {
1107       pass1 = FALSE;
1108       o.dsc$a_pointer = (char *) out;
1109       o.dsc$w_length = (short)OUTSIZE;
1110       i.dsc$a_pointer = in;
1111       i.dsc$w_length = (short)strlen(in);
1112       i.dsc$b_dtype = DSC$K_DTYPE_T;
1113       i.dsc$b_class = DSC$K_CLASS_S;
1114       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1115       o.dsc$b_class = DSC$K_CLASS_VS;
1116     }
1117   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1118     {
1119       out->body[out->curlen] = EOS;
1120       return 1;
1121     }
1122   else if (status == RMS$_NMF)
1123     retval = 0;
1124   else
1125     {
1126       strcpy(out->body, in);
1127       retval = -1;
1128     }
1129   lib$find_file_end(&context);
1130   pass1 = TRUE;
1131   return retval;
1132 }
1133
1134 /*
1135   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1136   name of each file specified by the provided arg expanding wildcards.
1137 */
1138 static char *
1139 gfnames (arg, p_error)
1140      char *arg;
1141      bool *p_error;
1142 {
1143   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1144
1145   switch (fn_exp (&filename, arg))
1146     {
1147     case 1:
1148       *p_error = FALSE;
1149       return filename.body;
1150     case 0:
1151       *p_error = FALSE;
1152       return NULL;
1153     default:
1154       *p_error = TRUE;
1155       return filename.body;
1156     }
1157 }
1158
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/* NOTE(review): the comment above suggests this stand-in is only needed
   where the C library lacks `system'; confirm that the sense of the
   OLD test is the intended one.

   Stand-in for `system': CMD is not executed.  An error message is
   reported and -1 is returned, so callers that check the result see a
   failure instead of reading the value of a function that falls off
   its end.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1166
1167 #define VERSION_DELIM   ';'
1168 char *massage_name (s)
1169      char *s;
1170 {
1171   char *start = s;
1172
1173   for ( ; *s; s++)
1174     if (*s == VERSION_DELIM)
1175       {
1176         *s = EOS;
1177         break;
1178       }
1179     else
1180       *s = lowcase (*s);
1181   return start;
1182 }
1183 #endif /* VMS */
1184
1185 \f
1186 int
1187 main (argc, argv)
1188      int argc;
1189      char *argv[];
1190 {
1191   int i;
1192   unsigned int nincluded_files;
1193   char **included_files;
1194   argument *argbuffer;
1195   int current_arg, file_count;
1196   linebuffer filename_lb;
1197   bool help_asked = FALSE;
1198 #ifdef VMS
1199   bool got_err;
1200 #endif
1201  char *optstring;
1202  int opt;
1203
1204
1205 #ifdef DOS_NT
1206   _fmode = O_BINARY;   /* all of files are treated as binary files */
1207 #endif /* DOS_NT */
1208
1209   progname = argv[0];
1210   nincluded_files = 0;
1211   included_files = xnew (argc, char *);
1212   current_arg = 0;
1213   file_count = 0;
1214
1215   /* Allocate enough no matter what happens.  Overkill, but each one
1216      is small. */
1217   argbuffer = xnew (argc, argument);
1218
1219   /*
1220    * If etags, always find typedefs and structure tags.  Why not?
1221    * Also default to find macro constants, enum constants and
1222    * global variables.
1223    */
1224   if (!CTAGS)
1225     {
1226       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1227       globals = TRUE;
1228     }
1229
1230   /* When the optstring begins with a '-' getopt_long does not rearrange the
1231      non-options arguments to be at the end, but leaves them alone. */
1232   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1233                       "ac:Cf:Il:o:r:RSVhH",
1234                       (CTAGS) ? "BxdtTuvw" : "Di:");
1235
1236   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1237     switch (opt)
1238       {
1239       case 0:
1240         /* If getopt returns 0, then it has already processed a
1241            long-named option.  We should do nothing.  */
1242         break;
1243
1244       case 1:
1245         /* This means that a file name has been seen.  Record it. */
1246         argbuffer[current_arg].arg_type = at_filename;
1247         argbuffer[current_arg].what     = optarg;
1248         ++current_arg;
1249         ++file_count;
1250         break;
1251
1252       case STDIN:
1253         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1254         argbuffer[current_arg].arg_type = at_stdin;
1255         argbuffer[current_arg].what     = optarg;
1256         ++current_arg;
1257         ++file_count;
1258         if (parsing_stdin)
1259           fatal ("cannot parse standard input more than once", (char *)NULL);
1260         parsing_stdin = TRUE;
1261         break;
1262
1263         /* Common options. */
1264       case 'a': append_to_tagfile = TRUE;       break;
1265       case 'C': cplusplus = TRUE;               break;
1266       case 'f':         /* for compatibility with old makefiles */
1267       case 'o':
1268         if (tagfile)
1269           {
1270             error ("-o option may only be given once.", (char *)NULL);
1271             suggest_asking_for_help ();
1272             /* NOTREACHED */
1273           }
1274         tagfile = optarg;
1275         break;
1276       case 'I':
1277       case 'S':         /* for backward compatibility */
1278         ignoreindent = TRUE;
1279         break;
1280       case 'l':
1281         {
1282           language *lang = get_language_from_langname (optarg);
1283           if (lang != NULL)
1284             {
1285               argbuffer[current_arg].lang = lang;
1286               argbuffer[current_arg].arg_type = at_language;
1287               ++current_arg;
1288             }
1289         }
1290         break;
1291       case 'c':
1292         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1293         optarg = concat (optarg, "i", ""); /* memory leak here */
1294         /* FALLTHRU */
1295       case 'r':
1296         argbuffer[current_arg].arg_type = at_regexp;
1297         argbuffer[current_arg].what = optarg;
1298         ++current_arg;
1299         break;
1300       case 'R':
1301         argbuffer[current_arg].arg_type = at_regexp;
1302         argbuffer[current_arg].what = NULL;
1303         ++current_arg;
1304         break;
1305       case 'V':
1306         print_version ();
1307         break;
1308       case 'h':
1309       case 'H':
1310         help_asked = TRUE;
1311         break;
1312
1313         /* Etags options */
1314       case 'D': constantypedefs = FALSE;                        break;
1315       case 'i': included_files[nincluded_files++] = optarg;     break;
1316
1317         /* Ctags options. */
1318       case 'B': searchar = '?';                                 break;
1319       case 'd': constantypedefs = TRUE;                         break;
1320       case 't': typedefs = TRUE;                                break;
1321       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1322       case 'u': update = TRUE;                                  break;
1323       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1324       case 'x': cxref_style = TRUE;                             break;
1325       case 'w': no_warnings = TRUE;                             break;
1326       default:
1327         suggest_asking_for_help ();
1328         /* NOTREACHED */
1329       }
1330
1331   /* No more options.  Store the rest of arguments. */
1332   for (; optind < argc; optind++)
1333     {
1334       argbuffer[current_arg].arg_type = at_filename;
1335       argbuffer[current_arg].what = argv[optind];
1336       ++current_arg;
1337       ++file_count;
1338     }
1339
1340   argbuffer[current_arg].arg_type = at_end;
1341
1342   if (help_asked)
1343     print_help (argbuffer);
1344     /* NOTREACHED */
1345
1346   if (nincluded_files == 0 && file_count == 0)
1347     {
1348       error ("no input files specified.", (char *)NULL);
1349       suggest_asking_for_help ();
1350       /* NOTREACHED */
1351     }
1352
1353   if (tagfile == NULL)
1354     tagfile = CTAGS ? "tags" : "TAGS";
1355   cwd = etags_getcwd ();        /* the current working directory */
1356   if (cwd[strlen (cwd) - 1] != '/')
1357     {
1358       char *oldcwd = cwd;
1359       cwd = concat (oldcwd, "/", "");
1360       free (oldcwd);
1361     }
1362   /* Relative file names are made relative to the current directory. */
1363   if (streq (tagfile, "-")
1364       || strneq (tagfile, "/dev/", 5))
1365     tagfiledir = cwd;
1366   else
1367     tagfiledir = absolute_dirname (tagfile, cwd);
1368
1369   init ();                      /* set up boolean "functions" */
1370
1371   linebuffer_init (&lb);
1372   linebuffer_init (&filename_lb);
1373   linebuffer_init (&filebuf);
1374   linebuffer_init (&token_name);
1375
1376   if (!CTAGS)
1377     {
1378       if (streq (tagfile, "-"))
1379         {
1380           tagf = stdout;
1381 #ifdef DOS_NT
1382           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1383              doesn't take effect until after `stdout' is already open). */
1384           if (!isatty (fileno (stdout)))
1385             setmode (fileno (stdout), O_BINARY);
1386 #endif /* DOS_NT */
1387         }
1388       else
1389         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1390       if (tagf == NULL)
1391         pfatal (tagfile);
1392     }
1393
1394   /*
1395    * Loop through files finding functions.
1396    */
1397   for (i = 0; i < current_arg; i++)
1398     {
1399       static language *lang;    /* non-NULL if language is forced */
1400       char *this_file;
1401
1402       switch (argbuffer[i].arg_type)
1403         {
1404         case at_language:
1405           lang = argbuffer[i].lang;
1406           break;
1407         case at_regexp:
1408           analyse_regex (argbuffer[i].what);
1409           break;
1410         case at_filename:
1411 #ifdef VMS
1412           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1413             {
1414               if (got_err)
1415                 {
1416                   error ("can't find file %s\n", this_file);
1417                   argc--, argv++;
1418                 }
1419               else
1420                 {
1421                   this_file = massage_name (this_file);
1422                 }
1423 #else
1424               this_file = argbuffer[i].what;
1425 #endif
1426               /* Input file named "-" means read file names from stdin
1427                  (one per line) and use them. */
1428               if (streq (this_file, "-"))
1429                 {
1430                   if (parsing_stdin)
1431                     fatal ("cannot parse standard input AND read file names from it",
1432                            (char *)NULL);
1433                   while (readline_internal (&filename_lb, stdin) > 0)
1434                     process_file_name (filename_lb.buffer, lang);
1435                 }
1436               else
1437                 process_file_name (this_file, lang);
1438 #ifdef VMS
1439             }
1440 #endif
1441           break;
1442         case at_stdin:
1443           this_file = argbuffer[i].what;
1444           process_file (stdin, this_file, lang);
1445           break;
1446         }
1447     }
1448
1449   free_regexps ();
1450   free (lb.buffer);
1451   free (filebuf.buffer);
1452   free (token_name.buffer);
1453
1454   if (!CTAGS || cxref_style)
1455     {
1456       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1457       put_entries (nodehead);
1458       free_tree (nodehead);
1459       nodehead = NULL;
1460       if (!CTAGS)
1461         {
1462           fdesc *fdp;
1463
1464           /* Output file entries that have no tags. */
1465           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1466             if (!fdp->written)
1467               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1468
1469           while (nincluded_files-- > 0)
1470             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1471
1472           if (fclose (tagf) == EOF)
1473             pfatal (tagfile);
1474         }
1475
1476       exit (EXIT_SUCCESS);
1477     }
1478
1479   /* From here on, we are in (CTAGS && !cxref_style) */
1480   if (update)
1481     {
1482       char cmd[BUFSIZ];
1483       for (i = 0; i < current_arg; ++i)
1484         {
1485           switch (argbuffer[i].arg_type)
1486             {
1487             case at_filename:
1488             case at_stdin:
1489               break;
1490             default:
1491               continue;         /* the for loop */
1492             }
1493           sprintf (cmd,
1494                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1495                    tagfile, argbuffer[i].what, tagfile);
1496           if (system (cmd) != EXIT_SUCCESS)
1497             fatal ("failed to execute shell command", (char *)NULL);
1498         }
1499       append_to_tagfile = TRUE;
1500     }
1501
1502   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1503   if (tagf == NULL)
1504     pfatal (tagfile);
1505   put_entries (nodehead);       /* write all the tags (CTAGS) */
1506   free_tree (nodehead);
1507   nodehead = NULL;
1508   if (fclose (tagf) == EOF)
1509     pfatal (tagfile);
1510
1511   if (CTAGS)
1512     if (append_to_tagfile || update)
1513       {
1514         char cmd[2*BUFSIZ+20];
1515         /* Maybe these should be used:
1516            setenv ("LC_COLLATE", "C", 1);
1517            setenv ("LC_ALL", "C", 1); */
1518         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1519         exit (system (cmd));
1520       }
1521   return EXIT_SUCCESS;
1522 }
1523
1524
1525 /*
1526  * Return a compressor given the file name.  If EXTPTR is non-zero,
1527  * return a pointer into FILE where the compressor-specific
1528  * extension begins.  If no compressor is found, NULL is returned
1529  * and EXTPTR is not significant.
1530  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1531  */
1532 static compressor *
1533 get_compressor_from_suffix (file, extptr)
1534      char *file;
1535      char **extptr;
1536 {
1537   compressor *compr;
1538   char *slash, *suffix;
1539
1540   /* This relies on FN to be after canonicalize_filename,
1541      so we don't need to consider backslashes on DOS_NT.  */
1542   slash = etags_strrchr (file, '/');
1543   suffix = etags_strrchr (file, '.');
1544   if (suffix == NULL || suffix < slash)
1545     return NULL;
1546   if (extptr != NULL)
1547     *extptr = suffix;
1548   suffix += 1;
1549   /* Let those poor souls who live with DOS 8+3 file name limits get
1550      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1551      Only the first do loop is run if not MSDOS */
1552   do
1553     {
1554       for (compr = compressors; compr->suffix != NULL; compr++)
1555         if (streq (compr->suffix, suffix))
1556           return compr;
1557       if (!MSDOS)
1558         break;                  /* do it only once: not really a loop */
1559       if (extptr != NULL)
1560         *extptr = ++suffix;
1561     } while (*suffix != '\0');
1562   return NULL;
1563 }
1564
1565
1566
1567 /*
1568  * Return a language given the name.
1569  */
1570 static language *
1571 get_language_from_langname (name)
1572      const char *name;
1573 {
1574   language *lang;
1575
1576   if (name == NULL)
1577     error ("empty language name", (char *)NULL);
1578   else
1579     {
1580       for (lang = lang_names; lang->name != NULL; lang++)
1581         if (streq (name, lang->name))
1582           return lang;
1583       error ("unknown language \"%s\"", name);
1584     }
1585
1586   return NULL;
1587 }
1588
1589
1590 /*
1591  * Return a language given the interpreter name.
1592  */
1593 static language *
1594 get_language_from_interpreter (interpreter)
1595      char *interpreter;
1596 {
1597   language *lang;
1598   char **iname;
1599
1600   if (interpreter == NULL)
1601     return NULL;
1602   for (lang = lang_names; lang->name != NULL; lang++)
1603     if (lang->interpreters != NULL)
1604       for (iname = lang->interpreters; *iname != NULL; iname++)
1605         if (streq (*iname, interpreter))
1606             return lang;
1607
1608   return NULL;
1609 }
1610
1611
1612
1613 /*
1614  * Return a language given the file name.
1615  */
1616 static language *
1617 get_language_from_filename (file, case_sensitive)
1618      char *file;
1619      bool case_sensitive;
1620 {
1621   language *lang;
1622   char **name, **ext, *suffix;
1623
1624   /* Try whole file name first. */
1625   for (lang = lang_names; lang->name != NULL; lang++)
1626     if (lang->filenames != NULL)
1627       for (name = lang->filenames; *name != NULL; name++)
1628         if ((case_sensitive)
1629             ? streq (*name, file)
1630             : strcaseeq (*name, file))
1631           return lang;
1632
1633   /* If not found, try suffix after last dot. */
1634   suffix = etags_strrchr (file, '.');
1635   if (suffix == NULL)
1636     return NULL;
1637   suffix += 1;
1638   for (lang = lang_names; lang->name != NULL; lang++)
1639     if (lang->suffixes != NULL)
1640       for (ext = lang->suffixes; *ext != NULL; ext++)
1641         if ((case_sensitive)
1642             ? streq (*ext, suffix)
1643             : strcaseeq (*ext, suffix))
1644           return lang;
1645   return NULL;
1646 }
1647
1648 \f
1649 /*
1650  * This routine is called on each file argument.
1651  */
1652 static void
1653 process_file_name (file, lang)
1654      char *file;
1655      language *lang;
1656 {
1657   struct stat stat_buf;
1658   FILE *inf;
1659   fdesc *fdp;
1660   compressor *compr;
1661   char *compressed_name, *uncompressed_name;
1662   char *ext, *real_name;
1663   int retval;
1664
1665   canonicalize_filename (file);
1666   if (streq (file, tagfile) && !streq (tagfile, "-"))
1667     {
1668       error ("skipping inclusion of %s in self.", file);
1669       return;
1670     }
1671   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1672     {
1673       compressed_name = NULL;
1674       real_name = uncompressed_name = savestr (file);
1675     }
1676   else
1677     {
1678       real_name = compressed_name = savestr (file);
1679       uncompressed_name = savenstr (file, ext - file);
1680     }
1681
1682   /* If the canonicalized uncompressed name
1683      has already been dealt with, skip it silently. */
1684   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1685     {
1686       assert (fdp->infname != NULL);
1687       if (streq (uncompressed_name, fdp->infname))
1688         goto cleanup;
1689     }
1690
1691   if (stat (real_name, &stat_buf) != 0)
1692     {
1693       /* Reset real_name and try with a different name. */
1694       real_name = NULL;
1695       if (compressed_name != NULL) /* try with the given suffix */
1696         {
1697           if (stat (uncompressed_name, &stat_buf) == 0)
1698             real_name = uncompressed_name;
1699         }
1700       else                      /* try all possible suffixes */
1701         {
1702           for (compr = compressors; compr->suffix != NULL; compr++)
1703             {
1704               compressed_name = concat (file, ".", compr->suffix);
1705               if (stat (compressed_name, &stat_buf) != 0)
1706                 {
1707                   if (MSDOS)
1708                     {
1709                       char *suf = compressed_name + strlen (file);
1710                       size_t suflen = strlen (compr->suffix) + 1;
1711                       for ( ; suf[1]; suf++, suflen--)
1712                         {
1713                           memmove (suf, suf + 1, suflen);
1714                           if (stat (compressed_name, &stat_buf) == 0)
1715                             {
1716                               real_name = compressed_name;
1717                               break;
1718                             }
1719                         }
1720                       if (real_name != NULL)
1721                         break;
1722                     } /* MSDOS */
1723                   free (compressed_name);
1724                   compressed_name = NULL;
1725                 }
1726               else
1727                 {
1728                   real_name = compressed_name;
1729                   break;
1730                 }
1731             }
1732         }
1733       if (real_name == NULL)
1734         {
1735           perror (file);
1736           goto cleanup;
1737         }
1738     } /* try with a different name */
1739
1740   if (!S_ISREG (stat_buf.st_mode))
1741     {
1742       error ("skipping %s: it is not a regular file.", real_name);
1743       goto cleanup;
1744     }
1745   if (real_name == compressed_name)
1746     {
1747       char *cmd = concat (compr->command, " ", real_name);
1748       inf = (FILE *) popen (cmd, "r");
1749       free (cmd);
1750     }
1751   else
1752     inf = fopen (real_name, "r");
1753   if (inf == NULL)
1754     {
1755       perror (real_name);
1756       goto cleanup;
1757     }
1758
1759   process_file (inf, uncompressed_name, lang);
1760
1761   if (real_name == compressed_name)
1762     retval = pclose (inf);
1763   else
1764     retval = fclose (inf);
1765   if (retval < 0)
1766     pfatal (file);
1767
1768  cleanup:
1769   if (compressed_name) free (compressed_name);
1770   if (uncompressed_name) free (uncompressed_name);
1771   last_node = NULL;
1772   curfdp = NULL;
1773   return;
1774 }
1775
1776 static void
1777 process_file (fh, fn, lang)
1778      FILE *fh;
1779      char *fn;
1780      language *lang;
1781 {
1782   static const fdesc emptyfdesc;
1783   fdesc *fdp;
1784
1785   /* Create a new input file description entry. */
1786   fdp = xnew (1, fdesc);
1787   *fdp = emptyfdesc;
1788   fdp->next = fdhead;
1789   fdp->infname = savestr (fn);
1790   fdp->lang = lang;
1791   fdp->infabsname = absolute_filename (fn, cwd);
1792   fdp->infabsdir = absolute_dirname (fn, cwd);
1793   if (filename_is_absolute (fn))
1794     {
1795       /* An absolute file name.  Canonicalize it. */
1796       fdp->taggedfname = absolute_filename (fn, NULL);
1797     }
1798   else
1799     {
1800       /* A file name relative to cwd.  Make it relative
1801          to the directory of the tags file. */
1802       fdp->taggedfname = relative_filename (fn, tagfiledir);
1803     }
1804   fdp->usecharno = TRUE;        /* use char position when making tags */
1805   fdp->prop = NULL;
1806   fdp->written = FALSE;         /* not written on tags file yet */
1807
1808   fdhead = fdp;
1809   curfdp = fdhead;              /* the current file description */
1810
1811   find_entries (fh);
1812
1813   /* If not Ctags, and if this is not metasource and if it contained no #line
1814      directives, we can write the tags and free all nodes pointing to
1815      curfdp. */
1816   if (!CTAGS
1817       && curfdp->usecharno      /* no #line directives in this file */
1818       && !curfdp->lang->metasource)
1819     {
1820       node *np, *prev;
1821
1822       /* Look for the head of the sublist relative to this file.  See add_node
1823          for the structure of the node tree. */
1824       prev = NULL;
1825       for (np = nodehead; np != NULL; prev = np, np = np->left)
1826         if (np->fdp == curfdp)
1827           break;
1828
1829       /* If we generated tags for this file, write and delete them. */
1830       if (np != NULL)
1831         {
1832           /* This is the head of the last sublist, if any.  The following
1833              instructions depend on this being true. */
1834           assert (np->left == NULL);
1835
1836           assert (fdhead == curfdp);
1837           assert (last_node->fdp == curfdp);
1838           put_entries (np);     /* write tags for file curfdp->taggedfname */
1839           free_tree (np);       /* remove the written nodes */
1840           if (prev == NULL)
1841             nodehead = NULL;    /* no nodes left */
1842           else
1843             prev->left = NULL;  /* delete the pointer to the sublist */
1844         }
1845     }
1846 }
1847
1848 /*
1849  * This routine sets up the boolean pseudo-functions which work
1850  * by setting boolean flags dependent upon the corresponding character.
1851  * Every char which is NOT in that string is not a white char.  Therefore,
1852  * all of the array "_wht" is set to FALSE, and then the elements
1853  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1854  * of a char is TRUE if it is the string "white", else FALSE.
1855  */
1856 static void
1857 init ()
1858 {
1859   register char *sp;
1860   register int i;
1861
1862   for (i = 0; i < CHARS; i++)
1863     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1864   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1865   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1866   notinname('\0') = notinname('\n');
1867   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1868   begtoken('\0') = begtoken('\n');
1869   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1870   intoken('\0') = intoken('\n');
1871   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1872   endtoken('\0') = endtoken('\n');
1873 }
1874
1875 /*
1876  * This routine opens the specified file and calls the function
1877  * which finds the function and type definitions.
1878  */
1879 static void
1880 find_entries (inf)
1881      FILE *inf;
1882 {
1883   char *cp;
1884   language *lang = curfdp->lang;
1885   Lang_function *parser = NULL;
1886
1887   /* If user specified a language, use it. */
1888   if (lang != NULL && lang->function != NULL)
1889     {
1890       parser = lang->function;
1891     }
1892
1893   /* Else try to guess the language given the file name. */
1894   if (parser == NULL)
1895     {
1896       lang = get_language_from_filename (curfdp->infname, TRUE);
1897       if (lang != NULL && lang->function != NULL)
1898         {
1899           curfdp->lang = lang;
1900           parser = lang->function;
1901         }
1902     }
1903
1904   /* Else look for sharp-bang as the first two characters. */
1905   if (parser == NULL
1906       && readline_internal (&lb, inf) > 0
1907       && lb.len >= 2
1908       && lb.buffer[0] == '#'
1909       && lb.buffer[1] == '!')
1910     {
1911       char *lp;
1912
1913       /* Set lp to point at the first char after the last slash in the
1914          line or, if no slashes, at the first nonblank.  Then set cp to
1915          the first successive blank and terminate the string. */
1916       lp = etags_strrchr (lb.buffer+2, '/');
1917       if (lp != NULL)
1918         lp += 1;
1919       else
1920         lp = skip_spaces (lb.buffer + 2);
1921       cp = skip_non_spaces (lp);
1922       *cp = '\0';
1923
1924       if (strlen (lp) > 0)
1925         {
1926           lang = get_language_from_interpreter (lp);
1927           if (lang != NULL && lang->function != NULL)
1928             {
1929               curfdp->lang = lang;
1930               parser = lang->function;
1931             }
1932         }
1933     }
1934
1935   /* We rewind here, even if inf may be a pipe.  We fail if the
1936      length of the first line is longer than the pipe block size,
1937      which is unlikely. */
1938   rewind (inf);
1939
1940   /* Else try to guess the language given the case insensitive file name. */
1941   if (parser == NULL)
1942     {
1943       lang = get_language_from_filename (curfdp->infname, FALSE);
1944       if (lang != NULL && lang->function != NULL)
1945         {
1946           curfdp->lang = lang;
1947           parser = lang->function;
1948         }
1949     }
1950
1951   /* Else try Fortran or C. */
1952   if (parser == NULL)
1953     {
1954       node *old_last_node = last_node;
1955
1956       curfdp->lang = get_language_from_langname ("fortran");
1957       find_entries (inf);
1958
1959       if (old_last_node == last_node)
1960         /* No Fortran entries found.  Try C. */
1961         {
1962           /* We do not tag if rewind fails.
1963              Only the file name will be recorded in the tags file. */
1964           rewind (inf);
1965           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1966           find_entries (inf);
1967         }
1968       return;
1969     }
1970
1971   if (!no_line_directive
1972       && curfdp->lang != NULL && curfdp->lang->metasource)
1973     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1974        file, or anyway we parsed a file that is automatically generated from
1975        this one.  If this is the case, the bingo.c file contained #line
1976        directives that generated tags pointing to this file.  Let's delete
1977        them all before parsing this file, which is the real source. */
1978     {
1979       fdesc **fdpp = &fdhead;
1980       while (*fdpp != NULL)
1981         if (*fdpp != curfdp
1982             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1983           /* We found one of those!  We must delete both the file description
1984              and all tags referring to it. */
1985           {
1986             fdesc *badfdp = *fdpp;
1987
1988             /* Delete the tags referring to badfdp->taggedfname
1989                that were obtained from badfdp->infname. */
1990             invalidate_nodes (badfdp, &nodehead);
1991
1992             *fdpp = badfdp->next; /* remove the bad description from the list */
1993             free_fdesc (badfdp);
1994           }
1995         else
1996           fdpp = &(*fdpp)->next; /* advance the list pointer */
1997     }
1998
1999   assert (parser != NULL);
2000
2001   /* Generic initialisations before reading from file. */
2002   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2003
2004   /* Generic initialisations before parsing file with readline. */
2005   lineno = 0;                  /* reset global line number */
2006   charno = 0;                  /* reset global char number */
2007   linecharno = 0;              /* reset global char number of line start */
2008
2009   parser (inf);
2010
2011   regex_tag_multiline ();
2012 }
2013
2014 \f
2015 /*
2016  * Check whether an implicitly named tag should be created,
2017  * then call `pfnote'.
2018  * NAME is a string that is internally copied by this function.
2019  *
2020  * TAGS format specification
2021  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2022  * The following is explained in some more detail in etc/ETAGS.EBNF.
2023  *
2024  * make_tag creates tags with "implicit tag names" (unnamed tags)
2025  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2026  *  1. NAME does not contain any of the characters in NONAM;
2027  *  2. LINESTART contains name as either a rightmost, or rightmost but
2028  *     one character, substring;
2029  *  3. the character, if any, immediately before NAME in LINESTART must
2030  *     be a character in NONAM;
2031  *  4. the character, if any, immediately after NAME in LINESTART must
2032  *     also be a character in NONAM.
2033  *
2034  * The implementation uses the notinname() macro, which recognises the
2035  * characters stored in the string `nonam'.
2036  * etags.el needs to use the same characters that are in NONAM.
2037  */
2038 static void
2039 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2040      char *name;                /* tag name, or NULL if unnamed */
2041      int namelen;               /* tag length */
2042      bool is_func;              /* tag is a function */
2043      char *linestart;           /* start of the line where tag is */
2044      int linelen;               /* length of the line where tag is */
2045      int lno;                   /* line number */
2046      long cno;                  /* character number */
2047 {
2048   bool named = (name != NULL && namelen > 0);
2049
2050   if (!CTAGS && named)          /* maybe set named to false */
2051     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2052        such that etags.el can guess a name from it. */
2053     {
2054       int i;
2055       register char *cp = name;
2056
2057       for (i = 0; i < namelen; i++)
2058         if (notinname (*cp++))
2059           break;
2060       if (i == namelen)                         /* rule #1 */
2061         {
2062           cp = linestart + linelen - namelen;
2063           if (notinname (linestart[linelen-1]))
2064             cp -= 1;                            /* rule #4 */
2065           if (cp >= linestart                   /* rule #2 */
2066               && (cp == linestart
2067                   || notinname (cp[-1]))        /* rule #3 */
2068               && strneq (name, cp, namelen))    /* rule #2 */
2069             named = FALSE;      /* use implicit tag name */
2070         }
2071     }
2072
2073   if (named)
2074     name = savenstr (name, namelen);
2075   else
2076     name = NULL;
2077   pfnote (name, is_func, linestart, linelen, lno, cno);
2078 }
2079
2080 /* Record a tag. */
2081 static void
2082 pfnote (name, is_func, linestart, linelen, lno, cno)
2083      char *name;                /* tag name, or NULL if unnamed */
2084      bool is_func;              /* tag is a function */
2085      char *linestart;           /* start of the line where tag is */
2086      int linelen;               /* length of the line where tag is */
2087      int lno;                   /* line number */
2088      long cno;                  /* character number */
2089 {
2090   register node *np;
2091
2092   assert (name == NULL || name[0] != '\0');
2093   if (CTAGS && name == NULL)
2094     return;
2095
2096   np = xnew (1, node);
2097
2098   /* If ctags mode, change name "main" to M<thisfilename>. */
2099   if (CTAGS && !cxref_style && streq (name, "main"))
2100     {
2101       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2102       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2103       fp = etags_strrchr (np->name, '.');
2104       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2105         fp[0] = '\0';
2106     }
2107   else
2108     np->name = name;
2109   np->valid = TRUE;
2110   np->been_warned = FALSE;
2111   np->fdp = curfdp;
2112   np->is_func = is_func;
2113   np->lno = lno;
2114   if (np->fdp->usecharno)
2115     /* Our char numbers are 0-base, because of C language tradition?
2116        ctags compatibility?  old versions compatibility?   I don't know.
2117        Anyway, since emacs's are 1-base we expect etags.el to take care
2118        of the difference.  If we wanted to have 1-based numbers, we would
2119        uncomment the +1 below. */
2120     np->cno = cno /* + 1 */ ;
2121   else
2122     np->cno = invalidcharno;
2123   np->left = np->right = NULL;
2124   if (CTAGS && !cxref_style)
2125     {
2126       if (strlen (linestart) < 50)
2127         np->regex = concat (linestart, "$", "");
2128       else
2129         np->regex = savenstr (linestart, 50);
2130     }
2131   else
2132     np->regex = savenstr (linestart, linelen);
2133
2134   add_node (np, &nodehead);
2135 }
2136
2137 /*
2138  * free_tree ()
2139  *      recurse on left children, iterate on right children.
2140  */
2141 static void
2142 free_tree (np)
2143      register node *np;
2144 {
2145   while (np)
2146     {
2147       register node *node_right = np->right;
2148       free_tree (np->left);
2149       if (np->name != NULL)
2150         free (np->name);
2151       free (np->regex);
2152       free (np);
2153       np = node_right;
2154     }
2155 }
2156
2157 /*
2158  * free_fdesc ()
2159  *      delete a file description
2160  */
2161 static void
2162 free_fdesc (fdp)
2163      register fdesc *fdp;
2164 {
2165   if (fdp->infname != NULL) free (fdp->infname);
2166   if (fdp->infabsname != NULL) free (fdp->infabsname);
2167   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2168   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2169   if (fdp->prop != NULL) free (fdp->prop);
2170   free (fdp);
2171 }
2172
2173 /*
2174  * add_node ()
2175  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2176  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2177  *      balancing.
2178  *
2179  *      add_node is the only function allowed to add nodes, so it can
2180  *      maintain state.
2181  */
2182 static void
2183 add_node (np, cur_node_p)
2184      node *np, **cur_node_p;
2185 {
2186   register int dif;
2187   register node *cur_node = *cur_node_p;
2188
2189   if (cur_node == NULL)
2190     {
2191       *cur_node_p = np;
2192       last_node = np;
2193       return;
2194     }
2195
2196   if (!CTAGS)
2197     /* Etags Mode */
2198     {
2199       /* For each file name, tags are in a linked sublist on the right
2200          pointer.  The first tags of different files are a linked list
2201          on the left pointer.  last_node points to the end of the last
2202          used sublist. */
2203       if (last_node != NULL && last_node->fdp == np->fdp)
2204         {
2205           /* Let's use the same sublist as the last added node. */
2206           assert (last_node->right == NULL);
2207           last_node->right = np;
2208           last_node = np;
2209         }
2210       else if (cur_node->fdp == np->fdp)
2211         {
2212           /* Scanning the list we found the head of a sublist which is
2213              good for us.  Let's scan this sublist. */
2214           add_node (np, &cur_node->right);
2215         }
2216       else
2217         /* The head of this sublist is not good for us.  Let's try the
2218            next one. */
2219         add_node (np, &cur_node->left);
2220     } /* if ETAGS mode */
2221
2222   else
2223     {
2224       /* Ctags Mode */
2225       dif = strcmp (np->name, cur_node->name);
2226
2227       /*
2228        * If this tag name matches an existing one, then
2229        * do not add the node, but maybe print a warning.
2230        */
2231       if (no_duplicates && !dif)
2232         {
2233           if (np->fdp == cur_node->fdp)
2234             {
2235               if (!no_warnings)
2236                 {
2237                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2238                            np->fdp->infname, lineno, np->name);
2239                   fprintf (stderr, "Second entry ignored\n");
2240                 }
2241             }
2242           else if (!cur_node->been_warned && !no_warnings)
2243             {
2244               fprintf
2245                 (stderr,
2246                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2247                  np->fdp->infname, cur_node->fdp->infname, np->name);
2248               cur_node->been_warned = TRUE;
2249             }
2250           return;
2251         }
2252
2253       /* Actually add the node */
2254       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2255     } /* if CTAGS mode */
2256 }
2257
2258 /*
2259  * invalidate_nodes ()
2260  *      Scan the node tree and invalidate all nodes pointing to the
2261  *      given file description (CTAGS case) or free them (ETAGS case).
2262  */
2263 static void
2264 invalidate_nodes (badfdp, npp)
2265      fdesc *badfdp;
2266      node **npp;
2267 {
2268   node *np = *npp;
2269
2270   if (np == NULL)
2271     return;
2272
2273   if (CTAGS)
2274     {
2275       if (np->left != NULL)
2276         invalidate_nodes (badfdp, &np->left);
2277       if (np->fdp == badfdp)
2278         np->valid = FALSE;
2279       if (np->right != NULL)
2280         invalidate_nodes (badfdp, &np->right);
2281     }
2282   else
2283     {
2284       assert (np->fdp != NULL);
2285       if (np->fdp == badfdp)
2286         {
2287           *npp = np->left;      /* detach the sublist from the list */
2288           np->left = NULL;      /* isolate it */
2289           free_tree (np);       /* free it */
2290           invalidate_nodes (badfdp, npp);
2291         }
2292       else
2293         invalidate_nodes (badfdp, &np->left);
2294     }
2295 }
2296
2297 \f
2298 static int total_size_of_entries __P((node *));
2299 static int number_len __P((long));
2300
/* Count the digits in the decimal representation of NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2311
2312 /*
2313  * Return total number of characters that put_entries will output for
2314  * the nodes in the linked list at the right of the specified node.
2315  * This count is irrelevant with etags.el since emacs 19.34 at least,
2316  * but is still supplied for backward compatibility.
2317  */
2318 static int
2319 total_size_of_entries (np)
2320      register node *np;
2321 {
2322   register int total = 0;
2323
2324   for (; np != NULL; np = np->right)
2325     if (np->valid)
2326       {
2327         total += strlen (np->regex) + 1;                /* pat\177 */
2328         if (np->name != NULL)
2329           total += strlen (np->name) + 1;               /* name\001 */
2330         total += number_len ((long) np->lno) + 1;       /* lno, */
2331         if (np->cno != invalidcharno)                   /* cno */
2332           total += number_len (np->cno);
2333         total += 1;                                     /* newline */
2334       }
2335
2336   return total;
2337 }
2338
2339 static void
2340 put_entries (np)
2341      register node *np;
2342 {
2343   register char *sp;
2344   static fdesc *fdp = NULL;
2345
2346   if (np == NULL)
2347     return;
2348
2349   /* Output subentries that precede this one */
2350   if (CTAGS)
2351     put_entries (np->left);
2352
2353   /* Output this entry */
2354   if (np->valid)
2355     {
2356       if (!CTAGS)
2357         {
2358           /* Etags mode */
2359           if (fdp != np->fdp)
2360             {
2361               fdp = np->fdp;
2362               fprintf (tagf, "\f\n%s,%d\n",
2363                        fdp->taggedfname, total_size_of_entries (np));
2364               fdp->written = TRUE;
2365             }
2366           fputs (np->regex, tagf);
2367           fputc ('\177', tagf);
2368           if (np->name != NULL)
2369             {
2370               fputs (np->name, tagf);
2371               fputc ('\001', tagf);
2372             }
2373           fprintf (tagf, "%d,", np->lno);
2374           if (np->cno != invalidcharno)
2375             fprintf (tagf, "%ld", np->cno);
2376           fputs ("\n", tagf);
2377         }
2378       else
2379         {
2380           /* Ctags mode */
2381           if (np->name == NULL)
2382             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2383
2384           if (cxref_style)
2385             {
2386               if (vgrind_style)
2387                 fprintf (stdout, "%s %s %d\n",
2388                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2389               else
2390                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2391                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2392             }
2393           else
2394             {
2395               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2396
2397               if (np->is_func)
2398                 {               /* function or #define macro with args */
2399                   putc (searchar, tagf);
2400                   putc ('^', tagf);
2401
2402                   for (sp = np->regex; *sp; sp++)
2403                     {
2404                       if (*sp == '\\' || *sp == searchar)
2405                         putc ('\\', tagf);
2406                       putc (*sp, tagf);
2407                     }
2408                   putc (searchar, tagf);
2409                 }
2410               else
2411                 {               /* anything else; text pattern inadequate */
2412                   fprintf (tagf, "%d", np->lno);
2413                 }
2414               putc ('\n', tagf);
2415             }
2416         }
2417     } /* if this node contains a valid tag */
2418
2419   /* Output subentries that follow this one */
2420   put_entries (np->right);
2421   if (!CTAGS)
2422     put_entries (np->left);
2423 }
2424
2425 \f
/* C extensions.  The values of C_STAR and C_JAVA include the C_PLPL bit. */
#define C_EXT   0x0fff          /* mask covering the C extension values */
#define C_PLAIN 0x0000          /* C */
#define C_PLPL  0x0001          /* C++ */
#define C_STAR  0x0003          /* C* */
#define C_JAVA  0x0005          /* JAVA */
#define C_AUTO  0x1000          /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2434
/*
 * The C symbol tables.
 * Each keyword of the gperf table is classified as one of these types.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__ */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2449
2450 static unsigned int hash __P((const char *, unsigned int));
2451 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2452 static enum sym_type C_symtype __P((char *, int, int));
2453
2454 /* Feed stuff between (but not including) %[ and %] lines to:
2455      gperf -m 5
2456 %[
2457 %compare-strncmp
2458 %enum
2459 %struct-type
2460 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2461 %%
2462 if,             0,                      st_C_ignore
2463 for,            0,                      st_C_ignore
2464 while,          0,                      st_C_ignore
2465 switch,         0,                      st_C_ignore
2466 return,         0,                      st_C_ignore
2467 __attribute__,  0,                      st_C_attribute
2468 @interface,     0,                      st_C_objprot
2469 @protocol,      0,                      st_C_objprot
2470 @implementation,0,                      st_C_objimpl
2471 @end,           0,                      st_C_objend
2472 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2473 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2474 friend,         C_PLPL,                 st_C_ignore
2475 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2476 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2477 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2478 class,          0,                      st_C_class
2479 namespace,      C_PLPL,                 st_C_struct
2480 domain,         C_STAR,                 st_C_struct
2481 union,          0,                      st_C_struct
2482 struct,         0,                      st_C_struct
2483 extern,         0,                      st_C_extern
2484 enum,           0,                      st_C_enum
2485 typedef,        0,                      st_C_typedef
2486 define,         0,                      st_C_define
2487 undef,          0,                      st_C_define
2488 operator,       C_PLPL,                 st_C_operator
2489 template,       0,                      st_C_template
2490 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2491 DEFUN,          0,                      st_C_gnumacro
2492 SYSCALL,        0,                      st_C_gnumacro
2493 ENTRY,          0,                      st_C_gnumacro
2494 PSEUDO,         0,                      st_C_gnumacro
2495 # These are defined inside C functions, so currently they are not met.
2496 # EXFUN used in glibc, DEFVAR_* in emacs.
2497 #EXFUN,         0,                      st_C_gnumacro
2498 #DEFVAR_,       0,                      st_C_gnumacro
2499 %]
2500 and replace lines between %< and %> with its output, then:
2501  - remove the #if characterset check
2502  - make in_word_set static and not inline. */
2503 /*%<*/
2504 /* C code produced by gperf version 3.0.1 */
2505 /* Command-line: gperf -m 5  */
2506 /* Computed positions: -k'2-3' */
2507
2508 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2509 /* maximum key range = 33, duplicates = 0 */
2510
/* Hash function produced by gperf for the keyword table.  The result is
   LEN plus the association values of the second and, for tokens that are
   not exactly two characters long, third characters of STR.  The only
   caller, in_word_set, rejects tokens shorter than two characters before
   calling, so str[1] (and str[2] when LEN != 2) are always readable.  */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int key = len;

  /* Only a two-character token skips the third-character contribution.  */
  if (key != 2)
    key += asso_values[(unsigned char) str[2]];
  key += asso_values[(unsigned char) str[1]];

  return key;
}
2565
2566 static struct C_stab_entry *
2567 in_word_set (str, len)
2568      register const char *str;
2569      register unsigned int len;
2570 {
2571   enum
2572     {
2573       TOTAL_KEYWORDS = 32,
2574       MIN_WORD_LENGTH = 2,
2575       MAX_WORD_LENGTH = 15,
2576       MIN_HASH_VALUE = 2,
2577       MAX_HASH_VALUE = 34
2578     };
2579
2580   static struct C_stab_entry wordlist[] =
2581     {
2582       {""}, {""},
2583       {"if",            0,                      st_C_ignore},
2584       {""},
2585       {"@end",          0,                      st_C_objend},
2586       {"union",         0,                      st_C_struct},
2587       {"define",                0,                      st_C_define},
2588       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2589       {"template",      0,                      st_C_template},
2590       {"operator",      C_PLPL,                 st_C_operator},
2591       {"@interface",    0,                      st_C_objprot},
2592       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2593       {"friend",                C_PLPL,                 st_C_ignore},
2594       {"typedef",       0,                      st_C_typedef},
2595       {"return",                0,                      st_C_ignore},
2596       {"@implementation",0,                     st_C_objimpl},
2597       {"@protocol",     0,                      st_C_objprot},
2598       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2599       {"extern",                0,                      st_C_extern},
2600       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2601       {"struct",                0,                      st_C_struct},
2602       {"domain",                C_STAR,                 st_C_struct},
2603       {"switch",                0,                      st_C_ignore},
2604       {"enum",          0,                      st_C_enum},
2605       {"for",           0,                      st_C_ignore},
2606       {"namespace",     C_PLPL,                 st_C_struct},
2607       {"class",         0,                      st_C_class},
2608       {"while",         0,                      st_C_ignore},
2609       {"undef",         0,                      st_C_define},
2610       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2611       {"__attribute__", 0,                      st_C_attribute},
2612       {"SYSCALL",       0,                      st_C_gnumacro},
2613       {"ENTRY",         0,                      st_C_gnumacro},
2614       {"PSEUDO",                0,                      st_C_gnumacro},
2615       {"DEFUN",         0,                      st_C_gnumacro}
2616     };
2617
2618   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2619     {
2620       register int key = hash (str, len);
2621
2622       if (key <= MAX_HASH_VALUE && key >= 0)
2623         {
2624           register const char *s = wordlist[key].name;
2625
2626           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2627             return &wordlist[key];
2628         }
2629     }
2630   return 0;
2631 }
2632 /*%>*/
2633
2634 static enum sym_type
2635 C_symtype (str, len, c_ext)
2636      char *str;
2637      int len;
2638      int c_ext;
2639 {
2640   register struct C_stab_entry *se = in_word_set (str, len);
2641
2642   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2643     return st_none;
2644   return se->type;
2645 }
2646
2647 \f
/*
 * Set by consider_token when an __attribute__ keyword is met, so that
 * the __attribute__ ((list)) construct can be ignored.
 */
static bool inattribute;

/*
 * State of the finite automaton that recognizes C functions and
 * variables.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Function or variable: the extern keyword has been seen.  */
static bool fvextern;

/*
 * State of the finite automaton that recognizes typedefs.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * State of the finite automaton that recognizes struct-like
 * constructs (enum, struct and union).
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;

/*
 * Name of the current Objective C class; meaningful only while objdef
 * is different from onone.
 */
static char *objtag = "<uninited>";

/*
 * State of the small automaton that handles preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State of the automaton for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2736
2737
/*
 * Description of the token most recently read and of how it should be
 * tagged.  make_C_tag builds a tag from it.
 */
static struct tok
{
  /* Generic part: a string reference that can be passed around.  */
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */

  /* Tag-specific part: when a tag is being created, the token above is
     the pattern that will be used to create it.  */
  bool valid;                   /* FALSE means make no tag for this token;
                                   the token should be invalidated whenever
                                   a state machine is reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2760
2761 /*
2762  * Variables and functions for dealing with nested structures.
2763  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2764  */
2765 static void pushclass_above __P((int, char *, int));
2766 static void popclass_above __P((int));
2767 static void write_classname __P((linebuffer *, char *qualifier));
2768
2769 static struct {
2770   char **cname;                 /* nested class names */
2771   int *bracelev;                /* nested class brace level */
2772   int nl;                       /* class nesting level (elements used) */
2773   int size;                     /* length of the array */
2774 } cstack;                       /* stack for nested declaration tags */
2775 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2776 #define nestlev         (cstack.nl)
2777 /* After struct keyword or in struct body, not inside a nested function. */
2778 #define instruct        (structdef == snone && nestlev > 0                      \
2779                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2780
2781 static void
2782 pushclass_above (bracelev, str, len)
2783      int bracelev;
2784      char *str;
2785      int len;
2786 {
2787   int nl;
2788
2789   popclass_above (bracelev);
2790   nl = cstack.nl;
2791   if (nl >= cstack.size)
2792     {
2793       int size = cstack.size *= 2;
2794       xrnew (cstack.cname, size, char *);
2795       xrnew (cstack.bracelev, size, int);
2796     }
2797   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2798   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2799   cstack.bracelev[nl] = bracelev;
2800   cstack.nl = nl + 1;
2801 }
2802
2803 static void
2804 popclass_above (bracelev)
2805      int bracelev;
2806 {
2807   int nl;
2808
2809   for (nl = cstack.nl - 1;
2810        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2811        nl--)
2812     {
2813       if (cstack.cname[nl] != NULL)
2814         free (cstack.cname[nl]);
2815       cstack.nl = nl;
2816     }
2817 }
2818
2819 static void
2820 write_classname (cn, qualifier)
2821      linebuffer *cn;
2822      char *qualifier;
2823 {
2824   int i, len;
2825   int qlen = strlen (qualifier);
2826
2827   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2828     {
2829       len = 0;
2830       cn->len = 0;
2831       cn->buffer[0] = '\0';
2832     }
2833   else
2834     {
2835       len = strlen (cstack.cname[0]);
2836       linebuffer_setlen (cn, len);
2837       strcpy (cn->buffer, cstack.cname[0]);
2838     }
2839   for (i = 1; i < cstack.nl; i++)
2840     {
2841       char *s;
2842       int slen;
2843
2844       s = cstack.cname[i];
2845       if (s == NULL)
2846         continue;
2847       slen = strlen (s);
2848       len += slen + qlen;
2849       linebuffer_setlen (cn, len);
2850       strncat (cn->buffer, qualifier, qlen);
2851       strncat (cn->buffer, s, slen);
2852     }
2853 }
2854
2855 \f
2856 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2857 static void make_C_tag __P((bool));
2858
2859 /*
2860  * consider_token ()
2861  *      checks to see if the current token is at the start of a
2862  *      function or variable, or corresponds to a typedef, or
2863  *      is a struct/union/enum tag, or #define, or an enum constant.
2864  *
2865  *      *IS_FUNC gets TRUE if the token is a function or #define macro
2866  *      with args.  C_EXTP points to which language we are looking at.
2867  *
2868  * Globals
2869  *      fvdef                   IN OUT
2870  *      structdef               IN OUT
2871  *      definedef               IN OUT
2872  *      typdef                  IN OUT
2873  *      objdef                  IN OUT
2874  */
2875
2876 static bool
2877 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2878      register char *str;        /* IN: token pointer */
2879      register int len;          /* IN: token length */
2880      register int c;            /* IN: first char after the token */
2881      int *c_extp;               /* IN, OUT: C extensions mask */
2882      int bracelev;              /* IN: brace level */
2883      int parlev;                /* IN: parenthesis level */
2884      bool *is_func_or_var;      /* OUT: function or variable found */
2885 {
2886   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2887      structtype is the type of the preceding struct-like keyword, and
2888      structbracelev is the brace level where it has been seen. */
2889   static enum sym_type structtype;
2890   static int structbracelev;
2891   static enum sym_type toktype;
2892
2893
2894   toktype = C_symtype (str, len, *c_extp);
2895
2896   /*
2897    * Skip __attribute__
2898    */
2899   if (toktype == st_C_attribute)
2900     {
2901       inattribute = TRUE;
2902       return FALSE;
2903      }
2904
2905    /*
2906     * Advance the definedef state machine.
2907     */
2908    switch (definedef)
2909      {
2910      case dnone:
2911        /* We're not on a preprocessor line. */
2912        if (toktype == st_C_gnumacro)
2913          {
2914            fvdef = fdefunkey;
2915            return FALSE;
2916          }
2917        break;
2918      case dsharpseen:
2919        if (toktype == st_C_define)
2920          {
2921            definedef = ddefineseen;
2922          }
2923        else
2924          {
2925            definedef = dignorerest;
2926          }
2927        return FALSE;
2928      case ddefineseen:
2929        /*
2930         * Make a tag for any macro, unless it is a constant
2931         * and constantypedefs is FALSE.
2932         */
2933        definedef = dignorerest;
2934        *is_func_or_var = (c == '(');
2935        if (!*is_func_or_var && !constantypedefs)
2936          return FALSE;
2937        else
2938          return TRUE;
2939      case dignorerest:
2940        return FALSE;
2941      default:
2942        error ("internal error: definedef value.", (char *)NULL);
2943      }
2944
2945    /*
2946     * Now typedefs
2947     */
2948    switch (typdef)
2949      {
2950      case tnone:
2951        if (toktype == st_C_typedef)
2952          {
2953            if (typedefs)
2954              typdef = tkeyseen;
2955            fvextern = FALSE;
2956            fvdef = fvnone;
2957            return FALSE;
2958          }
2959        break;
2960      case tkeyseen:
2961        switch (toktype)
2962          {
2963          case st_none:
2964          case st_C_class:
2965          case st_C_struct:
2966          case st_C_enum:
2967            typdef = ttypeseen;
2968          }
2969        break;
2970      case ttypeseen:
2971        if (structdef == snone && fvdef == fvnone)
2972          {
2973            fvdef = fvnameseen;
2974            return TRUE;
2975          }
2976        break;
2977      case tend:
2978        switch (toktype)
2979          {
2980          case st_C_class:
2981          case st_C_struct:
2982          case st_C_enum:
2983            return FALSE;
2984          }
2985        return TRUE;
2986      }
2987
2988    switch (toktype)
2989      {
2990      case st_C_javastruct:
2991        if (structdef == stagseen)
2992          structdef = scolonseen;
2993        return FALSE;
2994      case st_C_template:
2995      case st_C_class:
2996        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2997            && bracelev == 0
2998            && definedef == dnone && structdef == snone
2999            && typdef == tnone && fvdef == fvnone)
3000          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3001        if (toktype == st_C_template)
3002          break;
3003        /* FALLTHRU */
3004      case st_C_struct:
3005      case st_C_enum:
3006        if (parlev == 0
3007            && fvdef != vignore
3008            && (typdef == tkeyseen
3009                || (typedefs_or_cplusplus && structdef == snone)))
3010          {
3011            structdef = skeyseen;
3012            structtype = toktype;
3013            structbracelev = bracelev;
3014            if (fvdef == fvnameseen)
3015              fvdef = fvnone;
3016          }
3017        return FALSE;
3018      }
3019
3020    if (structdef == skeyseen)
3021      {
3022        structdef = stagseen;
3023        return TRUE;
3024      }
3025
3026    if (typdef != tnone)
3027      definedef = dnone;
3028
3029    /* Detect Objective C constructs. */
3030    switch (objdef)
3031      {
3032      case onone:
3033        switch (toktype)
3034          {
3035          case st_C_objprot:
3036            objdef = oprotocol;
3037            return FALSE;
3038          case st_C_objimpl:
3039            objdef = oimplementation;
3040            return FALSE;
3041          }
3042        break;
3043      case oimplementation:
3044        /* Save the class tag for functions or variables defined inside. */
3045        objtag = savenstr (str, len);
3046        objdef = oinbody;
3047        return FALSE;
3048      case oprotocol:
3049        /* Save the class tag for categories. */
3050        objtag = savenstr (str, len);
3051        objdef = otagseen;
3052        *is_func_or_var = TRUE;
3053        return TRUE;
3054      case oparenseen:
3055        objdef = ocatseen;
3056        *is_func_or_var = TRUE;
3057        return TRUE;
3058      case oinbody:
3059        break;
3060      case omethodsign:
3061        if (parlev == 0)
3062          {
3063            fvdef = fvnone;
3064            objdef = omethodtag;
3065            linebuffer_setlen (&token_name, len);
3066            strncpy (token_name.buffer, str, len);
3067            token_name.buffer[len] = '\0';
3068            return TRUE;
3069          }
3070        return FALSE;
3071      case omethodcolon:
3072        if (parlev == 0)
3073          objdef = omethodparm;
3074        return FALSE;
3075      case omethodparm:
3076        if (parlev == 0)
3077          {
3078            fvdef = fvnone;
3079            objdef = omethodtag;
3080            linebuffer_setlen (&token_name, token_name.len + len);
3081            strncat (token_name.buffer, str, len);
3082            return TRUE;
3083          }
3084        return FALSE;
3085      case oignore:
3086        if (toktype == st_C_objend)
3087          {
3088            /* Memory leakage here: the string pointed by objtag is
3089               never released, because many tests would be needed to
3090               avoid breaking on incorrect input code.  The amount of
3091               memory leaked here is the sum of the lengths of the
3092               class tags.
3093            free (objtag); */
3094            objdef = onone;
3095          }
3096        return FALSE;
3097      }
3098
3099    /* A function, variable or enum constant? */
3100    switch (toktype)
3101      {
3102      case st_C_extern:
3103        fvextern = TRUE;
3104        switch  (fvdef)
3105          {
3106          case finlist:
3107          case flistseen:
3108          case fignore:
3109          case vignore:
3110            break;
3111          default:
3112            fvdef = fvnone;
3113          }
3114        return FALSE;
3115      case st_C_ignore:
3116        fvextern = FALSE;
3117        fvdef = vignore;
3118        return FALSE;
3119      case st_C_operator:
3120        fvdef = foperator;
3121        *is_func_or_var = TRUE;
3122        return TRUE;
3123      case st_none:
3124        if (constantypedefs
3125            && structdef == snone
3126            && structtype == st_C_enum && bracelev > structbracelev)
3127          return TRUE;           /* enum constant */
3128        switch (fvdef)
3129          {
3130          case fdefunkey:
3131            if (bracelev > 0)
3132              break;
3133            fvdef = fdefunname;  /* GNU macro */
3134            *is_func_or_var = TRUE;
3135            return TRUE;
3136          case fvnone:
3137            switch (typdef)
3138              {
3139              case ttypeseen:
3140                return FALSE;
3141              case tnone:
3142                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3143                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3144                  {
3145                    fvdef = vignore;
3146                    return FALSE;
3147                  }
3148                break;
3149              }
3150           /* FALLTHRU */
3151           case fvnameseen:
3152           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3153             {
3154               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3155                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3156               fvdef = foperator;
3157               *is_func_or_var = TRUE;
3158               return TRUE;
3159             }
3160           if (bracelev > 0 && !instruct)
3161             break;
3162           fvdef = fvnameseen;   /* function or variable */
3163           *is_func_or_var = TRUE;
3164           return TRUE;
3165         }
3166       break;
3167     }
3168
3169   return FALSE;
3170 }
3171
3172 \f
3173 /*
3174  * C_entries often keeps pointers to tokens or lines which are older than
3175  * the line currently read.  By keeping two line buffers, and switching
3176  * them at end of line, it is possible to use those pointers.
3177  */
3178 static struct
3179 {
3180   long linepos;
3181   linebuffer lb;
3182 } lbs[2];
3183
3184 #define current_lb_is_new (newndx == curndx)
3185 #define switch_line_buffers() (curndx = 1 - curndx)
3186
3187 #define curlb (lbs[curndx].lb)
3188 #define newlb (lbs[newndx].lb)
3189 #define curlinepos (lbs[curndx].linepos)
3190 #define newlinepos (lbs[newndx].linepos)
3191
3192 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3193 #define cplpl (c_ext & C_PLPL)
3194 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3195
3196 #define CNL_SAVE_DEFINEDEF()                                            \
3197 do {                                                                    \
3198   curlinepos = charno;                                                  \
3199   readline (&curlb, inf);                                               \
3200   lp = curlb.buffer;                                                    \
3201   quotednl = FALSE;                                                     \
3202   newndx = curndx;                                                      \
3203 } while (0)
3204
3205 #define CNL()                                                           \
3206 do {                                                                    \
3207   CNL_SAVE_DEFINEDEF();                                                 \
3208   if (savetoken.valid)                                                  \
3209     {                                                                   \
3210       token = savetoken;                                                \
3211       savetoken.valid = FALSE;                                          \
3212     }                                                                   \
3213   definedef = dnone;                                                    \
3214 } while (0)
3215
3216
3217 static void
3218 make_C_tag (isfun)
3219      bool isfun;
3220 {
3221   /* This function is never called when token.valid is FALSE, but
3222      we must protect against invalid input or internal errors. */
3223   if (!DEBUG && !token.valid)
3224     return;
3225
3226   if (token.valid)
3227     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3228               token.offset+token.length+1, token.lineno, token.linepos);
3229   else                          /* this case is optimised away if !DEBUG */
3230     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3231               token_name.len + 17, isfun, token.line,
3232               token.offset+token.length+1, token.lineno, token.linepos);
3233
3234   token.valid = FALSE;
3235 }
3236
3237
3238 /*
3239  * C_entries ()
3240  *      This routine finds functions, variables, typedefs,
3241  *      #define's, enum constants and struct/union/enum definitions in
3242  *      C syntax and adds them to the list.
3243  */
3244 static void
3245 C_entries (c_ext, inf)
3246      int c_ext;                 /* extension of C */
3247      FILE *inf;                 /* input file */
3248 {
3249   register char c;              /* latest char read; '\0' for end of line */
3250   register char *lp;            /* pointer one beyond the character `c' */
3251   int curndx, newndx;           /* indices for current and new lb */
3252   register int tokoff;          /* offset in line of start of current token */
3253   register int toklen;          /* length of current token */
3254   char *qualifier;              /* string used to qualify names */
3255   int qlen;                     /* length of qualifier */
3256   int bracelev;                 /* current brace level */
3257   int bracketlev;               /* current bracket level */
3258   int parlev;                   /* current parenthesis level */
3259   int attrparlev;               /* __attribute__ parenthesis level */
3260   int templatelev;              /* current template level */
3261   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3262   bool incomm, inquote, inchar, quotednl, midtoken;
3263   bool yacc_rules;              /* in the rules part of a yacc file */
3264   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3265
3266
3267   linebuffer_init (&lbs[0].lb);
3268   linebuffer_init (&lbs[1].lb);
3269   if (cstack.size == 0)
3270     {
3271       cstack.size = (DEBUG) ? 1 : 4;
3272       cstack.nl = 0;
3273       cstack.cname = xnew (cstack.size, char *);
3274       cstack.bracelev = xnew (cstack.size, int);
3275     }
3276
3277   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3278   curndx = newndx = 0;
3279   lp = curlb.buffer;
3280   *lp = 0;
3281
3282   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3283   structdef = snone; definedef = dnone; objdef = onone;
3284   yacc_rules = FALSE;
3285   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3286   token.valid = savetoken.valid = FALSE;
3287   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3288   if (cjava)
3289     { qualifier = "."; qlen = 1; }
3290   else
3291     { qualifier = "::"; qlen = 2; }
3292
3293
3294   while (!feof (inf))
3295     {
3296       c = *lp++;
3297       if (c == '\\')
3298         {
3299           /* If we are at the end of the line, the next character is a
3300              '\0'; do not skip it, because it is what tells us
3301              to read the next line.  */
3302           if (*lp == '\0')
3303             {
3304               quotednl = TRUE;
3305               continue;
3306             }
3307           lp++;
3308           c = ' ';
3309         }
3310       else if (incomm)
3311         {
3312           switch (c)
3313             {
3314             case '*':
3315               if (*lp == '/')
3316                 {
3317                   c = *lp++;
3318                   incomm = FALSE;
3319                 }
3320               break;
3321             case '\0':
3322               /* Newlines inside comments do not end macro definitions in
3323                  traditional cpp. */
3324               CNL_SAVE_DEFINEDEF ();
3325               break;
3326             }
3327           continue;
3328         }
3329       else if (inquote)
3330         {
3331           switch (c)
3332             {
3333             case '"':
3334               inquote = FALSE;
3335               break;
3336             case '\0':
3337               /* Newlines inside strings do not end macro definitions
3338                  in traditional cpp, even though compilers don't
3339                  usually accept them. */
3340               CNL_SAVE_DEFINEDEF ();
3341               break;
3342             }
3343           continue;
3344         }
3345       else if (inchar)
3346         {
3347           switch (c)
3348             {
3349             case '\0':
3350               /* Hmmm, something went wrong. */
3351               CNL ();
3352               /* FALLTHRU */
3353             case '\'':
3354               inchar = FALSE;
3355               break;
3356             }
3357           continue;
3358         }
3359       else if (bracketlev > 0)
3360         {
3361           switch (c)
3362             {
3363             case ']':
3364               if (--bracketlev > 0)
3365                 continue;
3366               break;
3367             case '\0':
3368               CNL_SAVE_DEFINEDEF ();
3369               break;
3370             }
3371           continue;
3372         }
3373       else switch (c)
3374         {
3375         case '"':
3376           inquote = TRUE;
3377           if (inattribute)
3378             break;
3379           switch (fvdef)
3380             {
3381             case fdefunkey:
3382             case fstartlist:
3383             case finlist:
3384             case fignore:
3385             case vignore:
3386               break;
3387             default:
3388               fvextern = FALSE;
3389               fvdef = fvnone;
3390             }
3391           continue;
3392         case '\'':
3393           inchar = TRUE;
3394           if (inattribute)
3395             break;
3396           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3397             {
3398               fvextern = FALSE;
3399               fvdef = fvnone;
3400             }
3401           continue;
3402         case '/':
3403           if (*lp == '*')
3404             {
3405               incomm = TRUE;
3406               lp++;
3407               c = ' ';
3408             }
3409           else if (/* cplpl && */ *lp == '/')
3410             {
3411               c = '\0';
3412             }
3413           break;
3414         case '%':
3415           if ((c_ext & YACC) && *lp == '%')
3416             {
3417               /* Entering or exiting rules section in yacc file. */
3418               lp++;
3419               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3420               typdef = tnone; structdef = snone;
3421               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3422               bracelev = 0;
3423               yacc_rules = !yacc_rules;
3424               continue;
3425             }
3426           else
3427             break;
3428         case '#':
3429           if (definedef == dnone)
3430             {
3431               char *cp;
3432               bool cpptoken = TRUE;
3433
3434               /* Look back on this line.  If all blanks, or nonblanks
3435                  followed by an end of comment, this is a preprocessor
3436                  token. */
3437               for (cp = newlb.buffer; cp < lp-1; cp++)
3438                 if (!iswhite (*cp))
3439                   {
3440                     if (*cp == '*' && *(cp+1) == '/')
3441                       {
3442                         cp++;
3443                         cpptoken = TRUE;
3444                       }
3445                     else
3446                       cpptoken = FALSE;
3447                   }
3448               if (cpptoken)
3449                 definedef = dsharpseen;
3450             } /* if (definedef == dnone) */
3451           continue;
3452         case '[':
3453           bracketlev++;
3454             continue;
3455         } /* switch (c) */
3456
3457
3458       /* Consider token only if some involved conditions are satisfied. */
3459       if (typdef != tignore
3460           && definedef != dignorerest
3461           && fvdef != finlist
3462           && templatelev == 0
3463           && (definedef != dnone
3464               || structdef != scolonseen)
3465           && !inattribute)
3466         {
3467           if (midtoken)
3468             {
3469               if (endtoken (c))
3470                 {
3471                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3472                     /* This handles :: in the middle,
3473                        but not at the beginning of an identifier.
3474                        Also, space-separated :: is not recognised. */
3475                     {
3476                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3477                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3478                       lp += 2;
3479                       toklen += 2;
3480                       c = lp[-1];
3481                       goto still_in_token;
3482                     }
3483                   else
3484                     {
3485                       bool funorvar = FALSE;
3486
3487                       if (yacc_rules
3488                           || consider_token (newlb.buffer + tokoff, toklen, c,
3489                                              &c_ext, bracelev, parlev,
3490                                              &funorvar))
3491                         {
3492                           if (fvdef == foperator)
3493                             {
3494                               char *oldlp = lp;
3495                               lp = skip_spaces (lp-1);
3496                               if (*lp != '\0')
3497                                 lp += 1;
3498                               while (*lp != '\0'
3499                                      && !iswhite (*lp) && *lp != '(')
3500                                 lp += 1;
3501                               c = *lp++;
3502                               toklen += lp - oldlp;
3503                             }
3504                           token.named = FALSE;
3505                           if (!plainc
3506                               && nestlev > 0 && definedef == dnone)
3507                             /* in struct body */
3508                             {
3509                               write_classname (&token_name, qualifier);
3510                               linebuffer_setlen (&token_name,
3511                                                  token_name.len+qlen+toklen);
3512                               strcat (token_name.buffer, qualifier);
3513                               strncat (token_name.buffer,
3514                                        newlb.buffer + tokoff, toklen);
3515                               token.named = TRUE;
3516                             }
3517                           else if (objdef == ocatseen)
3518                             /* Objective C category */
3519                             {
3520                               int len = strlen (objtag) + 2 + toklen;
3521                               linebuffer_setlen (&token_name, len);
3522                               strcpy (token_name.buffer, objtag);
3523                               strcat (token_name.buffer, "(");
3524                               strncat (token_name.buffer,
3525                                        newlb.buffer + tokoff, toklen);
3526                               strcat (token_name.buffer, ")");
3527                               token.named = TRUE;
3528                             }
3529                           else if (objdef == omethodtag
3530                                    || objdef == omethodparm)
3531                             /* Objective C method */
3532                             {
3533                               token.named = TRUE;
3534                             }
3535                           else if (fvdef == fdefunname)
3536                             /* GNU DEFUN and similar macros */
3537                             {
3538                               bool defun = (newlb.buffer[tokoff] == 'F');
3539                               int off = tokoff;
3540                               int len = toklen;
3541
3542                               /* Rewrite the tag so that emacs lisp DEFUNs
3543                                  can be found by their elisp name */
3544                               if (defun)
3545                                 {
3546                                   off += 1;
3547                                   len -= 1;
3548                                 }
3549                               linebuffer_setlen (&token_name, len);
3550                               strncpy (token_name.buffer,
3551                                        newlb.buffer + off, len);
3552                               token_name.buffer[len] = '\0';
3553                               if (defun)
3554                                 while (--len >= 0)
3555                                   if (token_name.buffer[len] == '_')
3556                                     token_name.buffer[len] = '-';
3557                               token.named = defun;
3558                             }
3559                           else
3560                             {
3561                               linebuffer_setlen (&token_name, toklen);
3562                               strncpy (token_name.buffer,
3563                                        newlb.buffer + tokoff, toklen);
3564                               token_name.buffer[toklen] = '\0';
3565                               /* Name macros and members. */
3566                               token.named = (structdef == stagseen
3567                                              || typdef == ttypeseen
3568                                              || typdef == tend
3569                                              || (funorvar
3570                                                  && definedef == dignorerest)
3571                                              || (funorvar
3572                                                  && definedef == dnone
3573                                                  && structdef == snone
3574                                                  && bracelev > 0));
3575                             }
3576                           token.lineno = lineno;
3577                           token.offset = tokoff;
3578                           token.length = toklen;
3579                           token.line = newlb.buffer;
3580                           token.linepos = newlinepos;
3581                           token.valid = TRUE;
3582
3583                           if (definedef == dnone
3584                               && (fvdef == fvnameseen
3585                                   || fvdef == foperator
3586                                   || structdef == stagseen
3587                                   || typdef == tend
3588                                   || typdef == ttypeseen
3589                                   || objdef != onone))
3590                             {
3591                               if (current_lb_is_new)
3592                                 switch_line_buffers ();
3593                             }
3594                           else if (definedef != dnone
3595                                    || fvdef == fdefunname
3596                                    || instruct)
3597                             make_C_tag (funorvar);
3598                         }
3599                       else /* not yacc and consider_token failed */
3600                         {
3601                           if (inattribute && fvdef == fignore)
3602                             {
3603                               /* We have just met __attribute__ after a
3604                                  function parameter list: do not tag the
3605                                  function again. */
3606                               fvdef = fvnone;
3607                             }
3608                         }
3609                       midtoken = FALSE;
3610                     }
3611                 } /* if (endtoken (c)) */
3612               else if (intoken (c))
3613                 still_in_token:
3614                 {
3615                   toklen++;
3616                   continue;
3617                 }
3618             } /* if (midtoken) */
3619           else if (begtoken (c))
3620             {
3621               switch (definedef)
3622                 {
3623                 case dnone:
3624                   switch (fvdef)
3625                     {
3626                     case fstartlist:
3627                       /* This prevents tagging fb in
3628                          void (__attribute__((noreturn)) *fb) (void);
3629                          Fixing this is not easy and not very important. */
3630                       fvdef = finlist;
3631                       continue;
3632                     case flistseen:
3633                       if (plainc || declarations)
3634                         {
3635                           make_C_tag (TRUE); /* a function */
3636                           fvdef = fignore;
3637                         }
3638                       break;
3639                     }
3640                   if (structdef == stagseen && !cjava)
3641                     {
3642                       popclass_above (bracelev);
3643                       structdef = snone;
3644                     }
3645                   break;
3646                 case dsharpseen:
3647                   savetoken = token;
3648                   break;
3649                 }
3650               if (!yacc_rules || lp == newlb.buffer + 1)
3651                 {
3652                   tokoff = lp - 1 - newlb.buffer;
3653                   toklen = 1;
3654                   midtoken = TRUE;
3655                 }
3656               continue;
3657             } /* if (begtoken) */
3658         } /* if must look at token */
3659
3660
3661       /* Detect end of line, colon, comma, semicolon and various braces
3662          after having handled a token.*/
3663       switch (c)
3664         {
3665         case ':':
3666           if (inattribute)
3667             break;
3668           if (yacc_rules && token.offset == 0 && token.valid)
3669             {
3670               make_C_tag (FALSE); /* a yacc function */
3671               break;
3672             }
3673           if (definedef != dnone)
3674             break;
3675           switch (objdef)
3676             {
3677             case  otagseen:
3678               objdef = oignore;
3679               make_C_tag (TRUE); /* an Objective C class */
3680               break;
3681             case omethodtag:
3682             case omethodparm:
3683               objdef = omethodcolon;
3684               linebuffer_setlen (&token_name, token_name.len + 1);
3685               strcat (token_name.buffer, ":");
3686               break;
3687             }
3688           if (structdef == stagseen)
3689             {
3690               structdef = scolonseen;
3691               break;
3692             }
3693           /* Should be useless, but may be work as a safety net. */
3694           if (cplpl && fvdef == flistseen)
3695             {
3696               make_C_tag (TRUE); /* a function */
3697               fvdef = fignore;
3698               break;
3699             }
3700           break;
3701         case ';':
3702           if (definedef != dnone || inattribute)
3703             break;
3704           switch (typdef)
3705             {
3706             case tend:
3707             case ttypeseen:
3708               make_C_tag (FALSE); /* a typedef */
3709               typdef = tnone;
3710               fvdef = fvnone;
3711               break;
3712             case tnone:
3713             case tinbody:
3714             case tignore:
3715               switch (fvdef)
3716                 {
3717                 case fignore:
3718                   if (typdef == tignore || cplpl)
3719                     fvdef = fvnone;
3720                   break;
3721                 case fvnameseen:
3722                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3723                       || (members && instruct))
3724                     make_C_tag (FALSE); /* a variable */
3725                   fvextern = FALSE;
3726                   fvdef = fvnone;
3727                   token.valid = FALSE;
3728                   break;
3729                 case flistseen:
3730                   if ((declarations
3731                        && (cplpl || !instruct)
3732                        && (typdef == tnone || (typdef != tignore && instruct)))
3733                       || (members
3734                           && plainc && instruct))
3735                     make_C_tag (TRUE);  /* a function */
3736                   /* FALLTHRU */
3737                 default:
3738                   fvextern = FALSE;
3739                   fvdef = fvnone;
3740                   if (declarations
3741                        && cplpl && structdef == stagseen)
3742                     make_C_tag (FALSE); /* forward declaration */
3743                   else
3744                     token.valid = FALSE;
3745                 } /* switch (fvdef) */
3746               /* FALLTHRU */
3747             default:
3748               if (!instruct)
3749                 typdef = tnone;
3750             }
3751           if (structdef == stagseen)
3752             structdef = snone;
3753           break;
3754         case ',':
3755           if (definedef != dnone || inattribute)
3756             break;
3757           switch (objdef)
3758             {
3759             case omethodtag:
3760             case omethodparm:
3761               make_C_tag (TRUE); /* an Objective C method */
3762               objdef = oinbody;
3763               break;
3764             }
3765           switch (fvdef)
3766             {
3767             case fdefunkey:
3768             case foperator:
3769             case fstartlist:
3770             case finlist:
3771             case fignore:
3772             case vignore:
3773               break;
3774             case fdefunname:
3775               fvdef = fignore;
3776               break;
3777             case fvnameseen:
3778               if (parlev == 0
3779                   && ((globals
3780                        && bracelev == 0
3781                        && templatelev == 0
3782                        && (!fvextern || declarations))
3783                       || (members && instruct)))
3784                   make_C_tag (FALSE); /* a variable */
3785               break;
3786             case flistseen:
3787               if ((declarations && typdef == tnone && !instruct)
3788                   || (members && typdef != tignore && instruct))
3789                 {
3790                   make_C_tag (TRUE); /* a function */
3791                   fvdef = fvnameseen;
3792                 }
3793               else if (!declarations)
3794                 fvdef = fvnone;
3795               token.valid = FALSE;
3796               break;
3797             default:
3798               fvdef = fvnone;
3799             }
3800           if (structdef == stagseen)
3801             structdef = snone;
3802           break;
3803         case ']':
3804           if (definedef != dnone || inattribute)
3805             break;
3806           if (structdef == stagseen)
3807             structdef = snone;
3808           switch (typdef)
3809             {
3810             case ttypeseen:
3811             case tend:
3812               typdef = tignore;
3813               make_C_tag (FALSE);       /* a typedef */
3814               break;
3815             case tnone:
3816             case tinbody:
3817               switch (fvdef)
3818                 {
3819                 case foperator:
3820                 case finlist:
3821                 case fignore:
3822                 case vignore:
3823                   break;
3824                 case fvnameseen:
3825                   if ((members && bracelev == 1)
3826                       || (globals && bracelev == 0
3827                           && (!fvextern || declarations)))
3828                     make_C_tag (FALSE); /* a variable */
3829                   /* FALLTHRU */
3830                 default:
3831                   fvdef = fvnone;
3832                 }
3833               break;
3834             }
3835           break;
3836         case '(':
3837           if (inattribute)
3838             {
3839               attrparlev++;
3840               break;
3841             }
3842           if (definedef != dnone)
3843             break;
3844           if (objdef == otagseen && parlev == 0)
3845             objdef = oparenseen;
3846           switch (fvdef)
3847             {
3848             case fvnameseen:
3849               if (typdef == ttypeseen
3850                   && *lp != '*'
3851                   && !instruct)
3852                 {
3853                   /* This handles constructs like:
3854                      typedef void OperatorFun (int fun); */
3855                   make_C_tag (FALSE);
3856                   typdef = tignore;
3857                   fvdef = fignore;
3858                   break;
3859                 }
3860               /* FALLTHRU */
3861             case foperator:
3862               fvdef = fstartlist;
3863               break;
3864             case flistseen:
3865               fvdef = finlist;
3866               break;
3867             }
3868           parlev++;
3869           break;
3870         case ')':
3871           if (inattribute)
3872             {
3873               if (--attrparlev == 0)
3874                 inattribute = FALSE;
3875               break;
3876             }
3877           if (definedef != dnone)
3878             break;
3879           if (objdef == ocatseen && parlev == 1)
3880             {
3881               make_C_tag (TRUE); /* an Objective C category */
3882               objdef = oignore;
3883             }
3884           if (--parlev == 0)
3885             {
3886               switch (fvdef)
3887                 {
3888                 case fstartlist:
3889                 case finlist:
3890                   fvdef = flistseen;
3891                   break;
3892                 }
3893               if (!instruct
3894                   && (typdef == tend
3895                       || typdef == ttypeseen))
3896                 {
3897                   typdef = tignore;
3898                   make_C_tag (FALSE); /* a typedef */
3899                 }
3900             }
3901           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3902             parlev = 0;
3903           break;
3904         case '{':
3905           if (definedef != dnone)
3906             break;
3907           if (typdef == ttypeseen)
3908             {
3909               /* Whenever typdef is set to tinbody (currently only
3910                  here), typdefbracelev should be set to bracelev. */
3911               typdef = tinbody;
3912               typdefbracelev = bracelev;
3913             }
3914           switch (fvdef)
3915             {
3916             case flistseen:
3917               make_C_tag (TRUE);    /* a function */
3918               /* FALLTHRU */
3919             case fignore:
3920               fvdef = fvnone;
3921               break;
3922             case fvnone:
3923               switch (objdef)
3924                 {
3925                 case otagseen:
3926                   make_C_tag (TRUE); /* an Objective C class */
3927                   objdef = oignore;
3928                   break;
3929                 case omethodtag:
3930                 case omethodparm:
3931                   make_C_tag (TRUE); /* an Objective C method */
3932                   objdef = oinbody;
3933                   break;
3934                 default:
3935                   /* Neutralize `extern "C" {' grot. */
3936                   if (bracelev == 0 && structdef == snone && nestlev == 0
3937                       && typdef == tnone)
3938                     bracelev = -1;
3939                 }
3940               break;
3941             }
3942           switch (structdef)
3943             {
3944             case skeyseen:         /* unnamed struct */
3945               pushclass_above (bracelev, NULL, 0);
3946               structdef = snone;
3947               break;
3948             case stagseen:         /* named struct or enum */
3949             case scolonseen:       /* a class */
3950               pushclass_above (bracelev,token.line+token.offset, token.length);
3951               structdef = snone;
3952               make_C_tag (FALSE);  /* a struct or enum */
3953               break;
3954             }
3955           bracelev += 1;
3956           break;
3957         case '*':
3958           if (definedef != dnone)
3959             break;
3960           if (fvdef == fstartlist)
3961             {
3962               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3963               token.valid = FALSE;
3964             }
3965           break;
3966         case '}':
3967           if (definedef != dnone)
3968             break;
3969           bracelev -= 1;
3970           if (!ignoreindent && lp == newlb.buffer + 1)
3971             {
3972               if (bracelev != 0)
3973                 token.valid = FALSE; /* unexpected value, token unreliable */
3974               bracelev = 0;     /* reset brace level if first column */
3975               parlev = 0;       /* also reset paren level, just in case... */
3976             }
3977           else if (bracelev < 0)
3978             {
3979             token.valid = FALSE; /* something gone amiss, token unreliable */
3980               bracelev = 0;
3981             }
3982           if (bracelev == 0 && fvdef == vignore)
3983             fvdef = fvnone;             /* end of function */
3984           popclass_above (bracelev);
3985           structdef = snone;
3986           /* Only if typdef == tinbody is typdefbracelev significant. */
3987           if (typdef == tinbody && bracelev <= typdefbracelev)
3988             {
3989               assert (bracelev == typdefbracelev);
3990               typdef = tend;
3991             }
3992           break;
3993         case '=':
3994           if (definedef != dnone)
3995             break;
3996           switch (fvdef)
3997             {
3998             case foperator:
3999             case finlist:
4000             case fignore:
4001             case vignore:
4002               break;
4003             case fvnameseen:
4004               if ((members && bracelev == 1)
4005                   || (globals && bracelev == 0 && (!fvextern || declarations)))
4006                 make_C_tag (FALSE); /* a variable */
4007               /* FALLTHRU */
4008             default:
4009               fvdef = vignore;
4010             }
4011           break;
4012         case '<':
4013           if (cplpl
4014               && (structdef == stagseen || fvdef == fvnameseen))
4015             {
4016               templatelev++;
4017               break;
4018             }
4019           goto resetfvdef;
4020         case '>':
4021           if (templatelev > 0)
4022             {
4023               templatelev--;
4024               break;
4025             }
4026           goto resetfvdef;
4027         case '+':
4028         case '-':
4029           if (objdef == oinbody && bracelev == 0)
4030             {
4031               objdef = omethodsign;
4032               break;
4033             }
4034           /* FALLTHRU */
4035         resetfvdef:
4036         case '#': case '~': case '&': case '%': case '/':
4037         case '|': case '^': case '!': case '.': case '?':
4038           if (definedef != dnone)
4039             break;
4040           /* These surely cannot follow a function tag in C. */
4041           switch (fvdef)
4042             {
4043             case foperator:
4044             case finlist:
4045             case fignore:
4046             case vignore:
4047               break;
4048             default:
4049               fvdef = fvnone;
4050             }
4051           break;
4052         case '\0':
4053           if (objdef == otagseen)
4054             {
4055               make_C_tag (TRUE); /* an Objective C class */
4056               objdef = oignore;
4057             }
4058           /* If a macro spans multiple lines don't reset its state. */
4059           if (quotednl)
4060             CNL_SAVE_DEFINEDEF ();
4061           else
4062             CNL ();
4063           break;
4064         } /* switch (c) */
4065
4066     } /* while not eof */
4067
4068   free (lbs[0].lb.buffer);
4069   free (lbs[1].lb.buffer);
4070 }
4071
4072 /*
4073  * Process either a C++ file or a C file depending on the setting
4074  * of a global flag.
4075  */
4076 static void
4077 default_C_entries (inf)
4078      FILE *inf;
4079 {
4080   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4081 }
4082
/* Parse INF as plain C: call C_entries with no extension flags set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
4090
4091 /* Always do C++. */
4092 static void
4093 Cplusplus_entries (inf)
4094      FILE *inf;
4095 {
4096   C_entries (C_PLPL, inf);
4097 }
4098
4099 /* Always do Java. */
4100 static void
4101 Cjava_entries (inf)
4102      FILE *inf;
4103 {
4104   C_entries (C_JAVA, inf);
4105 }
4106
4107 /* Always do C*. */
4108 static void
4109 Cstar_entries (inf)
4110      FILE *inf;
4111 {
4112   C_entries (C_STAR, inf);
4113 }
4114
4115 /* Always do Yacc. */
4116 static void
4117 Yacc_entries (inf)
4118      FILE *inf;
4119 {
4120   C_entries (YACC, inf);
4121 }
4122
4123 \f
4124 /* Useful macros. */
/* Loop header, to be followed by the loop body.  Before every
   iteration, and as long as feof (FILE_POINTER) is false, a line is
   read into LINE_BUFFER with readline and CHAR_POINTER is set to the
   start of LINE_BUFFER's buffer.  The trailing TRUE makes the test
   depend on feof only, whatever the comma expression evaluates to. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* stop at end of file */       \
         && (readline (&line_buffer, file_pointer), /* get a line */    \
             char_pointer = line_buffer.buffer,     /* point at it */   \
             TRUE))
4133
/* True when CP points at the keyword KW and the character right after
   the keyword satisfies notinname.  On success, and only then, CP is
   set to the result of skip_spaces applied just past the keyword.
   KW must be a literal string: the `"" kw' concatenation inside
   assert is a syntax error for anything else. */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), TRUE)               /* literal string check */      \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* keyword matches */   \
   && notinname ((cp)[sizeof (kw) - 1])         /* and ends there */    \
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1)) != NULL)
4139
/* Like LOOKING_AT, but the comparison is done with strncaseeq, the
   character following the keyword is not examined, and on success CP
   is only moved past the keyword itself: following spaces are NOT
   skipped.  KW must be a literal string. */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), TRUE)               /* literal string check */      \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* keyword matches */   \
   && ((cp) += sizeof (kw) - 1) != NULL)        /* step over it */
4145
4146 /*
4147  * Read a file, but do no processing.  This is used to do regexp
4148  * matching on files that have no language defined.
4149  */
4150 static void
4151 just_read_file (inf)
4152      FILE *inf;
4153 {
4154   register char *dummy;
4155
4156   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4157     continue;
4158 }
4159
4160 \f
4161 /* Fortran parsing */
4162
4163 static void F_takeprec __P((void));
4164 static void F_getit __P((FILE *));
4165
4166 static void
4167 F_takeprec ()
4168 {
4169   dbp = skip_spaces (dbp);
4170   if (*dbp != '*')
4171     return;
4172   dbp++;
4173   dbp = skip_spaces (dbp);
4174   if (strneq (dbp, "(*)", 3))
4175     {
4176       dbp += 3;
4177       return;
4178     }
4179   if (!ISDIGIT (*dbp))
4180     {
4181       --dbp;                    /* force failure */
4182       return;
4183     }
4184   do
4185     dbp++;
4186   while (ISDIGIT (*dbp));
4187 }
4188
4189 static void
4190 F_getit (inf)
4191      FILE *inf;
4192 {
4193   register char *cp;
4194
4195   dbp = skip_spaces (dbp);
4196   if (*dbp == '\0')
4197     {
4198       readline (&lb, inf);
4199       dbp = lb.buffer;
4200       if (dbp[5] != '&')
4201         return;
4202       dbp += 6;
4203       dbp = skip_spaces (dbp);
4204     }
4205   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4206     return;
4207   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4208     continue;
4209   make_tag (dbp, cp-dbp, TRUE,
4210             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4211 }
4212
4213
4214 static void
4215 Fortran_functions (inf)
4216      FILE *inf;
4217 {
4218   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4219     {
4220       if (*dbp == '%')
4221         dbp++;                  /* Ratfor escape to fortran */
4222       dbp = skip_spaces (dbp);
4223       if (*dbp == '\0')
4224         continue;
4225       switch (lowcase (*dbp))
4226         {
4227         case 'i':
4228           if (nocase_tail ("integer"))
4229             F_takeprec ();
4230           break;
4231         case 'r':
4232           if (nocase_tail ("real"))
4233             F_takeprec ();
4234           break;
4235         case 'l':
4236           if (nocase_tail ("logical"))
4237             F_takeprec ();
4238           break;
4239         case 'c':
4240           if (nocase_tail ("complex") || nocase_tail ("character"))
4241             F_takeprec ();
4242           break;
4243         case 'd':
4244           if (nocase_tail ("double"))
4245             {
4246               dbp = skip_spaces (dbp);
4247               if (*dbp == '\0')
4248                 continue;
4249               if (nocase_tail ("precision"))
4250                 break;
4251               continue;
4252             }
4253           break;
4254         }
4255       dbp = skip_spaces (dbp);
4256       if (*dbp == '\0')
4257         continue;
4258       switch (lowcase (*dbp))
4259         {
4260         case 'f':
4261           if (nocase_tail ("function"))
4262             F_getit (inf);
4263           continue;
4264         case 's':
4265           if (nocase_tail ("subroutine"))
4266             F_getit (inf);
4267           continue;
4268         case 'e':
4269           if (nocase_tail ("entry"))
4270             F_getit (inf);
4271           continue;
4272         case 'b':
4273           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4274             {
4275               dbp = skip_spaces (dbp);
4276               if (*dbp == '\0') /* assume un-named */
4277                 make_tag ("blockdata", 9, TRUE,
4278                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4279               else
4280                 F_getit (inf);  /* look for name */
4281             }
4282           continue;
4283         }
4284     }
4285 }
4286
4287 \f
4288 /*
4289  * Ada parsing
4290  * Original code by
4291  * Philippe Waroquiers (1998)
4292  */
4293
4294 static void Ada_getit __P((FILE *, char *));
4295
4296 /* Once we are positioned after an "interesting" keyword, let's get
4297    the real tag value necessary. */
4298 static void
4299 Ada_getit (inf, name_qualifier)
4300      FILE *inf;
4301      char *name_qualifier;
4302 {
4303   register char *cp;
4304   char *name;
4305   char c;
4306
4307   while (!feof (inf))
4308     {
4309       dbp = skip_spaces (dbp);
4310       if (*dbp == '\0'
4311           || (dbp[0] == '-' && dbp[1] == '-'))
4312         {
4313           readline (&lb, inf);
4314           dbp = lb.buffer;
4315         }
4316       switch (lowcase(*dbp))
4317         {
4318         case 'b':
4319           if (nocase_tail ("body"))
4320             {
4321               /* Skipping body of   procedure body   or   package body or ....
4322                  resetting qualifier to body instead of spec. */
4323               name_qualifier = "/b";
4324               continue;
4325             }
4326           break;
4327         case 't':
4328           /* Skipping type of   task type   or   protected type ... */
4329           if (nocase_tail ("type"))
4330             continue;
4331           break;
4332         }
4333       if (*dbp == '"')
4334         {
4335           dbp += 1;
4336           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4337             continue;
4338         }
4339       else
4340         {
4341           dbp = skip_spaces (dbp);
4342           for (cp = dbp;
4343                (*cp != '\0'
4344                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4345                cp++)
4346             continue;
4347           if (cp == dbp)
4348             return;
4349         }
4350       c = *cp;
4351       *cp = '\0';
4352       name = concat (dbp, name_qualifier, "");
4353       *cp = c;
4354       make_tag (name, strlen (name), TRUE,
4355                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4356       free (name);
4357       if (c == '"')
4358         dbp = cp + 1;
4359       return;
4360     }
4361 }
4362
4363 static void
4364 Ada_funcs (inf)
4365      FILE *inf;
4366 {
4367   bool inquote = FALSE;
4368   bool skip_till_semicolumn = FALSE;
4369
4370   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4371     {
4372       while (*dbp != '\0')
4373         {
4374           /* Skip a string i.e. "abcd". */
4375           if (inquote || (*dbp == '"'))
4376             {
4377               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4378               if (dbp != NULL)
4379                 {
4380                   inquote = FALSE;
4381                   dbp += 1;
4382                   continue;     /* advance char */
4383                 }
4384               else
4385                 {
4386                   inquote = TRUE;
4387                   break;        /* advance line */
4388                 }
4389             }
4390
4391           /* Skip comments. */
4392           if (dbp[0] == '-' && dbp[1] == '-')
4393             break;              /* advance line */
4394
4395           /* Skip character enclosed in single quote i.e. 'a'
4396              and skip single quote starting an attribute i.e. 'Image. */
4397           if (*dbp == '\'')
4398             {
4399               dbp++ ;
4400               if (*dbp != '\0')
4401                 dbp++;
4402               continue;
4403             }
4404
4405           if (skip_till_semicolumn)
4406             {
4407               if (*dbp == ';')
4408                 skip_till_semicolumn = FALSE;
4409               dbp++;
4410               continue;         /* advance char */
4411             }
4412
4413           /* Search for beginning of a token.  */
4414           if (!begtoken (*dbp))
4415             {
4416               dbp++;
4417               continue;         /* advance char */
4418             }
4419
4420           /* We are at the beginning of a token. */
4421           switch (lowcase(*dbp))
4422             {
4423             case 'f':
4424               if (!packages_only && nocase_tail ("function"))
4425                 Ada_getit (inf, "/f");
4426               else
4427                 break;          /* from switch */
4428               continue;         /* advance char */
4429             case 'p':
4430               if (!packages_only && nocase_tail ("procedure"))
4431                 Ada_getit (inf, "/p");
4432               else if (nocase_tail ("package"))
4433                 Ada_getit (inf, "/s");
4434               else if (nocase_tail ("protected")) /* protected type */
4435                 Ada_getit (inf, "/t");
4436               else
4437                 break;          /* from switch */
4438               continue;         /* advance char */
4439
4440             case 'u':
4441               if (typedefs && !packages_only && nocase_tail ("use"))
4442                 {
4443                   /* when tagging types, avoid tagging  use type Pack.Typename;
4444                      for this, we will skip everything till a ; */
4445                   skip_till_semicolumn = TRUE;
4446                   continue;     /* advance char */
4447                 }
4448
4449             case 't':
4450               if (!packages_only && nocase_tail ("task"))
4451                 Ada_getit (inf, "/k");
4452               else if (typedefs && !packages_only && nocase_tail ("type"))
4453                 {
4454                   Ada_getit (inf, "/t");
4455                   while (*dbp != '\0')
4456                     dbp += 1;
4457                 }
4458               else
4459                 break;          /* from switch */
4460               continue;         /* advance char */
4461             }
4462
4463           /* Look for the end of the token. */
4464           while (!endtoken (*dbp))
4465             dbp++;
4466
4467         } /* advance char */
4468     } /* advance line */
4469 }
4470
4471 \f
4472 /*
4473  * Unix and microcontroller assembly tag handling
4474  * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4475  * Idea by Bob Weiner, Motorola Inc. (1994)
4476  */
4477 static void
4478 Asm_labels (inf)
4479      FILE *inf;
4480 {
4481   register char *cp;
4482
4483   LOOP_ON_INPUT_LINES (inf, lb, cp)
4484     {
4485       /* If first char is alphabetic or one of [_.$], test for colon
4486          following identifier. */
4487       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4488         {
4489           /* Read past label. */
4490           cp++;
4491           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4492             cp++;
4493           if (*cp == ':' || iswhite (*cp))
4494             /* Found end of label, so copy it and add it to the table. */
4495             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4496                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4497         }
4498     }
4499 }
4500
4501 \f
4502 /*
4503  * Perl support
4504  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4505  * Perl variable names: /^(my|local).../
4506  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4507  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4508  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4509  */
4510 static void
4511 Perl_functions (inf)
4512      FILE *inf;
4513 {
4514   char *package = savestr ("main"); /* current package name */
4515   register char *cp;
4516
4517   LOOP_ON_INPUT_LINES (inf, lb, cp)
4518     {
4519       skip_spaces(cp);
4520
4521       if (LOOKING_AT (cp, "package"))
4522         {
4523           free (package);
4524           get_tag (cp, &package);
4525         }
4526       else if (LOOKING_AT (cp, "sub"))
4527         {
4528           char *pos;
4529           char *sp = cp;
4530
4531           while (!notinname (*cp))
4532             cp++;
4533           if (cp == sp)
4534             continue;           /* nothing found */
4535           if ((pos = etags_strchr (sp, ':')) != NULL
4536               && pos < cp && pos[1] == ':')
4537             /* The name is already qualified. */
4538             make_tag (sp, cp - sp, TRUE,
4539                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4540           else
4541             /* Qualify it. */
4542             {
4543               char savechar, *name;
4544
4545               savechar = *cp;
4546               *cp = '\0';
4547               name = concat (package, "::", sp);
4548               *cp = savechar;
4549               make_tag (name, strlen(name), TRUE,
4550                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4551               free (name);
4552             }
4553         }
4554        else if (globals)        /* only if we are tagging global vars */
4555         {
4556           /* Skip a qualifier, if any. */
4557           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4558           /* After "my" or "local", but before any following paren or space. */
4559           char *varstart = cp;
4560
4561           if (qual              /* should this be removed?  If yes, how? */
4562               && (*cp == '$' || *cp == '@' || *cp == '%'))
4563             {
4564               varstart += 1;
4565               do
4566                 cp++;
4567               while (ISALNUM (*cp) || *cp == '_');
4568             }
4569           else if (qual)
4570             {
4571               /* Should be examining a variable list at this point;
4572                  could insist on seeing an open parenthesis. */
4573               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4574                 cp++;
4575             }
4576           else
4577             continue;
4578
4579           make_tag (varstart, cp - varstart, FALSE,
4580                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4581         }
4582     }
4583   free (package);
4584 }
4585
4586
4587 /*
4588  * Python support
4589  * Look for /^[ \t]*def[ \t\n]+[^ \t\n(:]+/ or /^[ \t]*class[ \t\n]+[^ \t\n(:]+/
4590  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4591  * More ideas by seb bacon <seb@jamkit.com> (2002)
4592  */
4593 static void
4594 Python_functions (inf)
4595      FILE *inf;
4596 {
4597   register char *cp;
4598
4599   LOOP_ON_INPUT_LINES (inf, lb, cp)
4600     {
4601       cp = skip_spaces (cp);
4602       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4603         {
4604           char *name = cp;
4605           while (!notinname (*cp) && *cp != ':')
4606             cp++;
4607           make_tag (name, cp - name, TRUE,
4608                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4609         }
4610     }
4611 }
4612
4613 \f
4614 /*
4615  * PHP support
4616  * Look for:
4617  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4618  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4619  *  - /^[ \t]*define\(\"[^\"]+/
4620  * Only with --members:
4621  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4622  * Idea by Diez B. Roggisch (2001)
4623  */
4624 static void
4625 PHP_functions (inf)
4626      FILE *inf;
4627 {
4628   register char *cp, *name;
4629   bool search_identifier = FALSE;
4630
4631   LOOP_ON_INPUT_LINES (inf, lb, cp)
4632     {
4633       cp = skip_spaces (cp);
4634       name = cp;
4635       if (search_identifier
4636           && *cp != '\0')
4637         {
4638           while (!notinname (*cp))
4639             cp++;
4640           make_tag (name, cp - name, TRUE,
4641                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4642           search_identifier = FALSE;
4643         }
4644       else if (LOOKING_AT (cp, "function"))
4645         {
4646           if(*cp == '&')
4647             cp = skip_spaces (cp+1);
4648           if(*cp != '\0')
4649             {
4650               name = cp;
4651               while (!notinname (*cp))
4652                 cp++;
4653               make_tag (name, cp - name, TRUE,
4654                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4655             }
4656           else
4657             search_identifier = TRUE;
4658         }
4659       else if (LOOKING_AT (cp, "class"))
4660         {
4661           if (*cp != '\0')
4662             {
4663               name = cp;
4664               while (*cp != '\0' && !iswhite (*cp))
4665                 cp++;
4666               make_tag (name, cp - name, FALSE,
4667                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4668             }
4669           else
4670             search_identifier = TRUE;
4671         }
4672       else if (strneq (cp, "define", 6)
4673                && (cp = skip_spaces (cp+6))
4674                && *cp++ == '('
4675                && (*cp == '"' || *cp == '\''))
4676         {
4677           char quote = *cp++;
4678           name = cp;
4679           while (*cp != quote && *cp != '\0')
4680             cp++;
4681           make_tag (name, cp - name, FALSE,
4682                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4683         }
4684       else if (members
4685                && LOOKING_AT (cp, "var")
4686                && *cp == '$')
4687         {
4688           name = cp;
4689           while (!notinname(*cp))
4690             cp++;
4691           make_tag (name, cp - name, FALSE,
4692                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4693         }
4694     }
4695 }
4696
4697 \f
4698 /*
4699  * Cobol tag functions
4700  * We could look for anything that could be a paragraph name.
4701  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4702  * Idea by Corny de Souza (1993)
4703  */
4704 static void
4705 Cobol_paragraphs (inf)
4706      FILE *inf;
4707 {
4708   register char *bp, *ep;
4709
4710   LOOP_ON_INPUT_LINES (inf, lb, bp)
4711     {
4712       if (lb.len < 9)
4713         continue;
4714       bp += 8;
4715
4716       /* If eoln, compiler option or comment ignore whole line. */
4717       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4718         continue;
4719
4720       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4721         continue;
4722       if (*ep++ == '.')
4723         make_tag (bp, ep - bp, TRUE,
4724                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4725     }
4726 }
4727
4728 \f
4729 /*
4730  * Makefile support
4731  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4732  */
4733 static void
4734 Makefile_targets (inf)
4735      FILE *inf;
4736 {
4737   register char *bp;
4738
4739   LOOP_ON_INPUT_LINES (inf, lb, bp)
4740     {
4741       if (*bp == '\t' || *bp == '#')
4742         continue;
4743       while (*bp != '\0' && *bp != '=' && *bp != ':')
4744         bp++;
4745       if (*bp == ':' || (globals && *bp == '='))
4746         {
4747           /* We should detect if there is more than one tag, but we do not.
4748              We just skip initial and final spaces. */
4749           char * namestart = skip_spaces (lb.buffer);
4750           while (--bp > namestart)
4751             if (!notinname (*bp))
4752               break;
4753           make_tag (namestart, bp - namestart + 1, TRUE,
4754                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4755         }
4756     }
4757 }
4758
4759 \f
4760 /*
4761  * Pascal parsing
4762  * Original code by Mosur K. Mohan (1989)
4763  *
4764  *  Locates tags for procedures & functions.  Doesn't do any type- or
4765  *  var-definitions.  It does look for the keyword "extern" or
4766  *  "forward" immediately following the procedure statement; if found,
4767  *  the tag is skipped.
4768  */
4769 static void
4770 Pascal_functions (inf)
4771      FILE *inf;
4772 {
4773   linebuffer tline;             /* mostly copied from C_entries */
4774   long save_lcno;
4775   int save_lineno, namelen, taglen;
4776   char c, *name;
4777
4778   bool                          /* each of these flags is TRUE if: */
4779     incomment,                  /* point is inside a comment */
4780     inquote,                    /* point is inside '..' string */
4781     get_tagname,                /* point is after PROCEDURE/FUNCTION
4782                                    keyword, so next item = potential tag */
4783     found_tag,                  /* point is after a potential tag */
4784     inparms,                    /* point is within parameter-list */
4785     verify_tag;                 /* point has passed the parm-list, so the
4786                                    next token will determine whether this
4787                                    is a FORWARD/EXTERN to be ignored, or
4788                                    whether it is a real tag */
4789
4790   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4791   name = NULL;                  /* keep compiler quiet */
4792   dbp = lb.buffer;
4793   *dbp = '\0';
4794   linebuffer_init (&tline);
4795
4796   incomment = inquote = FALSE;
4797   found_tag = FALSE;            /* have a proc name; check if extern */
4798   get_tagname = FALSE;          /* found "procedure" keyword         */
4799   inparms = FALSE;              /* found '(' after "proc"            */
4800   verify_tag = FALSE;           /* check if "extern" is ahead        */
4801
4802
4803   while (!feof (inf))           /* long main loop to get next char */
4804     {
4805       c = *dbp++;
4806       if (c == '\0')            /* if end of line */
4807         {
4808           readline (&lb, inf);
4809           dbp = lb.buffer;
4810           if (*dbp == '\0')
4811             continue;
4812           if (!((found_tag && verify_tag)
4813                 || get_tagname))
4814             c = *dbp++;         /* only if don't need *dbp pointing
4815                                    to the beginning of the name of
4816                                    the procedure or function */
4817         }
4818       if (incomment)
4819         {
4820           if (c == '}')         /* within { } comments */
4821             incomment = FALSE;
4822           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4823             {
4824               dbp++;
4825               incomment = FALSE;
4826             }
4827           continue;
4828         }
4829       else if (inquote)
4830         {
4831           if (c == '\'')
4832             inquote = FALSE;
4833           continue;
4834         }
4835       else
4836         switch (c)
4837           {
4838           case '\'':
4839             inquote = TRUE;     /* found first quote */
4840             continue;
4841           case '{':             /* found open { comment */
4842             incomment = TRUE;
4843             continue;
4844           case '(':
4845             if (*dbp == '*')    /* found open (* comment */
4846               {
4847                 incomment = TRUE;
4848                 dbp++;
4849               }
4850             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4851               inparms = TRUE;
4852             continue;
4853           case ')':             /* end of parms list */
4854             if (inparms)
4855               inparms = FALSE;
4856             continue;
4857           case ';':
4858             if (found_tag && !inparms) /* end of proc or fn stmt */
4859               {
4860                 verify_tag = TRUE;
4861                 break;
4862               }
4863             continue;
4864           }
4865       if (found_tag && verify_tag && (*dbp != ' '))
4866         {
4867           /* Check if this is an "extern" declaration. */
4868           if (*dbp == '\0')
4869             continue;
4870           if (lowcase (*dbp == 'e'))
4871             {
4872               if (nocase_tail ("extern")) /* superfluous, really! */
4873                 {
4874                   found_tag = FALSE;
4875                   verify_tag = FALSE;
4876                 }
4877             }
4878           else if (lowcase (*dbp) == 'f')
4879             {
4880               if (nocase_tail ("forward")) /* check for forward reference */
4881                 {
4882                   found_tag = FALSE;
4883                   verify_tag = FALSE;
4884                 }
4885             }
4886           if (found_tag && verify_tag) /* not external proc, so make tag */
4887             {
4888               found_tag = FALSE;
4889               verify_tag = FALSE;
4890               make_tag (name, namelen, TRUE,
4891                         tline.buffer, taglen, save_lineno, save_lcno);
4892               continue;
4893             }
4894         }
4895       if (get_tagname)          /* grab name of proc or fn */
4896         {
4897           char *cp;
4898
4899           if (*dbp == '\0')
4900             continue;
4901
4902           /* Find block name. */
4903           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4904             continue;
4905
4906           /* Save all values for later tagging. */
4907           linebuffer_setlen (&tline, lb.len);
4908           strcpy (tline.buffer, lb.buffer);
4909           save_lineno = lineno;
4910           save_lcno = linecharno;
4911           name = tline.buffer + (dbp - lb.buffer);
4912           namelen = cp - dbp;
4913           taglen = cp - lb.buffer + 1;
4914
4915           dbp = cp;             /* set dbp to e-o-token */
4916           get_tagname = FALSE;
4917           found_tag = TRUE;
4918           continue;
4919
4920           /* And proceed to check for "extern". */
4921         }
4922       else if (!incomment && !inquote && !found_tag)
4923         {
4924           /* Check for proc/fn keywords. */
4925           switch (lowcase (c))
4926             {
4927             case 'p':
4928               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4929                 get_tagname = TRUE;
4930               continue;
4931             case 'f':
4932               if (nocase_tail ("unction"))
4933                 get_tagname = TRUE;
4934               continue;
4935             }
4936         }
4937     } /* while not eof */
4938
4939   free (tline.buffer);
4940 }
4941
4942 \f
4943 /*
4944  * Lisp tag functions
4945  *  look for (def or (DEF, quote or QUOTE
4946  */
4947
4948 static void L_getit __P((void));
4949
4950 static void
4951 L_getit ()
4952 {
4953   if (*dbp == '\'')             /* Skip prefix quote */
4954     dbp++;
4955   else if (*dbp == '(')
4956   {
4957     dbp++;
4958     /* Try to skip "(quote " */
4959     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4960       /* Ok, then skip "(" before name in (defstruct (foo)) */
4961       dbp = skip_spaces (dbp);
4962   }
4963   get_tag (dbp, NULL);
4964 }
4965
4966 static void
4967 Lisp_functions (inf)
4968      FILE *inf;
4969 {
4970   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4971     {
4972       if (dbp[0] != '(')
4973         continue;
4974
4975       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4976         {
4977           dbp = skip_non_spaces (dbp);
4978           dbp = skip_spaces (dbp);
4979           L_getit ();
4980         }
4981       else
4982         {
4983           /* Check for (foo::defmumble name-defined ... */
4984           do
4985             dbp++;
4986           while (!notinname (*dbp) && *dbp != ':');
4987           if (*dbp == ':')
4988             {
4989               do
4990                 dbp++;
4991               while (*dbp == ':');
4992
4993               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4994                 {
4995                   dbp = skip_non_spaces (dbp);
4996                   dbp = skip_spaces (dbp);
4997                   L_getit ();
4998                 }
4999             }
5000         }
5001     }
5002 }
5003
5004 \f
5005 /*
5006  * Lua script language parsing
5007  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5008  *
5009  *  "function" and "local function" are tags if they start at column 1.
5010  */
5011 static void
5012 Lua_functions (inf)
5013      FILE *inf;
5014 {
5015   register char *bp;
5016
5017   LOOP_ON_INPUT_LINES (inf, lb, bp)
5018     {
5019       if (bp[0] != 'f' && bp[0] != 'l')
5020         continue;
5021
5022       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5023
5024       if (LOOKING_AT (bp, "function"))
5025         get_tag (bp, NULL);
5026     }
5027 }
5028
5029 \f
5030 /*
5031  * Postscript tags
5032  * Just look for lines where the first character is '/'
5033  * Also look at "defineps" for PSWrap
5034  * Ideas by:
5035  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5036  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5037  */
5038 static void
5039 PS_functions (inf)
5040      FILE *inf;
5041 {
5042   register char *bp, *ep;
5043
5044   LOOP_ON_INPUT_LINES (inf, lb, bp)
5045     {
5046       if (bp[0] == '/')
5047         {
5048           for (ep = bp+1;
5049                *ep != '\0' && *ep != ' ' && *ep != '{';
5050                ep++)
5051             continue;
5052           make_tag (bp, ep - bp, TRUE,
5053                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5054         }
5055       else if (LOOKING_AT (bp, "defineps"))
5056         get_tag (bp, NULL);
5057     }
5058 }
5059
5060 \f
5061 /*
5062  * Forth tags
5063  * Ignore anything after \ followed by space or in ( )
5064  * Look for words defined by :
5065  * Look for constant, code, create, defer, value, and variable
5066  * OBP extensions:  Look for buffer:, field,
5067  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5068  */
5069 static void
5070 Forth_words (inf)
5071      FILE *inf;
5072 {
5073   register char *bp;
5074
5075   LOOP_ON_INPUT_LINES (inf, lb, bp)
5076     while ((bp = skip_spaces (bp))[0] != '\0')
5077       if (bp[0] == '\\' && iswhite(bp[1]))
5078         break;                  /* read next line */
5079       else if (bp[0] == '(' && iswhite(bp[1]))
5080         do                      /* skip to ) or eol */
5081           bp++;
5082         while (*bp != ')' && *bp != '\0');
5083       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5084                || LOOKING_AT_NOCASE (bp, "constant")
5085                || LOOKING_AT_NOCASE (bp, "code")
5086                || LOOKING_AT_NOCASE (bp, "create")
5087                || LOOKING_AT_NOCASE (bp, "defer")
5088                || LOOKING_AT_NOCASE (bp, "value")
5089                || LOOKING_AT_NOCASE (bp, "variable")
5090                || LOOKING_AT_NOCASE (bp, "buffer:")
5091                || LOOKING_AT_NOCASE (bp, "field"))
5092         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5093       else
5094         bp = skip_non_spaces (bp);
5095 }
5096
5097 \f
5098 /*
5099  * Scheme tag functions
5100  * look for (def... xyzzy
5101  *          (def... (xyzzy
5102  *          (def ... ((...(xyzzy ....
5103  *          (set! xyzzy
5104  * Original code by Ken Haase (1985?)
5105  */
5106 static void
5107 Scheme_functions (inf)
5108      FILE *inf;
5109 {
5110   register char *bp;
5111
5112   LOOP_ON_INPUT_LINES (inf, lb, bp)
5113     {
5114       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5115         {
5116           bp = skip_non_spaces (bp+4);
5117           /* Skip over open parens and white space */
5118           while (notinname (*bp))
5119             bp++;
5120           get_tag (bp, NULL);
5121         }
5122       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5123         get_tag (bp, NULL);
5124     }
5125 }
5126
5127 \f
5128 /* Find tags in TeX and LaTeX input files.  */
5129
5130 /* TEX_toktab is a table of TeX control sequences that define tags.
5131  * Each entry records one such control sequence.
5132  *
5133  * Original code from who knows whom.
5134  * Ideas by:
5135  *   Stefan Monnier (2002)
5136  */
5137
5138 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5139
5140 /* Default set of control sequences to put into TEX_toktab.
5141    The value of environment var TEXTAGS is prepended to this.  */
5142 static char *TEX_defenv = "\
5143 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5144 :part:appendix:entry:index:def\
5145 :newcommand:renewcommand:newenvironment:renewenvironment";
5146
5147 static void TEX_mode __P((FILE *));
5148 static void TEX_decode_env __P((char *, char *));
5149
5150 static char TEX_esc = '\\';
5151 static char TEX_opgrp = '{';
5152 static char TEX_clgrp = '}';
5153
5154 /*
5155  * TeX/LaTeX scanning loop.
5156  */
5157 static void
5158 TeX_commands (inf)
5159      FILE *inf;
5160 {
5161   char *cp;
5162   linebuffer *key;
5163
5164   /* Select either \ or ! as escape character.  */
5165   TEX_mode (inf);
5166
5167   /* Initialize token table once from environment. */
5168   if (TEX_toktab == NULL)
5169     TEX_decode_env ("TEXTAGS", TEX_defenv);
5170
5171   LOOP_ON_INPUT_LINES (inf, lb, cp)
5172     {
5173       /* Look at each TEX keyword in line. */
5174       for (;;)
5175         {
5176           /* Look for a TEX escape. */
5177           while (*cp++ != TEX_esc)
5178             if (cp[-1] == '\0' || cp[-1] == '%')
5179               goto tex_next_line;
5180
5181           for (key = TEX_toktab; key->buffer != NULL; key++)
5182             if (strneq (cp, key->buffer, key->len))
5183               {
5184                 register char *p;
5185                 int namelen, linelen;
5186                 bool opgrp = FALSE;
5187
5188                 cp = skip_spaces (cp + key->len);
5189                 if (*cp == TEX_opgrp)
5190                   {
5191                     opgrp = TRUE;
5192                     cp++;
5193                   }
5194                 for (p = cp;
5195                      (!iswhite (*p) && *p != '#' &&
5196                       *p != TEX_opgrp && *p != TEX_clgrp);
5197                      p++)
5198                   continue;
5199                 namelen = p - cp;
5200                 linelen = lb.len;
5201                 if (!opgrp || *p == TEX_clgrp)
5202                   {
5203                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5204                       p++;
5205                     linelen = p - lb.buffer + 1;
5206                   }
5207                 make_tag (cp, namelen, TRUE,
5208                           lb.buffer, linelen, lineno, linecharno);
5209                 goto tex_next_line; /* We only tag a line once */
5210               }
5211         }
5212     tex_next_line:
5213       ;
5214     }
5215 }
5216
5217 #define TEX_LESC '\\'
5218 #define TEX_SESC '!'
5219
5220 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5221    chars accordingly. */
5222 static void
5223 TEX_mode (inf)
5224      FILE *inf;
5225 {
5226   int c;
5227
5228   while ((c = getc (inf)) != EOF)
5229     {
5230       /* Skip to next line if we hit the TeX comment char. */
5231       if (c == '%')
5232         while (c != '\n' && c != EOF)
5233           c = getc (inf);
5234       else if (c == TEX_LESC || c == TEX_SESC )
5235         break;
5236     }
5237
5238   if (c == TEX_LESC)
5239     {
5240       TEX_esc = TEX_LESC;
5241       TEX_opgrp = '{';
5242       TEX_clgrp = '}';
5243     }
5244   else
5245     {
5246       TEX_esc = TEX_SESC;
5247       TEX_opgrp = '<';
5248       TEX_clgrp = '>';
5249     }
5250   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5251      No attempt is made to correct the situation. */
5252   rewind (inf);
5253 }
5254
5255 /* Read environment and prepend it to the default string.
5256    Build token table. */
5257 static void
5258 TEX_decode_env (evarname, defenv)
5259      char *evarname;
5260      char *defenv;
5261 {
5262   register char *env, *p;
5263   int i, len;
5264
5265   /* Append default string to environment. */
5266   env = getenv (evarname);
5267   if (!env)
5268     env = defenv;
5269   else
5270     {
5271       char *oldenv = env;
5272       env = concat (oldenv, defenv, "");
5273     }
5274
5275   /* Allocate a token table */
5276   for (len = 1, p = env; p;)
5277     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5278       len++;
5279   TEX_toktab = xnew (len, linebuffer);
5280
5281   /* Unpack environment string into token table. Be careful about */
5282   /* zero-length strings (leading ':', "::" and trailing ':') */
5283   for (i = 0; *env != '\0';)
5284     {
5285       p = etags_strchr (env, ':');
5286       if (!p)                   /* End of environment string. */
5287         p = env + strlen (env);
5288       if (p - env > 0)
5289         {                       /* Only non-zero strings. */
5290           TEX_toktab[i].buffer = savenstr (env, p - env);
5291           TEX_toktab[i].len = p - env;
5292           i++;
5293         }
5294       if (*p)
5295         env = p + 1;
5296       else
5297         {
5298           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5299           TEX_toktab[i].len = 0;
5300           break;
5301         }
5302     }
5303 }
5304
5305 \f
5306 /* Texinfo support.  Dave Love, Mar. 2000.  */
5307 static void
5308 Texinfo_nodes (inf)
5309      FILE * inf;
5310 {
5311   char *cp, *start;
5312   LOOP_ON_INPUT_LINES (inf, lb, cp)
5313     if (LOOKING_AT (cp, "@node"))
5314       {
5315         start = cp;
5316         while (*cp != '\0' && *cp != ',')
5317           cp++;
5318         make_tag (start, cp - start, TRUE,
5319                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5320       }
5321 }
5322
5323 \f
5324 /*
5325  * HTML support.
5326  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5327  * Contents of <a name=xxx> are tags with name xxx.
5328  *
5329  * Francesco Potortì, 2002.
5330  */
5331 static void
5332 HTML_labels (inf)
5333      FILE * inf;
5334 {
5335   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5336   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5337   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5338   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5339   char *end;
5340
5341
5342   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5343
5344   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5345     for (;;)                    /* loop on the same line */
5346       {
5347         if (skiptag)            /* skip HTML tag */
5348           {
5349             while (*dbp != '\0' && *dbp != '>')
5350               dbp++;
5351             if (*dbp == '>')
5352               {
5353                 dbp += 1;
5354                 skiptag = FALSE;
5355                 continue;       /* look on the same line */
5356               }
5357             break;              /* go to next line */
5358           }
5359
5360         else if (intag) /* look for "name=" or "id=" */
5361           {
5362             while (*dbp != '\0' && *dbp != '>'
5363                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5364               dbp++;
5365             if (*dbp == '\0')
5366               break;            /* go to next line */
5367             if (*dbp == '>')
5368               {
5369                 dbp += 1;
5370                 intag = FALSE;
5371                 continue;       /* look on the same line */
5372               }
5373             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5374                 || LOOKING_AT_NOCASE (dbp, "id="))
5375               {
5376                 bool quoted = (dbp[0] == '"');
5377
5378                 if (quoted)
5379                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5380                     continue;
5381                 else
5382                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5383                     continue;
5384                 linebuffer_setlen (&token_name, end - dbp);
5385                 strncpy (token_name.buffer, dbp, end - dbp);
5386                 token_name.buffer[end - dbp] = '\0';
5387
5388                 dbp = end;
5389                 intag = FALSE;  /* we found what we looked for */
5390                 skiptag = TRUE; /* skip to the end of the tag */
5391                 getnext = TRUE; /* then grab the text */
5392                 continue;       /* look on the same line */
5393               }
5394             dbp += 1;
5395           }
5396
5397         else if (getnext)       /* grab next tokens and tag them */
5398           {
5399             dbp = skip_spaces (dbp);
5400             if (*dbp == '\0')
5401               break;            /* go to next line */
5402             if (*dbp == '<')
5403               {
5404                 intag = TRUE;
5405                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5406                 continue;       /* look on the same line */
5407               }
5408
5409             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5410               continue;
5411             make_tag (token_name.buffer, token_name.len, TRUE,
5412                       dbp, end - dbp, lineno, linecharno);
5413             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5414             getnext = FALSE;
5415             break;              /* go to next line */
5416           }
5417
5418         else                    /* look for an interesting HTML tag */
5419           {
5420             while (*dbp != '\0' && *dbp != '<')
5421               dbp++;
5422             if (*dbp == '\0')
5423               break;            /* go to next line */
5424             intag = TRUE;
5425             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5426               {
5427                 inanchor = TRUE;
5428                 continue;       /* look on the same line */
5429               }
5430             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5431                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5432                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5433                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5434               {
5435                 intag = FALSE;
5436                 getnext = TRUE;
5437                 continue;       /* look on the same line */
5438               }
5439             dbp += 1;
5440           }
5441       }
5442 }
5443
5444 \f
5445 /*
5446  * Prolog support
5447  *
5448  * Assumes that the predicate or rule starts at column 0.
5449  * Only the first clause of a predicate or rule is added.
5450  * Original code by Sunichirou Sugou (1989)
5451  * Rewritten by Anders Lindgren (1996)
5452  */
5453 static int prolog_pr __P((char *, char *));
5454 static void prolog_skip_comment __P((linebuffer *, FILE *));
5455 static int prolog_atom __P((char *, int));
5456
5457 static void
5458 Prolog_functions (inf)
5459      FILE *inf;
5460 {
5461   char *cp, *last;
5462   int len;
5463   int allocated;
5464
5465   allocated = 0;
5466   len = 0;
5467   last = NULL;
5468
5469   LOOP_ON_INPUT_LINES (inf, lb, cp)
5470     {
5471       if (cp[0] == '\0')        /* Empty line */
5472         continue;
5473       else if (iswhite (cp[0])) /* Not a predicate */
5474         continue;
5475       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5476         prolog_skip_comment (&lb, inf);
5477       else if ((len = prolog_pr (cp, last)) > 0)
5478         {
5479           /* Predicate or rule.  Store the function name so that we
5480              only generate a tag for the first clause.  */
5481           if (last == NULL)
5482             last = xnew(len + 1, char);
5483           else if (len + 1 > allocated)
5484             xrnew (last, len + 1, char);
5485           allocated = len + 1;
5486           strncpy (last, cp, len);
5487           last[len] = '\0';
5488         }
5489     }
5490   if (last != NULL)
5491     free (last);
5492 }
5493
5494
5495 static void
5496 prolog_skip_comment (plb, inf)
5497      linebuffer *plb;
5498      FILE *inf;
5499 {
5500   char *cp;
5501
5502   do
5503     {
5504       for (cp = plb->buffer; *cp != '\0'; cp++)
5505         if (cp[0] == '*' && cp[1] == '/')
5506           return;
5507       readline (plb, inf);
5508     }
5509   while (!feof(inf));
5510 }
5511
5512 /*
5513  * A predicate or rule definition is added if it matches:
5514  *     <beginning of line><Prolog Atom><whitespace>(
5515  * or  <beginning of line><Prolog Atom><whitespace>:-
5516  *
5517  * It is added to the tags database if it doesn't match the
5518  * name of the previous clause header.
5519  *
5520  * Return the size of the name of the predicate or rule, or 0 if no
5521  * header was found.
5522  */
5523 static int
5524 prolog_pr (s, last)
5525      char *s;
5526      char *last;                /* Name of last clause. */
5527 {
5528   int pos;
5529   int len;
5530
5531   pos = prolog_atom (s, 0);
5532   if (pos < 1)
5533     return 0;
5534
5535   len = pos;
5536   pos = skip_spaces (s + pos) - s;
5537
5538   if ((s[pos] == '.'
5539        || (s[pos] == '(' && (pos += 1))
5540        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5541       && (last == NULL          /* save only the first clause */
5542           || len != (int)strlen (last)
5543           || !strneq (s, last, len)))
5544         {
5545           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5546           return len;
5547         }
5548   else
5549     return 0;
5550 }
5551
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores included), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (s[pos] == '\'')
    {
      /* Quoted atom: scan up to the closing quote. */
      for (pos++;;)
        switch (s[pos])
          {
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return -1;

          case '\\':
            /* Backslash takes the next character with it. */
            if (s[pos + 1] == '\0')
              return -1;
            pos += 2;
            break;

          case '\'':
            /* A lone quote closes the atom, a doubled one is data. */
            if (s[pos + 1] != '\'')
              return pos + 1 - start;
            pos += 2;
            break;

          default:
            pos++;
            break;
          }
    }

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
        continue;
      return pos - start;
    }

  return -1;
}
5610
5611 \f
5612 /*
5613  * Support for Erlang
5614  *
5615  * Generates tags for functions, defines, and records.
5616  * Assumes that Erlang functions start at column 0.
5617  * Original code by Anders Lindgren (1996)
5618  */
5619 static int erlang_func __P((char *, char *));
5620 static void erlang_attribute __P((char *));
5621 static int erlang_atom __P((char *));
5622
5623 static void
5624 Erlang_functions (inf)
5625      FILE *inf;
5626 {
5627   char *cp, *last;
5628   int len;
5629   int allocated;
5630
5631   allocated = 0;
5632   len = 0;
5633   last = NULL;
5634
5635   LOOP_ON_INPUT_LINES (inf, lb, cp)
5636     {
5637       if (cp[0] == '\0')        /* Empty line */
5638         continue;
5639       else if (iswhite (cp[0])) /* Not function nor attribute */
5640         continue;
5641       else if (cp[0] == '%')    /* comment */
5642         continue;
5643       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5644         continue;
5645       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5646         {
5647           erlang_attribute (cp);
5648           if (last != NULL)
5649             {
5650               free (last);
5651               last = NULL;
5652             }
5653         }
5654       else if ((len = erlang_func (cp, last)) > 0)
5655         {
5656           /*
5657            * Function.  Store the function name so that we only
5658            * generates a tag for the first clause.
5659            */
5660           if (last == NULL)
5661             last = xnew (len + 1, char);
5662           else if (len + 1 > allocated)
5663             xrnew (last, len + 1, char);
5664           allocated = len + 1;
5665           strncpy (last, cp, len);
5666           last[len] = '\0';
5667         }
5668     }
5669   if (last != NULL)
5670     free (last);
5671 }
5672
5673
5674 /*
5675  * A function definition is added if it matches:
5676  *     <beginning of line><Erlang Atom><whitespace>(
5677  *
5678  * It is added to the tags database if it doesn't match the
5679  * name of the previous clause header.
5680  *
5681  * Return the size of the name of the function, or 0 if no function
5682  * was found.
5683  */
5684 static int
5685 erlang_func (s, last)
5686      char *s;
5687      char *last;                /* Name of last clause. */
5688 {
5689   int pos;
5690   int len;
5691
5692   pos = erlang_atom (s);
5693   if (pos < 1)
5694     return 0;
5695
5696   len = pos;
5697   pos = skip_spaces (s + pos) - s;
5698
5699   /* Save only the first clause. */
5700   if (s[pos++] == '('
5701       && (last == NULL
5702           || len != (int)strlen (last)
5703           || !strneq (s, last, len)))
5704         {
5705           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5706           return len;
5707         }
5708
5709   return 0;
5710 }
5711
5712
5713 /*
5714  * Handle attributes.  Currently, tags are generated for defines
5715  * and records.
5716  *
5717  * They are on the form:
5718  * -define(foo, bar).
5719  * -define(Foo(M, N), M+N).
5720  * -record(graph, {vtab = notable, cyclic = true}).
5721  */
5722 static void
5723 erlang_attribute (s)
5724      char *s;
5725 {
5726   char *cp = s;
5727
5728   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5729       && *cp++ == '(')
5730     {
5731       int len = erlang_atom (skip_spaces (cp));
5732       if (len > 0)
5733         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5734     }
5735   return;
5736 }
5737
5738
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a letter, an underscore or a single quote, and also
 * when a quoted atom is not closed before the end of the string.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }
  else if (*p == '\'')
    {
      for (p++; *p != '\''; p++)
        {
          if (*p == '\\')
            p++;                /* backslash takes the next char with it */
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
        }
      p++;                      /* count the closing quote */
    }

  return (int) (p - s);
}
5767
5768 \f
5769 static char *scan_separators __P((char *));
5770 static void add_regex __P((char *, language *));
5771 static char *substitute __P((char *, char *, struct re_registers *));
5772
5773 /*
5774  * Take a string like "/blah/" and turn it into "blah", verifying
5775  * that the first and last characters are the same, and handling
5776  * quoted separator characters.  Actually, stops on the occurrence of
5777  * an unquoted separator.  Also process \t, \n, etc. and turn into
5778  * appropriate characters. Works in place.  Null terminates name string.
5779  * Returns pointer to terminating separator, or NULL for
5780  * unterminated regexps.
5781  */
5782 static char *
5783 scan_separators (name)
5784      char *name;
5785 {
5786   char sep = name[0];
5787   char *copyto = name;
5788   bool quoted = FALSE;
5789
5790   for (++name; *name != '\0'; ++name)
5791     {
5792       if (quoted)
5793         {
5794           switch (*name)
5795             {
5796             case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
5797             case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
5798             case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
5799             case 'e': *copyto++ = 033; break;    /* ESC (delete)         */
5800             case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
5801             case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
5802             case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
5803             case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
5804             case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
5805             default:
5806               if (*name == sep)
5807                 *copyto++ = sep;
5808               else
5809                 {
5810                   /* Something else is quoted, so preserve the quote. */
5811                   *copyto++ = '\\';
5812                   *copyto++ = *name;
5813                 }
5814               break;
5815             }
5816           quoted = FALSE;
5817         }
5818       else if (*name == '\\')
5819         quoted = TRUE;
5820       else if (*name == sep)
5821         break;
5822       else
5823         *copyto++ = *name;
5824     }
5825   if (*name != sep)
5826     name = NULL;                /* signal unterminated regexp */
5827
5828   /* Terminate copied string. */
5829   *copyto = '\0';
5830   return name;
5831 }
5832
5833 /* Look at the argument of --regex or --no-regex and do the right
5834    thing.  Same for each line of a regexp file. */
5835 static void
5836 analyse_regex (regex_arg)
5837      char *regex_arg;
5838 {
5839   if (regex_arg == NULL)
5840     {
5841       free_regexps ();          /* --no-regex: remove existing regexps */
5842       return;
5843     }
5844
5845   /* A real --regexp option or a line in a regexp file. */
5846   switch (regex_arg[0])
5847     {
5848       /* Comments in regexp file or null arg to --regex. */
5849     case '\0':
5850     case ' ':
5851     case '\t':
5852       break;
5853
5854       /* Read a regex file.  This is recursive and may result in a
5855          loop, which will stop when the file descriptors are exhausted. */
5856     case '@':
5857       {
5858         FILE *regexfp;
5859         linebuffer regexbuf;
5860         char *regexfile = regex_arg + 1;
5861
5862         /* regexfile is a file containing regexps, one per line. */
5863         regexfp = fopen (regexfile, "r");
5864         if (regexfp == NULL)
5865           {
5866             pfatal (regexfile);
5867             return;
5868           }
5869         linebuffer_init (&regexbuf);
5870         while (readline_internal (&regexbuf, regexfp) > 0)
5871           analyse_regex (regexbuf.buffer);
5872         free (regexbuf.buffer);
5873         fclose (regexfp);
5874       }
5875       break;
5876
5877       /* Regexp to be used for a specific language only. */
5878     case '{':
5879       {
5880         language *lang;
5881         char *lang_name = regex_arg + 1;
5882         char *cp;
5883
5884         for (cp = lang_name; *cp != '}'; cp++)
5885           if (*cp == '\0')
5886             {
5887               error ("unterminated language name in regex: %s", regex_arg);
5888               return;
5889             }
5890         *cp++ = '\0';
5891         lang = get_language_from_langname (lang_name);
5892         if (lang == NULL)
5893           return;
5894         add_regex (cp, lang);
5895       }
5896       break;
5897
5898       /* Regexp to be used for any language. */
5899     default:
5900       add_regex (regex_arg, NULL);
5901       break;
5902     }
5903 }
5904
5905 /* Separate the regexp pattern, compile it,
5906    and care for optional name and modifiers. */
5907 static void
5908 add_regex (regexp_pattern, lang)
5909      char *regexp_pattern;
5910      language *lang;
5911 {
5912   static struct re_pattern_buffer zeropattern;
5913   char sep, *pat, *name, *modifiers;
5914   const char *err;
5915   struct re_pattern_buffer *patbuf;
5916   regexp *rp;
5917   bool
5918     force_explicit_name = TRUE, /* do not use implicit tag names */
5919     ignore_case = FALSE,        /* case is significant */
5920     multi_line = FALSE,         /* matches are done one line at a time */
5921     single_line = FALSE;        /* dot does not match newline */
5922
5923
5924   if (strlen(regexp_pattern) < 3)
5925     {
5926       error ("null regexp", (char *)NULL);
5927       return;
5928     }
5929   sep = regexp_pattern[0];
5930   name = scan_separators (regexp_pattern);
5931   if (name == NULL)
5932     {
5933       error ("%s: unterminated regexp", regexp_pattern);
5934       return;
5935     }
5936   if (name[1] == sep)
5937     {
5938       error ("null name for regexp \"%s\"", regexp_pattern);
5939       return;
5940     }
5941   modifiers = scan_separators (name);
5942   if (modifiers == NULL)        /* no terminating separator --> no name */
5943     {
5944       modifiers = name;
5945       name = "";
5946     }
5947   else
5948     modifiers += 1;             /* skip separator */
5949
5950   /* Parse regex modifiers. */
5951   for (; modifiers[0] != '\0'; modifiers++)
5952     switch (modifiers[0])
5953       {
5954       case 'N':
5955         if (modifiers == name)
5956           error ("forcing explicit tag name but no name, ignoring", NULL);
5957         force_explicit_name = TRUE;
5958         break;
5959       case 'i':
5960         ignore_case = TRUE;
5961         break;
5962       case 's':
5963         single_line = TRUE;
5964         /* FALLTHRU */
5965       case 'm':
5966         multi_line = TRUE;
5967         need_filebuf = TRUE;
5968         break;
5969       default:
5970         {
5971           char wrongmod [2];
5972           wrongmod[0] = modifiers[0];
5973           wrongmod[1] = '\0';
5974           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5975         }
5976         break;
5977       }
5978
5979   patbuf = xnew (1, struct re_pattern_buffer);
5980   *patbuf = zeropattern;
5981   if (ignore_case)
5982     {
5983       static char lc_trans[CHARS];
5984       int i;
5985       for (i = 0; i < CHARS; i++)
5986         lc_trans[i] = lowcase (i);
5987       patbuf->translate = lc_trans;     /* translation table to fold case  */
5988     }
5989
5990   if (multi_line)
5991     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5992   else
5993     pat = regexp_pattern;
5994
5995   if (single_line)
5996     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5997   else
5998     re_set_syntax (RE_SYNTAX_EMACS);
5999
6000   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6001   if (multi_line)
6002     free (pat);
6003   if (err != NULL)
6004     {
6005       error ("%s while compiling pattern", err);
6006       return;
6007     }
6008
6009   rp = p_head;
6010   p_head = xnew (1, regexp);
6011   p_head->pattern = savestr (regexp_pattern);
6012   p_head->p_next = rp;
6013   p_head->lang = lang;
6014   p_head->pat = patbuf;
6015   p_head->name = savestr (name);
6016   p_head->error_signaled = FALSE;
6017   p_head->force_explicit_name = force_explicit_name;
6018   p_head->ignore_case = ignore_case;
6019   p_head->multi_line = multi_line;
6020 }
6021
6022 /*
6023  * Do the substitutions indicated by the regular expression and
6024  * arguments.
6025  */
6026 static char *
6027 substitute (in, out, regs)
6028      char *in, *out;
6029      struct re_registers *regs;
6030 {
6031   char *result, *t;
6032   int size, dig, diglen;
6033
6034   result = NULL;
6035   size = strlen (out);
6036
6037   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6038   if (out[size - 1] == '\\')
6039     fatal ("pattern error in \"%s\"", out);
6040   for (t = etags_strchr (out, '\\');
6041        t != NULL;
6042        t = etags_strchr (t + 2, '\\'))
6043     if (ISDIGIT (t[1]))
6044       {
6045         dig = t[1] - '0';
6046         diglen = regs->end[dig] - regs->start[dig];
6047         size += diglen - 2;
6048       }
6049     else
6050       size -= 1;
6051
6052   /* Allocate space and do the substitutions. */
6053   assert (size >= 0);
6054   result = xnew (size + 1, char);
6055
6056   for (t = result; *out != '\0'; out++)
6057     if (*out == '\\' && ISDIGIT (*++out))
6058       {
6059         dig = *out - '0';
6060         diglen = regs->end[dig] - regs->start[dig];
6061         strncpy (t, in + regs->start[dig], diglen);
6062         t += diglen;
6063       }
6064     else
6065       *t++ = *out;
6066   *t = '\0';
6067
6068   assert (t <= result + size);
6069   assert (t - result == (int)strlen (result));
6070
6071   return result;
6072 }
6073
6074 /* Deallocate all regexps. */
6075 static void
6076 free_regexps ()
6077 {
6078   regexp *rp;
6079   while (p_head != NULL)
6080     {
6081       rp = p_head->p_next;
6082       free (p_head->pattern);
6083       free (p_head->name);
6084       free (p_head);
6085       p_head = rp;
6086     }
6087   return;
6088 }
6089
6090 /*
6091  * Reads the whole file as a single string from `filebuf' and looks for
6092  * multi-line regular expressions, creating tags on matches.
6093  * readline already dealt with normal regexps.
6094  *
6095  * Idea by Ben Wing <ben@666.com> (2002).
6096  */
6097 static void
6098 regex_tag_multiline ()
6099 {
6100   char *buffer = filebuf.buffer;
6101   regexp *rp;
6102   char *name;
6103
6104   for (rp = p_head; rp != NULL; rp = rp->p_next)
6105     {
6106       int match = 0;
6107
6108       if (!rp->multi_line)
6109         continue;               /* skip normal regexps */
6110
6111       /* Generic initialisations before parsing file from memory. */
6112       lineno = 1;               /* reset global line number */
6113       charno = 0;               /* reset global char number */
6114       linecharno = 0;           /* reset global char number of line start */
6115
6116       /* Only use generic regexps or those for the current language. */
6117       if (rp->lang != NULL && rp->lang != curfdp->lang)
6118         continue;
6119
6120       while (match >= 0 && match < filebuf.len)
6121         {
6122           match = re_search (rp->pat, buffer, filebuf.len, charno,
6123                              filebuf.len - match, &rp->regs);
6124           switch (match)
6125             {
6126             case -2:
6127               /* Some error. */
6128               if (!rp->error_signaled)
6129                 {
6130                   error ("regexp stack overflow while matching \"%s\"",
6131                          rp->pattern);
6132                   rp->error_signaled = TRUE;
6133                 }
6134               break;
6135             case -1:
6136               /* No match. */
6137               break;
6138             default:
6139               if (match == rp->regs.end[0])
6140                 {
6141                   if (!rp->error_signaled)
6142                     {
6143                       error ("regexp matches the empty string: \"%s\"",
6144                              rp->pattern);
6145                       rp->error_signaled = TRUE;
6146                     }
6147                   match = -3;   /* exit from while loop */
6148                   break;
6149                 }
6150
6151               /* Match occurred.  Construct a tag. */
6152               while (charno < rp->regs.end[0])
6153                 if (buffer[charno++] == '\n')
6154                   lineno++, linecharno = charno;
6155               name = rp->name;
6156               if (name[0] == '\0')
6157                 name = NULL;
6158               else /* make a named tag */
6159                 name = substitute (buffer, rp->name, &rp->regs);
6160               if (rp->force_explicit_name)
6161                 /* Force explicit tag name, if a name is there. */
6162                 pfnote (name, TRUE, buffer + linecharno,
6163                         charno - linecharno + 1, lineno, linecharno);
6164               else
6165                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6166                           charno - linecharno + 1, lineno, linecharno);
6167               break;
6168             }
6169         }
6170     }
6171 }
6172
6173 \f
6174 static bool
6175 nocase_tail (cp)
6176      char *cp;
6177 {
6178   register int len = 0;
6179
6180   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6181     cp++, len++;
6182   if (*cp == '\0' && !intoken (dbp[len]))
6183     {
6184       dbp += len;
6185       return TRUE;
6186     }
6187   return FALSE;
6188 }
6189
6190 static void
6191 get_tag (bp, namepp)
6192      register char *bp;
6193      char **namepp;
6194 {
6195   register char *cp = bp;
6196
6197   if (*bp != '\0')
6198     {
6199       /* Go till you get to white space or a syntactic break */
6200       for (cp = bp + 1; !notinname (*cp); cp++)
6201         continue;
6202       make_tag (bp, cp - bp, TRUE,
6203                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6204     }
6205
6206   if (namepp != NULL)
6207     *namepp = savenstr (bp, cp - bp);
6208 }
6209
6210 /*
6211  * Read a line of text from `stream' into `lbp', excluding the
6212  * newline or CR-NL, if any.  Return the number of characters read from
6213  * `stream', which is the length of the line including the newline.
6214  *
6215  * On DOS or Windows we do not count the CR character, if any before the
6216  * NL, in the returned length; this mirrors the behavior of Emacs on those
6217  * platforms (for text files, it translates CR-NL to NL as it reads in the
6218  * file).
6219  *
6220  * If multi-line regular expressions are requested, each line read is
6221  * appended to `filebuf'.
6222  */
6223 static long
6224 readline_internal (lbp, stream)
6225      linebuffer *lbp;
6226      register FILE *stream;
6227 {
6228   char *buffer = lbp->buffer;
6229   register char *p = lbp->buffer;
6230   register char *pend;
6231   int chars_deleted;
6232
6233   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6234
6235   for (;;)
6236     {
6237       register int c = getc (stream);
6238       if (p == pend)
6239         {
6240           /* We're at the end of linebuffer: expand it. */
6241           lbp->size *= 2;
6242           xrnew (buffer, lbp->size, char);
6243           p += buffer - lbp->buffer;
6244           pend = buffer + lbp->size;
6245           lbp->buffer = buffer;
6246         }
6247       if (c == EOF)
6248         {
6249           *p = '\0';
6250           chars_deleted = 0;
6251           break;
6252         }
6253       if (c == '\n')
6254         {
6255           if (p > buffer && p[-1] == '\r')
6256             {
6257               p -= 1;
6258 #ifdef DOS_NT
6259              /* Assume CRLF->LF translation will be performed by Emacs
6260                 when loading this file, so CRs won't appear in the buffer.
6261                 It would be cleaner to compensate within Emacs;
6262                 however, Emacs does not know how many CRs were deleted
6263                 before any given point in the file.  */
6264               chars_deleted = 1;
6265 #else
6266               chars_deleted = 2;
6267 #endif
6268             }
6269           else
6270             {
6271               chars_deleted = 1;
6272             }
6273           *p = '\0';
6274           break;
6275         }
6276       *p++ = c;
6277     }
6278   lbp->len = p - buffer;
6279
6280   if (need_filebuf              /* we need filebuf for multi-line regexps */
6281       && chars_deleted > 0)     /* not at EOF */
6282     {
6283       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6284         {
6285           /* Expand filebuf. */
6286           filebuf.size *= 2;
6287           xrnew (filebuf.buffer, filebuf.size, char);
6288         }
6289       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6290       filebuf.len += lbp->len;
6291       filebuf.buffer[filebuf.len++] = '\n';
6292       filebuf.buffer[filebuf.len] = '\0';
6293     }
6294
6295   return lbp->len + chars_deleted;
6296 }
6297
6298 /*
6299  * Like readline_internal, above, but in addition try to match the
6300  * input line against relevant regular expressions and manage #line
6301  * directives.
6302  */
6303 static void
6304 readline (lbp, stream)
6305      linebuffer *lbp;
6306      FILE *stream;
6307 {
6308   long result;
6309
6310   linecharno = charno;          /* update global char number of line start */
6311   result = readline_internal (lbp, stream); /* read line */
6312   lineno += 1;                  /* increment global line number */
6313   charno += result;             /* increment global char number */
6314
6315   /* Honour #line directives. */
6316   if (!no_line_directive)
6317     {
6318       static bool discard_until_line_directive;
6319
6320       /* Check whether this is a #line directive. */
6321       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6322         {
6323           unsigned int lno;
6324           int start = 0;
6325
6326           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6327               && start > 0)     /* double quote character found */
6328             {
6329               char *endp = lbp->buffer + start;
6330
6331               while ((endp = etags_strchr (endp, '"')) != NULL
6332                      && endp[-1] == '\\')
6333                 endp++;
6334               if (endp != NULL)
6335                 /* Ok, this is a real #line directive.  Let's deal with it. */
6336                 {
6337                   char *taggedabsname;  /* absolute name of original file */
6338                   char *taggedfname;    /* name of original file as given */
6339                   char *name;           /* temp var */
6340
6341                   discard_until_line_directive = FALSE; /* found it */
6342                   name = lbp->buffer + start;
6343                   *endp = '\0';
6344                   canonicalize_filename (name); /* for DOS */
6345                   taggedabsname = absolute_filename (name, tagfiledir);
6346                   if (filename_is_absolute (name)
6347                       || filename_is_absolute (curfdp->infname))
6348                     taggedfname = savestr (taggedabsname);
6349                   else
6350                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6351
6352                   if (streq (curfdp->taggedfname, taggedfname))
6353                     /* The #line directive is only a line number change.  We
6354                        deal with this afterwards. */
6355                     free (taggedfname);
6356                   else
6357                     /* The tags following this #line directive should be
6358                        attributed to taggedfname.  In order to do this, set
6359                        curfdp accordingly. */
6360                     {
6361                       fdesc *fdp; /* file description pointer */
6362
6363                       /* Go look for a file description already set up for the
6364                          file indicated in the #line directive.  If there is
6365                          one, use it from now until the next #line
6366                          directive. */
6367                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6368                         if (streq (fdp->infname, curfdp->infname)
6369                             && streq (fdp->taggedfname, taggedfname))
6370                           /* If we remove the second test above (after the &&)
6371                              then all entries pertaining to the same file are
6372                              coalesced in the tags file.  If we use it, then
6373                              entries pertaining to the same file but generated
6374                              from different files (via #line directives) will
6375                              go into separate sections in the tags file.  These
6376                              alternatives look equivalent.  The first one
6377                              destroys some apparently useless information. */
6378                           {
6379                             curfdp = fdp;
6380                             free (taggedfname);
6381                             break;
6382                           }
6383                       /* Else, if we already tagged the real file, skip all
6384                          input lines until the next #line directive. */
6385                       if (fdp == NULL) /* not found */
6386                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6387                           if (streq (fdp->infabsname, taggedabsname))
6388                             {
6389                               discard_until_line_directive = TRUE;
6390                               free (taggedfname);
6391                               break;
6392                             }
6393                       /* Else create a new file description and use that from
6394                          now on, until the next #line directive. */
6395                       if (fdp == NULL) /* not found */
6396                         {
6397                           fdp = fdhead;
6398                           fdhead = xnew (1, fdesc);
6399                           *fdhead = *curfdp; /* copy curr. file description */
6400                           fdhead->next = fdp;
6401                           fdhead->infname = savestr (curfdp->infname);
6402                           fdhead->infabsname = savestr (curfdp->infabsname);
6403                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6404                           fdhead->taggedfname = taggedfname;
6405                           fdhead->usecharno = FALSE;
6406                           fdhead->prop = NULL;
6407                           fdhead->written = FALSE;
6408                           curfdp = fdhead;
6409                         }
6410                     }
6411                   free (taggedabsname);
6412                   lineno = lno - 1;
6413                   readline (lbp, stream);
6414                   return;
6415                 } /* if a real #line directive */
6416             } /* if #line is followed by a a number */
6417         } /* if line begins with "#line " */
6418
6419       /* If we are here, no #line directive was found. */
6420       if (discard_until_line_directive)
6421         {
6422           if (result > 0)
6423             {
6424               /* Do a tail recursion on ourselves, thus discarding the contents
6425                  of the line buffer. */
6426               readline (lbp, stream);
6427               return;
6428             }
6429           /* End of file. */
6430           discard_until_line_directive = FALSE;
6431           return;
6432         }
6433     } /* if #line directives should be considered */
6434
6435   {
6436     int match;
6437     regexp *rp;
6438     char *name;
6439
6440     /* Match against relevant regexps. */
6441     if (lbp->len > 0)
6442       for (rp = p_head; rp != NULL; rp = rp->p_next)
6443         {
6444           /* Only use generic regexps or those for the current language.
6445              Also do not use multiline regexps, which is the job of
6446              regex_tag_multiline. */
6447           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6448               || rp->multi_line)
6449             continue;
6450
6451           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6452           switch (match)
6453             {
6454             case -2:
6455               /* Some error. */
6456               if (!rp->error_signaled)
6457                 {
6458                   error ("regexp stack overflow while matching \"%s\"",
6459                          rp->pattern);
6460                   rp->error_signaled = TRUE;
6461                 }
6462               break;
6463             case -1:
6464               /* No match. */
6465               break;
6466             case 0:
6467               /* Empty string matched. */
6468               if (!rp->error_signaled)
6469                 {
6470                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6471                   rp->error_signaled = TRUE;
6472                 }
6473               break;
6474             default:
6475               /* Match occurred.  Construct a tag. */
6476               name = rp->name;
6477               if (name[0] == '\0')
6478                 name = NULL;
6479               else /* make a named tag */
6480                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6481               if (rp->force_explicit_name)
6482                 /* Force explicit tag name, if a name is there. */
6483                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6484               else
6485                 make_tag (name, strlen (name), TRUE,
6486                           lbp->buffer, match, lineno, linecharno);
6487               break;
6488             }
6489         }
6490   }
6491 }
6492
6493 \f
/*
 * Duplicate the string CP: return a pointer to strlen(cp)+1 bytes
 * obtained through savenstr, holding a copy of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6504
6505 /*
6506  * Return a pointer to a space of size LEN+1 allocated with xnew where
6507  * the string CP has been copied for at most the first LEN characters.
6508  */
6509 static char *
6510 savenstr (cp, len)
6511      char *cp;
6512      int len;
6513 {
6514   register char *dp;
6515
6516   dp = xnew (len + 1, char);
6517   strncpy (dp, cp, len);
6518   dp[len] = '\0';
6519   return dp;
6520 }
6521
/*
 * Return a pointer to the last occurrence of the character c in
 * the string sp, or NULL if c does not occur.  The terminating
 * null character is part of the search, so looking for '\0' yields
 * a pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6543
/*
 * Return a pointer to the first occurrence of the character c in
 * the string sp, or NULL if c does not occur.  The terminating
 * null character is part of the search, so looking for '\0' yields
 * a pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;;)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
      sp++;
    }
}
6562
/*
 * Compare the strings s1 and s2, treating upper and lower case
 * letters as equal.  The result is zero if the strings match, and
 * otherwise the difference between the first pair of characters
 * that do not match (taken in lower case when both are letters).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings until s1 ends or a pair of characters differs. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6583
/*
 * Compare at most the first n characters of the strings s1 and s2,
 * treating upper and lower case letters as equal.  The result is
 * zero if those characters match (in particular when n is not
 * positive), and otherwise the difference between the first pair of
 * characters that do not match (taken in lower case when both are
 * letters).
 *
 * Characters beyond the first n are never looked at, so a string
 * that ends after exactly n characters compares equal to a longer
 * one that starts with the same n characters.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  while (n-- > 0)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference, or when both strings end here
         (s1 ended and there is no difference). */
      if (diff != 0 || *s1 == '\0')
        return diff;
      s1++, s2++;
    }

  /* The first n characters are all equal. */
  return 0;
}
6609
/* Return a pointer to the first character at or after CP for which
   `iswhite' is false.  The end of the string does not count as
   white space, so the scan stops there at the latest. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6619
/* Return a pointer to the first character at or after CP that is
   either the end of the string or a character for which `iswhite'
   is true. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        break;
      cp++;
    }
  return cp;
}
6629
/* Report a fatal error and terminate the program: S1 and S2 are
   handed to `error' unchanged (S1 being the printf control string
   and S2 its argument), then the process exits with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6638
/* Print S1 together with the system error message for the current
   `errno' (via perror), then exit with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6646
6647 static void
6648 suggest_asking_for_help ()
6649 {
6650   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6651            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6652   exit (EXIT_FAILURE);
6653 }
6654
6655 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6656 static void
6657 error (s1, s2)
6658      const char *s1, *s2;
6659 {
6660   fprintf (stderr, "%s: ", progname);
6661   fprintf (stderr, s1, s2);
6662   fprintf (stderr, "\n");
6663 }
6664
6665 /* Return a newly-allocated string whose contents
6666    concatenate those of s1, s2, s3.  */
6667 static char *
6668 concat (s1, s2, s3)
6669      char *s1, *s2, *s3;
6670 {
6671   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6672   char *result = xnew (len1 + len2 + len3 + 1, char);
6673
6674   strcpy (result, s1);
6675   strcpy (result + len1, s2);
6676   strcpy (result + len1 + len2, s3);
6677   result[len1 + len2 + len3] = '\0';
6678
6679   return result;
6680 }
6681
6682 \f
6683 /* Does the same work as the system V getcwd, but does not need to
6684    guess the buffer size in advance. */
6685 static char *
6686 etags_getcwd ()
6687 {
6688 #ifdef HAVE_GETCWD
6689   int bufsize = 200;
6690   char *path = xnew (bufsize, char);
6691
6692   while (getcwd (path, bufsize) == NULL)
6693     {
6694       if (errno != ERANGE)
6695         pfatal ("getcwd");
6696       bufsize *= 2;
6697       free (path);
6698       path = xnew (bufsize, char);
6699     }
6700
6701   canonicalize_filename (path);
6702   return path;
6703
6704 #else /* not HAVE_GETCWD */
6705 #if MSDOS
6706
6707   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6708
6709   getwd (path);
6710
6711   for (p = path; *p != '\0'; p++)
6712     if (*p == '\\')
6713       *p = '/';
6714     else
6715       *p = lowcase (*p);
6716
6717   return strdup (path);
6718 #else /* not MSDOS */
6719   linebuffer path;
6720   FILE *pipe;
6721
6722   linebuffer_init (&path);
6723   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6724   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6725     pfatal ("pwd");
6726   pclose (pipe);
6727
6728   return path.buffer;
6729 #endif /* not MSDOS */
6730 #endif /* not HAVE_GETCWD */
6731 }
6732
6733 /* Return a newly allocated string containing the file name of FILE
6734    relative to the absolute directory DIR (which should end with a slash). */
6735 static char *
6736 relative_filename (file, dir)
6737      char *file, *dir;
6738 {
6739   char *fp, *dp, *afn, *res;
6740   int i;
6741
6742   /* Find the common root of file and dir (with a trailing slash). */
6743   afn = absolute_filename (file, cwd);
6744   fp = afn;
6745   dp = dir;
6746   while (*fp++ == *dp++)
6747     continue;
6748   fp--, dp--;                   /* back to the first differing char */
6749 #ifdef DOS_NT
6750   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6751     return afn;
6752 #endif
6753   do                            /* look at the equal chars until '/' */
6754     fp--, dp--;
6755   while (*fp != '/');
6756
6757   /* Build a sequence of "../" strings for the resulting relative file name. */
6758   i = 0;
6759   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6760     i += 1;
6761   res = xnew (3*i + strlen (fp + 1) + 1, char);
6762   res[0] = '\0';
6763   while (i-- > 0)
6764     strcat (res, "../");
6765
6766   /* Add the file name relative to the common root of file and dir. */
6767   strcat (res, fp + 1);
6768   free (afn);
6769
6770   return res;
6771 }
6772
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied; any other FILE is appended to DIR.
   The "/dirname/.." and "/." components are then removed from the
   result in place.  Should nothing be left, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the slash that starts the previous
                 component, which is the one to delete. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination lie in the same string and may
                 overlap, so strcpy must not be used here.  The +1
                 moves the terminating null as well. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Same overlap consideration as above. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6836
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end
   with a slash).  FILE is canonicalized in place.  When FILE
   contains no slash, the result is a copy of DIR. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of
         the call, then put the overwritten character back. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
      return dirname;
    }

  return savestr (dir);
}
6858
/* Tell whether the string FN is an absolute file name, that is,
   whether it begins with a slash or, under DOS_NT, with a drive
   letter followed by a colon and a slash.  FN must have been
   canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6871
/* Bring the file name FN into canonical form, in place.  Under
   DOS_NT a lower case drive letter is converted to upper case and
   every backslash is translated into a slash.  Elsewhere nothing
   is done. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *scan;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  scan = fn;
  while (*scan != '\0')
    {
      if (*scan == '\\')
        *scan = '/';
      scan++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6890
6891 \f
6892 /* Initialize a linebuffer for use */
6893 static void
6894 linebuffer_init (lbp)
6895      linebuffer *lbp;
6896 {
6897   lbp->size = (DEBUG) ? 3 : 200;
6898   lbp->buffer = xnew (lbp->size, char);
6899   lbp->buffer[0] = '\0';
6900   lbp->len = 0;
6901 }
6902
6903 /* Set the minimum size of a string contained in a linebuffer. */
6904 static void
6905 linebuffer_setlen (lbp, toksize)
6906      linebuffer *lbp;
6907      int toksize;
6908 {
6909   while (lbp->size <= toksize)
6910     {
6911       lbp->size *= 2;
6912       xrnew (lbp->buffer, lbp->size, char);
6913     }
6914   lbp->len = toksize;
6915 }
6916
6917 /* Like malloc but get fatal error if memory is exhausted. */
6918 static PTR
6919 xmalloc (size)
6920      unsigned int size;
6921 {
6922   PTR result = (PTR) malloc (size);
6923   if (result == NULL)
6924     fatal ("virtual memory exhausted", (char *)NULL);
6925   return result;
6926 }
6927
6928 static PTR
6929 xrealloc (ptr, size)
6930      char *ptr;
6931      unsigned int size;
6932 {
6933   PTR result = (PTR) realloc (ptr, size);
6934   if (result == NULL)
6935     fatal ("virtual memory exhausted", (char *)NULL);
6936   return result;
6937 }
6938
6939 /*
6940  * Local Variables:
6941  * indent-tabs-mode: t
6942  * tab-width: 8
6943  * fill-column: 79
6944  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6945  * c-file-style: "gnu"
6946  * End:
6947  */
6948
6949 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6950    (do not change this comment) */
6951
6952 /* etags.c ends here */