lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software; you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation; either version 3, or (at your option)
  40 any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program; see the file COPYING.  If not, write to the
  49 Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  50 Boston, MA 02110-1301, USA. */
  51
  52
  53 /* NB To comply with the above BSD license, copyright information is
  54 reproduced in etc/ETAGS.README.  That file should be updated when the
  55 above notices are.
  56
  57 To the best of our knowledge, this code was originally based on the
  58 ctags.c distributed with BSD4.2, which was copyrighted by the
  59 University of California, as described above. */
  60
  61
  62 /*
  63  * Authors:
  64  * 1983 Ctags originally by Ken Arnold.
  65  * 1984 Fortran added by Jim Kleckner.
  66  * 1984 Ed Pelegri-Llopart added C typedefs.
  67  * 1985 Emacs TAGS format by Richard Stallman.
  68  * 1989 Sam Kendall added C++.
  69  * 1992 Joseph B. Wells improved C and C++ parsing.
  70  * 1993 Francesco Potortì reorganised C and C++.
  71  * 1994 Line-by-line regexp tags by Tom Tromey.
  72  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  73  * 2002 #line directives by Francesco Potortì.
  74  *
  75  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  76  */
  77
  78 /*
  79  * If you want to add support for a new language, start by looking at the LUA
  80  * language, which is the simplest.  Alternatively, consider shipping a
  81  * configuration file containing regexp definitions for etags.
  82  */
  83
  84 char pot_etags_version[] = "@(#) pot revision number is 17.26";
  85
  86 #define TRUE    1
  87 #define FALSE   0
  88
  89 #ifdef DEBUG
  90 #  undef DEBUG
  91 #  define DEBUG TRUE
  92 #else
  93 #  define DEBUG  FALSE
  94 #  define NDEBUG                /* disable assert */
  95 #endif
  96
  97 #ifdef HAVE_CONFIG_H
  98 # include <config.h>
  99   /* On some systems, Emacs defines static as nothing for the sake
 100      of unexec.  We don't want that here since we don't use unexec. */
 101 # undef static
 102 # ifndef PTR                    /* for XEmacs */
 103 #   define PTR void *
 104 # endif
 105 # ifndef __P                    /* for XEmacs */
 106 #   define __P(args) args
 107 # endif
 108 #else  /* no config.h */
 109 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 110 #   define __P(args) args       /* use prototypes */
 111 #   define PTR void *           /* for generic pointers */
 112 # else /* not standard C */
 113 #   define __P(args) ()         /* no prototypes */
 114 #   define const                /* remove const for old compilers' sake */
 115 #   define PTR long *           /* don't use void* */
 116 # endif
 117 #endif /* !HAVE_CONFIG_H */
 118
 119 #ifndef _GNU_SOURCE
 120 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 121 #endif
 122
 123 /* WIN32_NATIVE is for XEmacs.
 124    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 125 #ifdef WIN32_NATIVE
 126 # undef MSDOS
 127 # undef  WINDOWSNT
 128 # define WINDOWSNT
 129 #endif /* WIN32_NATIVE */
 130
 131 #ifdef MSDOS
 132 # undef MSDOS
 133 # define MSDOS TRUE
 134 # include <fcntl.h>
 135 # include <sys/param.h>
 136 # include <io.h>
 137 # ifndef HAVE_CONFIG_H
 138 #   define DOS_NT
 139 #   include <sys/config.h>
 140 # endif
 141 #else
 142 # define MSDOS FALSE
 143 #endif /* MSDOS */
 144
 145 #ifdef WINDOWSNT
 146 # include <stdlib.h>
 147 # include <fcntl.h>
 148 # include <string.h>
 149 # include <direct.h>
 150 # include <io.h>
 151 # define MAXPATHLEN _MAX_PATH
 152 # undef HAVE_NTGUI
 153 # undef  DOS_NT
 154 # define DOS_NT
 155 # ifndef HAVE_GETCWD
 156 #   define HAVE_GETCWD
 157 # endif /* undef HAVE_GETCWD */
 158 #else /* not WINDOWSNT */
 159 # ifdef STDC_HEADERS
 160 #  include <stdlib.h>
 161 #  include <string.h>
 162 # else /* no standard C headers */
 163     extern char *getenv ();
 164 #  ifdef VMS
 165 #   define EXIT_SUCCESS 1
 166 #   define EXIT_FAILURE 0
 167 #  else /* no VMS */
 168 #   define EXIT_SUCCESS 0
 169 #   define EXIT_FAILURE 1
 170 #  endif
 171 # endif
 172 #endif /* !WINDOWSNT */
 173
 174 #ifdef HAVE_UNISTD_H
 175 # include <unistd.h>
 176 #else
 177 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 178     extern char *getcwd (char *buf, size_t size);
 179 # endif
 180 #endif /* HAVE_UNISTD_H */
 181
 182 #include <stdio.h>
 183 #include <ctype.h>
 184 #include <errno.h>
 185 #ifndef errno
 186   extern int errno;
 187 #endif
 188 #include <sys/types.h>
 189 #include <sys/stat.h>
 190
 191 #include <assert.h>
 192 #ifdef NDEBUG
 193 # undef  assert                 /* some systems have a buggy assert.h */
 194 # define assert(x) ((void) 0)
 195 #endif
 196
 197 #if !defined (S_ISREG) && defined (S_IFREG)
 198 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 199 #endif
 200
 201 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 202 # define NO_LONG_OPTIONS TRUE
 203 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 204   extern char *optarg;
 205   extern int optind, opterr;
 206 #else
 207 # define NO_LONG_OPTIONS FALSE
 208 # include <getopt.h>
 209 #endif /* NO_LONG_OPTIONS */
 210
 211 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 212 # ifdef __CYGWIN__              /* compiling on Cygwin */
 213                              !!! NOTICE !!!
 214  the regex.h distributed with Cygwin is not compatible with etags, alas!
 215 If you want regular expression support, you should delete this notice and
 216               arrange to use the GNU regex.h and regex.c.
 217 # endif
 218 #endif
 219 #include <regex.h>
 220
 221 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 222  Leave it undefined to make the program "etags", which makes emacs-style
 223  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 224 #ifdef CTAGS
 225 # undef  CTAGS
 226 # define CTAGS TRUE
 227 #else
 228 # define CTAGS FALSE
 229 #endif
 230
 231 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 232 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 233 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 234 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 235
  236 #define CHARS 256               /* 2^CHAR_BIT, for 8-bit chars */
  237 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* reduce x to 0..CHARS-1 */
     /* Table-driven character classes; CHAR() keeps the index inside the
        CHARS-sized tables even when c is a negative char value. */
  238 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
  239 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
  240 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
  241 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
  242 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
  243
     /* <ctype.h> wrappers: the argument is passed through CHAR() first, so
        the library routine always receives a value in 0..CHARS-1. */
  244 #define ISALNUM(c)      isalnum (CHAR(c))
  245 #define ISALPHA(c)      isalpha (CHAR(c))
  246 #define ISDIGIT(c)      isdigit (CHAR(c))
  247 #define ISLOWER(c)      islower (CHAR(c))
  248
  249 #define lowcase(c)      tolower (CHAR(c))
  250 #define upcase(c)       toupper (CHAR(c))
 251
 252
 253 /*
 254  *      xnew, xrnew -- allocate, reallocate storage
 255  *
 256  * SYNOPSIS:    Type *xnew (int n, Type);
 257  *              void xrnew (OldPointer, int n, Type);
 258  */
 259 #if DEBUG
 260 # include "chkmalloc.h"
 261 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 262                                                   (n) * sizeof (Type)))
 263 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 264                                         (char *) (op), (n) * sizeof (Type)))
 265 #else
 266 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 267 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 268                                         (char *) (op), (n) * sizeof (Type)))
 269 #endif
 270
 271 #define bool int
 272
 273 typedef void Lang_function __P((FILE *));
 274
  275 typedef struct                  /* pairs a file name suffix with a decompression command */
  276 {
  277   char *suffix;                 /* file name suffix for this compressor */
  278   char *command;                /* takes one arg and decompresses to stdout */
  279 } compressor;
 280
  281 typedef struct                  /* everything etags knows about one source language */
  282 {
  283   char *name;                   /* language name */
  284   char *help;                   /* detailed help for the language */
  285   Lang_function *function;      /* parse function */
     /* The three lists below are presumably NULL-terminated, like the
        *_suffixes / *_filenames / *_interpreters arrays defined later in
        this file -- confirm against the language table. */
  286   char **suffixes;              /* name suffixes of this language's files */
  287   char **filenames;             /* names of this language's files */
  288   char **interpreters;          /* interpreters for this language */
  289   bool metasource;              /* source used to generate other sources */
  290 } language;
 291
  292 typedef struct fdesc            /* description of one input file; entries form a linked list */
  293 {
  294   struct fdesc *next;           /* for the linked list */
  295   char *infname;                /* uncompressed input file name */
  296   char *infabsname;             /* absolute uncompressed input file name */
  297   char *infabsdir;              /* absolute dir of input file */
  298   char *taggedfname;            /* file name to write in tagfile */
  299   language *lang;               /* language of file */
  300   char *prop;                   /* file properties to write in tagfile */
  301   bool usecharno;               /* etags tags shall contain char number */
  302   bool written;                 /* entry written in the tags file */
  303 } fdesc;
 304
  305 typedef struct node_st          /* one tag, held as a node of a binary tree */
  306 {                               /* sorting structure */
  307   struct node_st *left, *right; /* left and right sons */
  308   fdesc *fdp;                   /* description of file to whom tag belongs */
  309   char *name;                   /* tag name */
  310   char *regex;                  /* search regexp */
  311   bool valid;                   /* write this tag on the tag file */
  312   bool is_func;                 /* function tag: use regexp in CTAGS mode */
  313   bool been_warned;             /* warning already given for duplicated tag */
  314   int lno;                      /* line number tag is on */
  315   long cno;                     /* character number line starts on */
  316 } node;
 317
 318 /*
 319  * A `linebuffer' is a structure which holds a line of text.
 320  * `readline_internal' reads a line from a stream into a linebuffer
 321  * and works regardless of the length of the line.
 322  * SIZE is the size of BUFFER, LEN is the length of the string in
 323  * BUFFER after readline reads it.
 324  */
  325 typedef struct                  /* growable buffer holding one line of text */
  326 {
  327   long size;                    /* size of BUFFER */
  328   int len;                      /* length of the string currently in BUFFER */
  329   char *buffer;                 /* the text itself */
  330 } linebuffer;
 331
 332 /* Used to support mixing of --lang and file names. */
  333 typedef struct                  /* one command-line argument, classified by arg_type */
  334 {
  335   enum {
  336     at_language,                /* a language specification */
  337     at_regexp,                  /* a regular expression */
  338     at_filename,                /* a file name */
  339     at_stdin,                   /* read from stdin here */
  340     at_end                      /* stop parsing the list */
  341   } arg_type;                   /* argument type */
  342   language *lang;               /* language associated with the argument */
  343   char *what;                   /* the argument itself */
  344 } argument;
 345
 346 /* Structure defining a regular expression. */
  347 typedef struct regexp           /* one user-supplied regexp; entries form a linked list */
  348 {
  349   struct regexp *p_next;        /* pointer to next in list */
  350   language *lang;               /* if set, use only for this language */
  351   char *pattern;                /* the regexp pattern */
  352   char *name;                   /* tag name */
  353   struct re_pattern_buffer *pat; /* the compiled pattern */
  354   struct re_registers regs;     /* re registers */
  355   bool error_signaled;          /* already signaled for this regexp */
  356   bool force_explicit_name;     /* do not allow implicit tag name */
  357   bool ignore_case;             /* ignore case when matching */
  358   bool multi_line;              /* do a multi-line match on the whole file */
  359 } regexp;
 360
 361
 362 /* Many compilers barf on this:
 363         Lang_function Ada_funcs;
 364    so let's write it this way */
 365 static void Ada_funcs __P((FILE *));
 366 static void Asm_labels __P((FILE *));
 367 static void C_entries __P((int c_ext, FILE *));
 368 static void default_C_entries __P((FILE *));
 369 static void plain_C_entries __P((FILE *));
 370 static void Cjava_entries __P((FILE *));
 371 static void Cobol_paragraphs __P((FILE *));
 372 static void Cplusplus_entries __P((FILE *));
 373 static void Cstar_entries __P((FILE *));
 374 static void Erlang_functions __P((FILE *));
 375 static void Forth_words __P((FILE *));
 376 static void Fortran_functions __P((FILE *));
 377 static void HTML_labels __P((FILE *));
 378 static void Lisp_functions __P((FILE *));
 379 static void Lua_functions __P((FILE *));
 380 static void Makefile_targets __P((FILE *));
 381 static void Pascal_functions __P((FILE *));
 382 static void Perl_functions __P((FILE *));
 383 static void PHP_functions __P((FILE *));
 384 static void PS_functions __P((FILE *));
 385 static void Prolog_functions __P((FILE *));
 386 static void Python_functions __P((FILE *));
 387 static void Scheme_functions __P((FILE *));
 388 static void TeX_commands __P((FILE *));
 389 static void Texinfo_nodes __P((FILE *));
 390 static void Yacc_entries __P((FILE *));
 391 static void just_read_file __P((FILE *));
 392
 393 static void print_language_names __P((void));
 394 static void print_version __P((void));
 395 static void print_help __P((argument *));
 396 int main __P((int, char **));
 397
 398 static compressor *get_compressor_from_suffix __P((char *, char **));
 399 static language *get_language_from_langname __P((const char *));
 400 static language *get_language_from_interpreter __P((char *));
 401 static language *get_language_from_filename __P((char *, bool));
 402 static void readline __P((linebuffer *, FILE *));
 403 static long readline_internal __P((linebuffer *, FILE *));
 404 static bool nocase_tail __P((char *));
 405 static void get_tag __P((char *, char **));
 406
 407 static void analyse_regex __P((char *));
 408 static void free_regexps __P((void));
 409 static void regex_tag_multiline __P((void));
 410 static void error __P((const char *, const char *));
 411 static void suggest_asking_for_help __P((void));
 412 void fatal __P((char *, char *));
 413 static void pfatal __P((char *));
 414 static void add_node __P((node *, node **));
 415
 416 static void init __P((void));
 417 static void process_file_name __P((char *, language *));
 418 static void process_file __P((FILE *, char *, language *));
 419 static void find_entries __P((FILE *));
 420 static void free_tree __P((node *));
 421 static void free_fdesc __P((fdesc *));
 422 static void pfnote __P((char *, bool, char *, int, int, long));
 423 static void make_tag __P((char *, int, bool, char *, int, int, long));
 424 static void invalidate_nodes __P((fdesc *, node **));
 425 static void put_entries __P((node *));
 426
 427 static char *concat __P((char *, char *, char *));
 428 static char *skip_spaces __P((char *));
 429 static char *skip_non_spaces __P((char *));
 430 static char *savenstr __P((char *, int));
 431 static char *savestr __P((char *));
 432 static char *etags_strchr __P((const char *, int));
 433 static char *etags_strrchr __P((const char *, int));
 434 static int etags_strcasecmp __P((const char *, const char *));
 435 static int etags_strncasecmp __P((const char *, const char *, int));
 436 static char *etags_getcwd __P((void));
 437 static char *relative_filename __P((char *, char *));
 438 static char *absolute_filename __P((char *, char *));
 439 static char *absolute_dirname __P((char *, char *));
 440 static bool filename_is_absolute __P((char *f));
 441 static void canonicalize_filename __P((char *));
 442 static void linebuffer_init __P((linebuffer *));
 443 static void linebuffer_setlen __P((linebuffer *, int));
 444 static PTR xmalloc __P((unsigned int));
 445 static PTR xrealloc __P((char *, unsigned int));
 446
 447 \f
 448 static char searchar = '/';     /* use /.../ searches */
 449
 450 static char *tagfile;           /* output file */
 451 static char *progname;          /* name this program was invoked with */
 452 static char *cwd;               /* current working directory */
 453 static char *tagfiledir;        /* directory of tagfile */
 454 static FILE *tagf;              /* ioptr for tags file */
 455
 456 static fdesc *fdhead;           /* head of file description list */
 457 static fdesc *curfdp;           /* current file description */
 458 static int lineno;              /* line number of current line */
 459 static long charno;             /* current character number */
 460 static long linecharno;         /* charno of start of current line */
 461 static char *dbp;               /* pointer to start of current tag */
 462
 463 static const int invalidcharno = -1;
 464
 465 static node *nodehead;          /* the head of the binary tree of tags */
 466 static node *last_node;         /* the last node created */
 467
 468 static linebuffer lb;           /* the current line */
 469 static linebuffer filebuf;      /* a buffer containing the whole file */
 470 static linebuffer token_name;   /* a buffer containing a tag name */
 471
 472 /* boolean "functions" (see init)       */
 473 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 474 static char
 475   /* white chars */
 476   *white = " \f\t\n\r\v",
 477   /* not in a name */
 478   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 479   /* token ending chars */
 480   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 481   /* token starting chars */
 482   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 483   /* valid in-token chars */
 484   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 485
 486 static bool append_to_tagfile;  /* -a: append to tags */
 487 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 488 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 489 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 490                                 /* 0 struct/enum/union decls, and C++ */
 491                                 /* member functions. */
 492 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 493                                 /* constants and variables. */
 494                                 /* -D: opposite of -d.  Default under ctags. */
 495 static bool globals;            /* create tags for global variables */
 496 static bool members;            /* create tags for C member variables */
 497 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 498 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 499 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 500 static bool update;             /* -u: update tags */
 501 static bool vgrind_style;       /* -v: create vgrind style index output */
 502 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 503 static bool cxref_style;        /* -x: create cxref style output */
 504 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 505 static bool ignoreindent;       /* -I: ignore indentation in C */
 506 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 507
 508 /* STDIN is defined in LynxOS system headers */
 509 #ifdef STDIN
 510 # undef STDIN
 511 #endif
 512
 513 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 514 static bool parsing_stdin;      /* --parse-stdin used */
 515
 516 static regexp *p_head;          /* list of all regexps */
 517 static bool need_filebuf;       /* some regexes are multi-line */
 518
  519 static struct option longopts[] =       /* long option table for getopt_long */
  520 {
     /* Each entry is { name, has_arg, flag, val }.  Entries whose flag is
        NULL carry a short-option letter (or STDIN) as val; entries whose
        flag points at a variable carry the TRUE/FALSE value for it. */
  521   { "append",             no_argument,       NULL,               'a'   },
  522   { "packages-only",      no_argument,       &packages_only,     TRUE  },
  523   { "c++",                no_argument,       NULL,               'C'   },
  524   { "declarations",       no_argument,       &declarations,      TRUE  },
  525   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  526   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
     /* NOTE(review): "help" appears twice ('h' and 'H'); a lookup by long
        name presumably always finds the first entry -- confirm that the
        'H' entry is intentional. */
  527   { "help",               no_argument,       NULL,               'h'   },
  528   { "help",               no_argument,       NULL,               'H'   },
  529   { "ignore-indentation", no_argument,       NULL,               'I'   },
  530   { "language",           required_argument, NULL,               'l'   },
  531   { "members",            no_argument,       &members,           TRUE  },
  532   { "no-members",         no_argument,       &members,           FALSE },
  533   { "output",             required_argument, NULL,               'o'   },
  534   { "regex",              required_argument, NULL,               'r'   },
  535   { "no-regex",           no_argument,       NULL,               'R'   },
  536   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  537   { "parse-stdin",        required_argument, NULL,               STDIN },
  538   { "version",            no_argument,       NULL,               'V'   },
  539
  540 #if CTAGS /* Ctags options */
  541   { "backward-search",    no_argument,       NULL,               'B'   },
  542   { "cxref",              no_argument,       NULL,               'x'   },
  543   { "defines",            no_argument,       NULL,               'd'   },
  544   { "globals",            no_argument,       &globals,           TRUE  },
  545   { "typedefs",           no_argument,       NULL,               't'   },
  546   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  547   { "update",             no_argument,       NULL,               'u'   },
  548   { "vgrind",             no_argument,       NULL,               'v'   },
  549   { "no-warn",            no_argument,       NULL,               'w'   },
  550
  551 #else /* Etags options */
  552   { "no-defines",         no_argument,       NULL,               'D'   },
  553   { "no-globals",         no_argument,       &globals,           FALSE },
  554   { "include",            required_argument, NULL,               'i'   },
  555 #endif
  556   { NULL }                      /* all-zero entry ends the table */
  557 };
 558
  559 static compressor compressors[] =       /* recognized compressed-file suffixes */
  560 {
  561   { "z", "gzip -d -c"},
  562   { "Z", "gzip -d -c"},
  563   { "gz", "gzip -d -c"},
  564   { "GZ", "gzip -d -c"},
  565   { "bz2", "bzip2 -d -c" },
  566   { NULL }                      /* suffix is NULL: presumably the end marker */
  567 };
 568
 569 /*
 570  * Language stuff.
 571  */
 572
 573 /* Ada code */
 574 static char *Ada_suffixes [] =
 575   { "ads", "adb", "ada", NULL };
 576 static char Ada_help [] =
 577 "In Ada code, functions, procedures, packages, tasks and types are\n\
 578 tags.  Use the `--packages-only' option to create tags for\n\
 579 packages only.\n\
 580 Ada tag names have suffixes indicating the type of entity:\n\
 581         Entity type:    Qualifier:\n\
 582         ------------    ----------\n\
 583         function        /f\n\
 584         procedure       /p\n\
 585         package spec    /s\n\
 586         package body    /b\n\
 587         type            /t\n\
 588         task            /k\n\
 589 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 590 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 591 will just search for any tag `bidule'.";
 592
 593 /* Assembly code */
 594 static char *Asm_suffixes [] =
 595   { "a",        /* Unix assembler */
 596     "asm", /* Microcontroller assembly */
 597     "def", /* BSO/Tasking definition includes  */
 598     "inc", /* Microcontroller include files */
 599     "ins", /* Microcontroller include files */
 600     "s", "sa", /* Unix assembler */
 601     "S",   /* cpp-processed Unix assembler */
 602     "src", /* BSO/Tasking C compiler output */
 603     NULL
 604   };
 605 static char Asm_help [] =
 606 "In assembler code, labels appearing at the beginning of a line,\n\
 607 followed by a colon, are tags.";
 608
 609
 610 /* Note that .c and .h can be considered C++, if the --c++ flag was
 611    given, or if the `class' or `template' keywords are met inside the file.
 612    That is why default_C_entries is called for these. */
 613 static char *default_C_suffixes [] =
 614   { "c", "h", NULL };
 615 #if CTAGS                               /* C help for Ctags */
 616 static char default_C_help [] =
 617 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 618 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 619 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 620 Use --globals to tag global variables.\n\
 621 You can tag function declarations and external variables by\n\
 622 using `--declarations', and struct members by using `--members'.";
 623 #else                                   /* C help for Etags */
 624 static char default_C_help [] =
 625 "In C code, any C function or typedef is a tag, and so are\n\
 626 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 627 definitions and `enum' constants are tags unless you specify\n\
 628 `--no-defines'.  Global variables are tags unless you specify\n\
 629 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 630 can make the tags table file much smaller.\n\
 631 You can tag function declarations and external variables by\n\
 632 using `--declarations', and struct members by using `--members'.";
 633 #endif  /* C help for Ctags and Etags */
 634
 635 static char *Cplusplus_suffixes [] =
 636   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 637     "M",                        /* Objective C++ */
 638     "pdb",                      /* Postscript with C syntax */
 639     NULL };
 640 static char Cplusplus_help [] =
 641 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 642 --help --lang=c --lang=c++ for full help.)\n\
 643 In addition to C tags, member functions are also recognized.  Member\n\
 644 variables are also recognized if you use the `--members' option.\n\
 645 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 646 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 647 `operator+'.";
 648
 649 static char *Cjava_suffixes [] =
 650   { "java", NULL };
 651 static char Cjava_help [] =
 652 "In Java code, all the tags constructs of C and C++ code are\n\
 653 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 654
 655
 656 static char *Cobol_suffixes [] =
 657   { "COB", "cob", NULL };
 658 static char Cobol_help [] =
 659 "In Cobol code, tags are paragraph names; that is, any word\n\
 660 starting in column 8 and followed by a period.";
 661
 662 static char *Cstar_suffixes [] =
 663   { "cs", "hs", NULL };
 664
 665 static char *Erlang_suffixes [] =
 666   { "erl", "hrl", NULL };
 667 static char Erlang_help [] =
 668 "In Erlang code, the tags are the functions, records and macros\n\
 669 defined in the file.";
 670
 671 char *Forth_suffixes [] =
 672   { "fth", "tok", NULL };
 673 static char Forth_help [] =
 674 "In Forth code, tags are words defined by `:',\n\
 675 constant, code, create, defer, value, variable, buffer:, field.";
 676
 677 static char *Fortran_suffixes [] =
 678   { "F", "f", "f90", "for", NULL };
 679 static char Fortran_help [] =
 680 "In Fortran code, functions, subroutines and block data are tags.";
 681
 682 static char *HTML_suffixes [] =
 683   { "htm", "html", "shtml", NULL };
 684 static char HTML_help [] =
 685 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 686 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 687 occurrences of `id='.";
 688
 689 static char *Lisp_suffixes [] =
 690   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 691 static char Lisp_help [] =
 692 "In Lisp code, any function defined with `defun', any variable\n\
 693 defined with `defvar' or `defconst', and in general the first\n\
 694 argument of any expression that starts with `(def' in column zero\n\
 695 is a tag.";
 696
 697 static char *Lua_suffixes [] =
 698   { "lua", "LUA", NULL };
 699 static char Lua_help [] =
 700 "In Lua scripts, all functions are tags.";
 701
 702 static char *Makefile_filenames [] =
 703   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 704 static char Makefile_help [] =
 705 "In makefiles, targets are tags; additionally, variables are tags\n\
 706 unless you specify `--no-globals'.";
 707
 708 static char *Objc_suffixes [] =
 709   { "lm",                       /* Objective lex file */
 710     "m",                        /* Objective C file */
 711      NULL };
 712 static char Objc_help [] =
 713 "In Objective C code, tags include Objective C definitions for classes,\n\
 714 class categories, methods and protocols.  Tags for variables and\n\
 715 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 716 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 717
 718 static char *Pascal_suffixes [] =
 719   { "p", "pas", NULL };
 720 static char Pascal_help [] =
 721 "In Pascal code, the tags are the functions and procedures defined\n\
 722 in the file.";
 723 /* " // this is for working around an Emacs highlighting bug... */
 724
 725 static char *Perl_suffixes [] =
 726   { "pl", "pm", NULL };
 727 static char *Perl_interpreters [] =
 728   { "perl", "@PERL@", NULL };
 729 static char Perl_help [] =
 730 "In Perl code, the tags are the packages, subroutines and variables\n\
 731 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 732 `--globals' if you want to tag global variables.  Tags for\n\
 733 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 734 defined in the default package is `main::SUB'.";
 735
 736 static char *PHP_suffixes [] =
 737   { "php", "php3", "php4", NULL };
 738 static char PHP_help [] =
 739 "In PHP code, tags are functions, classes and defines.  When using\n\
 740 the `--members' option, vars are tags too.";
 741
 742 static char *plain_C_suffixes [] =
 743   { "pc",                       /* Pro*C file */
 744      NULL };
 745
 746 static char *PS_suffixes [] =
 747   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 748 static char PS_help [] =
 749 "In PostScript code, the tags are the functions.";
 750
 751 static char *Prolog_suffixes [] =
 752   { "prolog", NULL };
 753 static char Prolog_help [] =
 754 "In Prolog code, tags are predicates and rules at the beginning of\n\
 755 line.";
 756
 757 static char *Python_suffixes [] =
 758   { "py", NULL };
 759 static char Python_help [] =
 760 "In Python code, `def' or `class' at the beginning of a line\n\
 761 generate a tag.";
 762
 763 /* Can't do the `SCM' or `scm' prefix with a version number. */
 764 static char *Scheme_suffixes [] =
 765   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 766 static char Scheme_help [] =
 767 "In Scheme code, tags include anything defined with `def' or with a\n\
 768 construct whose name starts with `def'.  They also include\n\
 769 variables set with `set!' at top level in the file.";
 770
 771 static char *TeX_suffixes [] =
 772   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 773 static char TeX_help [] =
 774 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 775 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 776 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 777 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 778 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 779 \n\
 780 Other commands can be specified by setting the environment variable\n\
 781 `TEXTAGS' to a colon-separated list like, for example,\n\
 782      TEXTAGS=\"mycommand:myothercommand\".";
 783
 784
 785 static char *Texinfo_suffixes [] =
 786   { "texi", "texinfo", "txi", NULL };
 787 static char Texinfo_help [] =
 788 "for texinfo files, lines starting with @node are tagged.";
 789
 790 static char *Yacc_suffixes [] =
 791   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 792 static char Yacc_help [] =
 793 "In Bison or Yacc input files, each rule defines as a tag the\n\
 794 nonterminal it constructs.  The portions of the file that contain\n\
 795 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 796 for full help).";
 797
 798 static char auto_help [] =
 799 "`auto' is not a real language, it indicates to use\n\
 800 a default language for files base on file name suffix and file contents.";
 801
 802 static char none_help [] =
 803 "`none' is not a real language, it indicates to only do\n\
 804 regexp processing on files.";
 805
 806 static char no_lang_help [] =
 807 "No detailed help available for this language.";
 808
 809
 810 /*
 811  * Table of languages.
 812  *
 813  * It is ok for a given function to be listed under more than one
 814  * name.  I just didn't.
 815  */
 816
 817 static language lang_names [] =
 818 {
 819   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 820   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 821   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 822   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 823   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 824   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 825   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 826   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 827   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 828   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 829   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 830   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 831   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 832   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 833   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 834   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 835   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 836   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 837   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 838   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 839   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 840   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 841   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 842   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 843   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 844   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 845   { "auto",      auto_help },                      /* default guessing scheme */
 846   { "none",      none_help,      just_read_file }, /* regexp matching only */
 847   { NULL }                /* end of list */
 848 };
 849
 850 \f
 851 static void
 852 print_language_names ()
 853 {
 854   language *lang;
 855   char **name, **ext;
 856
 857   puts ("\nThese are the currently supported languages, along with the\n\
 858 default file names and dot suffixes:");
 859   for (lang = lang_names; lang->name != NULL; lang++)
 860     {
 861       printf ("  %-*s", 10, lang->name);
 862       if (lang->filenames != NULL)
 863         for (name = lang->filenames; *name != NULL; name++)
 864           printf (" %s", *name);
 865       if (lang->suffixes != NULL)
 866         for (ext = lang->suffixes; *ext != NULL; ext++)
 867           printf (" .%s", *ext);
 868       puts ("");
 869     }
 870   puts ("where `auto' means use default language for files based on file\n\
 871 name suffix, and `none' means only do regexp processing on files.\n\
 872 If no language is specified and no matching suffix is found,\n\
 873 the first line of the file is read for a sharp-bang (#!) sequence\n\
 874 followed by the name of an interpreter.  If no such sequence is found,\n\
 875 Fortran is tried first; if no tags are found, C is tried next.\n\
 876 When parsing any C file, a \"class\" or \"template\" keyword\n\
 877 switches to C++.");
 878   puts ("Compressed files are supported using gzip and bzip2.\n\
 879 \n\
 880 For detailed help on a given language use, for example,\n\
 881 etags --help --lang=ada.");
 882 }
 883
 884 #ifndef EMACS_NAME
 885 # define EMACS_NAME "standalone"
 886 #endif
 887 #ifndef VERSION
 888 # define VERSION "17.26"
 889 #endif
 890 static void
 891 print_version ()
 892 {
 893   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 894   puts ("Copyright (C) 2008 Free Software Foundation, Inc.");
 895   puts ("This program is distributed under the terms in ETAGS.README");
 896
 897   exit (EXIT_SUCCESS);
 898 }
 899
 900 static void
 901 print_help (argbuffer)
 902      argument *argbuffer;
 903 {
 904   bool help_for_lang = FALSE;
 905
 906   for (; argbuffer->arg_type != at_end; argbuffer++)
 907     if (argbuffer->arg_type == at_language)
 908       {
 909         if (help_for_lang)
 910           puts ("");
 911         puts (argbuffer->lang->help);
 912         help_for_lang = TRUE;
 913       }
 914
 915   if (help_for_lang)
 916     exit (EXIT_SUCCESS);
 917
 918   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 919 \n\
 920 These are the options accepted by %s.\n", progname, progname);
 921   if (NO_LONG_OPTIONS)
 922     puts ("WARNING: long option names do not work with this executable,\n\
 923 as it is not linked with GNU getopt.");
 924   else
 925     puts ("You may use unambiguous abbreviations for the long option names.");
 926   puts ("  A - as file name means read names from stdin (one per line).\n\
 927 Absolute names are stored in the output file as they are.\n\
 928 Relative ones are stored relative to the output file's directory.\n");
 929
 930   puts ("-a, --append\n\
 931         Append tag entries to existing tags file.");
 932
 933   puts ("--packages-only\n\
 934         For Ada files, only generate tags for packages.");
 935
 936   if (CTAGS)
 937     puts ("-B, --backward-search\n\
 938         Write the search commands for the tag entries using '?', the\n\
 939         backward-search command instead of '/', the forward-search command.");
 940
 941   /* This option is mostly obsolete, because etags can now automatically
 942      detect C++.  Retained for backward compatibility and for debugging and
 943      experimentation.  In principle, we could want to tag as C++ even
 944      before any "class" or "template" keyword.
 945   puts ("-C, --c++\n\
 946         Treat files whose name suffix defaults to C language as C++ files.");
 947   */
 948
 949   puts ("--declarations\n\
 950         In C and derived languages, create tags for function declarations,");
 951   if (CTAGS)
 952     puts ("\tand create tags for extern variables if --globals is used.");
 953   else
 954     puts
 955       ("\tand create tags for extern variables unless --no-globals is used.");
 956
 957   if (CTAGS)
 958     puts ("-d, --defines\n\
 959         Create tag entries for C #define constants and enum constants, too.");
 960   else
 961     puts ("-D, --no-defines\n\
 962         Don't create tag entries for C #define constants and enum constants.\n\
 963         This makes the tags file smaller.");
 964
 965   if (!CTAGS)
 966     puts ("-i FILE, --include=FILE\n\
 967         Include a note in tag file indicating that, when searching for\n\
 968         a tag, one should also consult the tags file FILE after\n\
 969         checking the current file.");
 970
 971   puts ("-l LANG, --language=LANG\n\
 972         Force the following files to be considered as written in the\n\
 973         named language up to the next --language=LANG option.");
 974
 975   if (CTAGS)
 976     puts ("--globals\n\
 977         Create tag entries for global variables in some languages.");
 978   else
 979     puts ("--no-globals\n\
 980         Do not create tag entries for global variables in some\n\
 981         languages.  This makes the tags file smaller.");
 982   puts ("--members\n\
 983         Create tag entries for members of structures in some languages.");
 984
 985   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 986         Make a tag for each line matching a regular expression pattern\n\
 987         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 988         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 989         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 990         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 991   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 992         For example Tcl named tags can be created with:\n\
 993           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 994         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 995         `m' means to allow multi-line matches, `s' implies `m' and\n\
 996         causes dot to match any character, including newline.");
 997   puts ("-R, --no-regex\n\
 998         Don't create tags from regexps for the following files.");
 999   puts ("-I, --ignore-indentation\n\
1000         In C and C++ do not assume that a closing brace in the first\n\
1001         column is the final brace of a function or structure definition.");
1002   puts ("-o FILE, --output=FILE\n\
1003         Write the tags to FILE.");
1004   puts ("--parse-stdin=NAME\n\
1005         Read from standard input and record tags as belonging to file NAME.");
1006
1007   if (CTAGS)
1008     {
1009       puts ("-t, --typedefs\n\
1010         Generate tag entries for C and Ada typedefs.");
1011       puts ("-T, --typedefs-and-c++\n\
1012         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1013         and C++ member functions.");
1014     }
1015
1016   if (CTAGS)
1017     puts ("-u, --update\n\
1018         Update the tag entries for the given files, leaving tag\n\
1019         entries for other files in place.  Currently, this is\n\
1020         implemented by deleting the existing entries for the given\n\
1021         files and then rewriting the new entries at the end of the\n\
1022         tags file.  It is often faster to simply rebuild the entire\n\
1023         tag file than to use this.");
1024
1025   if (CTAGS)
1026     {
1027       puts ("-v, --vgrind\n\
1028         Print on the standard output an index of items intended for\n\
1029         human consumption, similar to the output of vgrind.  The index\n\
1030         is sorted, and gives the page number of each item.");
1031 # if PRINT_UNDOCUMENTED_OPTIONS_HELP
1032       puts ("-w, --no-duplicates\n\
1033         Do not create duplicate tag entries, for compatibility with\n\
1034         traditional ctags.");
1035       puts ("-w, --no-warn\n\
1036         Suppress warning messages about duplicate tag entries.");
1037 # endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
1038       puts ("-x, --cxref\n\
1039         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1040         The output uses line numbers instead of page numbers, but\n\
1041         beyond that the differences are cosmetic; try both to see\n\
1042         which you like.");
1043     }
1044
1045   puts ("-V, --version\n\
1046         Print the version of the program.\n\
1047 -h, --help\n\
1048         Print this help message.\n\
1049         Followed by one or more `--language' options prints detailed\n\
1050         help about tag generation for the specified languages.");
1051
1052   print_language_names ();
1053
1054   puts ("");
1055   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1056
1057   exit (EXIT_SUCCESS);
1058 }
1059
1060 \f
1061 #ifdef VMS                      /* VMS specific functions */
1062
/* The character that terminates a string.  */
#define EOS '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN 255

/* A file specification: a length word followed by the characters,
   with room for MAX_FILE_SPEC_LEN of them plus a terminator.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1072
1073 /*
1074  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1075  returning in each successive call the next file name matching the input
1076  spec. The function expects that each in_spec passed
1077  to it will be processed to completion; in particular, up to and
1078  including the call following that in which the last matching name
1079  is returned, the function ignores the value of in_spec, and will
1080  only start processing a new spec with the following call.
1081  If an error occurs, on return out_spec contains the value
1082  of in_spec when the error occurred.
1083
1084  With each successive file name returned in out_spec, the
1085  function's return value is one. When there are no more matching
1086  names the function returns zero. If on the first call no file
1087  matches in_spec, or there is any other error, -1 is returned.
1088 */
1089
1090 #include        <rmsdef.h>
1091 #include        <descrip.h>
1092 #define         OUTSIZE MAX_FILE_SPEC_LEN
1093 static short
1094 fn_exp (out, in)
1095      vspec *out;
1096      char *in;
1097 {
1098   static long context = 0;
1099   static struct dsc$descriptor_s o;
1100   static struct dsc$descriptor_s i;
1101   static bool pass1 = TRUE;
1102   long status;
1103   short retval;
1104
1105   if (pass1)
1106     {
1107       pass1 = FALSE;
1108       o.dsc$a_pointer = (char *) out;
1109       o.dsc$w_length = (short)OUTSIZE;
1110       i.dsc$a_pointer = in;
1111       i.dsc$w_length = (short)strlen(in);
1112       i.dsc$b_dtype = DSC$K_DTYPE_T;
1113       i.dsc$b_class = DSC$K_CLASS_S;
1114       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1115       o.dsc$b_class = DSC$K_CLASS_VS;
1116     }
1117   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1118     {
1119       out->body[out->curlen] = EOS;
1120       return 1;
1121     }
1122   else if (status == RMS$_NMF)
1123     retval = 0;
1124   else
1125     {
1126       strcpy(out->body, in);
1127       retval = -1;
1128     }
1129   lib$find_file_end(&context);
1130   pass1 = TRUE;
1131   return retval;
1132 }
1133
1134 /*
1135   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1136   name of each file specified by the provided arg expanding wildcards.
1137 */
1138 static char *
1139 gfnames (arg, p_error)
1140      char *arg;
1141      bool *p_error;
1142 {
1143   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1144
1145   switch (fn_exp (&filename, arg))
1146     {
1147     case 1:
1148       *p_error = FALSE;
1149       return filename.body;
1150     case 0:
1151       *p_error = FALSE;
1152       return NULL;
1153     default:
1154       *p_error = TRUE;
1155       return filename.body;
1156     }
1157 }
1158
1159 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
1160 system (cmd)
1161      char *cmd;
1162 {
1163   error ("%s", "system() function not implemented under VMS");
1164 }
1165 #endif
1166
1167 #define VERSION_DELIM   ';'
1168 char *massage_name (s)
1169      char *s;
1170 {
1171   char *start = s;
1172
1173   for ( ; *s; s++)
1174     if (*s == VERSION_DELIM)
1175       {
1176         *s = EOS;
1177         break;
1178       }
1179     else
1180       *s = lowcase (*s);
1181   return start;
1182 }
1183 #endif /* VMS */
1184
1185 \f
1186 int
1187 main (argc, argv)
1188      int argc;
1189      char *argv[];
1190 {
1191   int i;
1192   unsigned int nincluded_files;
1193   char **included_files;
1194   argument *argbuffer;
1195   int current_arg, file_count;
1196   linebuffer filename_lb;
1197   bool help_asked = FALSE;
1198 #ifdef VMS
1199   bool got_err;
1200 #endif
1201  char *optstring;
1202  int opt;
1203
1204
1205 #ifdef DOS_NT
1206   _fmode = O_BINARY;   /* all of files are treated as binary files */
1207 #endif /* DOS_NT */
1208
1209   progname = argv[0];
1210   nincluded_files = 0;
1211   included_files = xnew (argc, char *);
1212   current_arg = 0;
1213   file_count = 0;
1214
1215   /* Allocate enough no matter what happens.  Overkill, but each one
1216      is small. */
1217   argbuffer = xnew (argc, argument);
1218
1219   /*
1220    * If etags, always find typedefs and structure tags.  Why not?
1221    * Also default to find macro constants, enum constants and
1222    * global variables.
1223    */
1224   if (!CTAGS)
1225     {
1226       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1227       globals = TRUE;
1228     }
1229
1230   /* When the optstring begins with a '-' getopt_long does not rearrange the
1231      non-options arguments to be at the end, but leaves them alone. */
1232   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1233                       "ac:Cf:Il:o:r:RSVhH",
1234                       (CTAGS) ? "BxdtTuvw" : "Di:");
1235
1236   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1237     switch (opt)
1238       {
1239       case 0:
1240         /* If getopt returns 0, then it has already processed a
1241            long-named option.  We should do nothing.  */
1242         break;
1243
1244       case 1:
1245         /* This means that a file name has been seen.  Record it. */
1246         argbuffer[current_arg].arg_type = at_filename;
1247         argbuffer[current_arg].what     = optarg;
1248         ++current_arg;
1249         ++file_count;
1250         break;
1251
1252       case STDIN:
1253         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1254         argbuffer[current_arg].arg_type = at_stdin;
1255         argbuffer[current_arg].what     = optarg;
1256         ++current_arg;
1257         ++file_count;
1258         if (parsing_stdin)
1259           fatal ("cannot parse standard input more than once", (char *)NULL);
1260         parsing_stdin = TRUE;
1261         break;
1262
1263         /* Common options. */
1264       case 'a': append_to_tagfile = TRUE;       break;
1265       case 'C': cplusplus = TRUE;               break;
1266       case 'f':         /* for compatibility with old makefiles */
1267       case 'o':
1268         if (tagfile)
1269           {
1270             error ("-o option may only be given once.", (char *)NULL);
1271             suggest_asking_for_help ();
1272             /* NOTREACHED */
1273           }
1274         tagfile = optarg;
1275         break;
1276       case 'I':
1277       case 'S':         /* for backward compatibility */
1278         ignoreindent = TRUE;
1279         break;
1280       case 'l':
1281         {
1282           language *lang = get_language_from_langname (optarg);
1283           if (lang != NULL)
1284             {
1285               argbuffer[current_arg].lang = lang;
1286               argbuffer[current_arg].arg_type = at_language;
1287               ++current_arg;
1288             }
1289         }
1290         break;
1291       case 'c':
1292         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1293         optarg = concat (optarg, "i", ""); /* memory leak here */
1294         /* FALLTHRU */
1295       case 'r':
1296         argbuffer[current_arg].arg_type = at_regexp;
1297         argbuffer[current_arg].what = optarg;
1298         ++current_arg;
1299         break;
1300       case 'R':
1301         argbuffer[current_arg].arg_type = at_regexp;
1302         argbuffer[current_arg].what = NULL;
1303         ++current_arg;
1304         break;
1305       case 'V':
1306         print_version ();
1307         break;
1308       case 'h':
1309       case 'H':
1310         help_asked = TRUE;
1311         break;
1312
1313         /* Etags options */
1314       case 'D': constantypedefs = FALSE;                        break;
1315       case 'i': included_files[nincluded_files++] = optarg;     break;
1316
1317         /* Ctags options. */
1318       case 'B': searchar = '?';                                 break;
1319       case 'd': constantypedefs = TRUE;                         break;
1320       case 't': typedefs = TRUE;                                break;
1321       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1322       case 'u': update = TRUE;                                  break;
1323       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1324       case 'x': cxref_style = TRUE;                             break;
1325       case 'w': no_warnings = TRUE;                             break;
1326       default:
1327         suggest_asking_for_help ();
1328         /* NOTREACHED */
1329       }
1330
1331   /* No more options.  Store the rest of arguments. */
1332   for (; optind < argc; optind++)
1333     {
1334       argbuffer[current_arg].arg_type = at_filename;
1335       argbuffer[current_arg].what = argv[optind];
1336       ++current_arg;
1337       ++file_count;
1338     }
1339
1340   argbuffer[current_arg].arg_type = at_end;
1341
1342   if (help_asked)
1343     print_help (argbuffer);
1344     /* NOTREACHED */
1345
1346   if (nincluded_files == 0 && file_count == 0)
1347     {
1348       error ("no input files specified.", (char *)NULL);
1349       suggest_asking_for_help ();
1350       /* NOTREACHED */
1351     }
1352
1353   if (tagfile == NULL)
1354     tagfile = CTAGS ? "tags" : "TAGS";
1355   cwd = etags_getcwd ();        /* the current working directory */
1356   if (cwd[strlen (cwd) - 1] != '/')
1357     {
1358       char *oldcwd = cwd;
1359       cwd = concat (oldcwd, "/", "");
1360       free (oldcwd);
1361     }
1362   /* Relative file names are made relative to the current directory. */
1363   if (streq (tagfile, "-")
1364       || strneq (tagfile, "/dev/", 5))
1365     tagfiledir = cwd;
1366   else
1367     tagfiledir = absolute_dirname (tagfile, cwd);
1368
1369   init ();                      /* set up boolean "functions" */
1370
1371   linebuffer_init (&lb);
1372   linebuffer_init (&filename_lb);
1373   linebuffer_init (&filebuf);
1374   linebuffer_init (&token_name);
1375
1376   if (!CTAGS)
1377     {
1378       if (streq (tagfile, "-"))
1379         {
1380           tagf = stdout;
1381 #ifdef DOS_NT
1382           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1383              doesn't take effect until after `stdout' is already open). */
1384           if (!isatty (fileno (stdout)))
1385             setmode (fileno (stdout), O_BINARY);
1386 #endif /* DOS_NT */
1387         }
1388       else
1389         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1390       if (tagf == NULL)
1391         pfatal (tagfile);
1392     }
1393
1394   /*
1395    * Loop through files finding functions.
1396    */
1397   for (i = 0; i < current_arg; i++)
1398     {
1399       static language *lang;    /* non-NULL if language is forced */
1400       char *this_file;
1401
1402       switch (argbuffer[i].arg_type)
1403         {
1404         case at_language:
1405           lang = argbuffer[i].lang;
1406           break;
1407         case at_regexp:
1408           analyse_regex (argbuffer[i].what);
1409           break;
1410         case at_filename:
1411 #ifdef VMS
1412           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1413             {
1414               if (got_err)
1415                 {
1416                   error ("can't find file %s\n", this_file);
1417                   argc--, argv++;
1418                 }
1419               else
1420                 {
1421                   this_file = massage_name (this_file);
1422                 }
1423 #else
1424               this_file = argbuffer[i].what;
1425 #endif
1426               /* Input file named "-" means read file names from stdin
1427                  (one per line) and use them. */
1428               if (streq (this_file, "-"))
1429                 {
1430                   if (parsing_stdin)
1431                     fatal ("cannot parse standard input AND read file names from it",
1432                            (char *)NULL);
1433                   while (readline_internal (&filename_lb, stdin) > 0)
1434                     process_file_name (filename_lb.buffer, lang);
1435                 }
1436               else
1437                 process_file_name (this_file, lang);
1438 #ifdef VMS
1439             }
1440 #endif
1441           break;
1442         case at_stdin:
1443           this_file = argbuffer[i].what;
1444           process_file (stdin, this_file, lang);
1445           break;
1446         }
1447     }
1448
1449   free_regexps ();
1450   free (lb.buffer);
1451   free (filebuf.buffer);
1452   free (token_name.buffer);
1453
1454   if (!CTAGS || cxref_style)
1455     {
1456       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1457       put_entries (nodehead);
1458       free_tree (nodehead);
1459       nodehead = NULL;
1460       if (!CTAGS)
1461         {
1462           fdesc *fdp;
1463
1464           /* Output file entries that have no tags. */
1465           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1466             if (!fdp->written)
1467               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1468
1469           while (nincluded_files-- > 0)
1470             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1471
1472           if (fclose (tagf) == EOF)
1473             pfatal (tagfile);
1474         }
1475
1476       exit (EXIT_SUCCESS);
1477     }
1478
1479   /* From here on, we are in (CTAGS && !cxref_style) */
1480   if (update)
1481     {
1482       char cmd[BUFSIZ];
1483       for (i = 0; i < current_arg; ++i)
1484         {
1485           switch (argbuffer[i].arg_type)
1486             {
1487             case at_filename:
1488             case at_stdin:
1489               break;
1490             default:
1491               continue;         /* the for loop */
1492             }
1493           sprintf (cmd,
1494                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1495                    tagfile, argbuffer[i].what, tagfile);
1496           if (system (cmd) != EXIT_SUCCESS)
1497             fatal ("failed to execute shell command", (char *)NULL);
1498         }
1499       append_to_tagfile = TRUE;
1500     }
1501
1502   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1503   if (tagf == NULL)
1504     pfatal (tagfile);
1505   put_entries (nodehead);       /* write all the tags (CTAGS) */
1506   free_tree (nodehead);
1507   nodehead = NULL;
1508   if (fclose (tagf) == EOF)
1509     pfatal (tagfile);
1510
1511   if (CTAGS)
1512     if (append_to_tagfile || update)
1513       {
1514         char cmd[2*BUFSIZ+20];
1515         /* Maybe these should be used:
1516            setenv ("LC_COLLATE", "C", 1);
1517            setenv ("LC_ALL", "C", 1); */
1518         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1519         exit (system (cmd));
1520       }
1521   return EXIT_SUCCESS;
1522 }
1523
1524
1525 /*
1526  * Return a compressor given the file name.  If EXTPTR is non-zero,
1527  * return a pointer into FILE where the compressor-specific
1528  * extension begins.  If no compressor is found, NULL is returned
1529  * and EXTPTR is not significant.
1530  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1531  */
1532 static compressor *
1533 get_compressor_from_suffix (file, extptr)
1534      char *file;
1535      char **extptr;
1536 {
1537   compressor *compr;
1538   char *slash, *suffix;
1539
1540   /* This relies on FN to be after canonicalize_filename,
1541      so we don't need to consider backslashes on DOS_NT.  */
1542   slash = etags_strrchr (file, '/');
1543   suffix = etags_strrchr (file, '.');
1544   if (suffix == NULL || suffix < slash)
1545     return NULL;
1546   if (extptr != NULL)
1547     *extptr = suffix;
1548   suffix += 1;
1549   /* Let those poor souls who live with DOS 8+3 file name limits get
1550      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1551      Only the first do loop is run if not MSDOS */
1552   do
1553     {
1554       for (compr = compressors; compr->suffix != NULL; compr++)
1555         if (streq (compr->suffix, suffix))
1556           return compr;
1557       if (!MSDOS)
1558         break;                  /* do it only once: not really a loop */
1559       if (extptr != NULL)
1560         *extptr = ++suffix;
1561     } while (*suffix != '\0');
1562   return NULL;
1563 }
1564
1565
1566
1567 /*
1568  * Return a language given the name.
1569  */
1570 static language *
1571 get_language_from_langname (name)
1572      const char *name;
1573 {
1574   language *lang;
1575
1576   if (name == NULL)
1577     error ("empty language name", (char *)NULL);
1578   else
1579     {
1580       for (lang = lang_names; lang->name != NULL; lang++)
1581         if (streq (name, lang->name))
1582           return lang;
1583       error ("unknown language \"%s\"", name);
1584     }
1585
1586   return NULL;
1587 }
1588
1589
1590 /*
1591  * Return a language given the interpreter name.
1592  */
1593 static language *
1594 get_language_from_interpreter (interpreter)
1595      char *interpreter;
1596 {
1597   language *lang;
1598   char **iname;
1599
1600   if (interpreter == NULL)
1601     return NULL;
1602   for (lang = lang_names; lang->name != NULL; lang++)
1603     if (lang->interpreters != NULL)
1604       for (iname = lang->interpreters; *iname != NULL; iname++)
1605         if (streq (*iname, interpreter))
1606             return lang;
1607
1608   return NULL;
1609 }
1610
1611
1612
1613 /*
1614  * Return a language given the file name.
1615  */
1616 static language *
1617 get_language_from_filename (file, case_sensitive)
1618      char *file;
1619      bool case_sensitive;
1620 {
1621   language *lang;
1622   char **name, **ext, *suffix;
1623
1624   /* Try whole file name first. */
1625   for (lang = lang_names; lang->name != NULL; lang++)
1626     if (lang->filenames != NULL)
1627       for (name = lang->filenames; *name != NULL; name++)
1628         if ((case_sensitive)
1629             ? streq (*name, file)
1630             : strcaseeq (*name, file))
1631           return lang;
1632
1633   /* If not found, try suffix after last dot. */
1634   suffix = etags_strrchr (file, '.');
1635   if (suffix == NULL)
1636     return NULL;
1637   suffix += 1;
1638   for (lang = lang_names; lang->name != NULL; lang++)
1639     if (lang->suffixes != NULL)
1640       for (ext = lang->suffixes; *ext != NULL; ext++)
1641         if ((case_sensitive)
1642             ? streq (*ext, suffix)
1643             : strcaseeq (*ext, suffix))
1644           return lang;
1645   return NULL;
1646 }
1647
1648 \f
1649 /*
1650  * This routine is called on each file argument.
1651  */
1652 static void
1653 process_file_name (file, lang)
1654      char *file;
1655      language *lang;
1656 {
1657   struct stat stat_buf;
1658   FILE *inf;
1659   fdesc *fdp;
1660   compressor *compr;
1661   char *compressed_name, *uncompressed_name;
1662   char *ext, *real_name;
1663   int retval;
1664
1665   canonicalize_filename (file);
1666   if (streq (file, tagfile) && !streq (tagfile, "-"))
1667     {
1668       error ("skipping inclusion of %s in self.", file);
1669       return;
1670     }
1671   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1672     {
1673       compressed_name = NULL;
1674       real_name = uncompressed_name = savestr (file);
1675     }
1676   else
1677     {
1678       real_name = compressed_name = savestr (file);
1679       uncompressed_name = savenstr (file, ext - file);
1680     }
1681
1682   /* If the canonicalized uncompressed name
1683      has already been dealt with, skip it silently. */
1684   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1685     {
1686       assert (fdp->infname != NULL);
1687       if (streq (uncompressed_name, fdp->infname))
1688         goto cleanup;
1689     }
1690
1691   if (stat (real_name, &stat_buf) != 0)
1692     {
1693       /* Reset real_name and try with a different name. */
1694       real_name = NULL;
1695       if (compressed_name != NULL) /* try with the given suffix */
1696         {
1697           if (stat (uncompressed_name, &stat_buf) == 0)
1698             real_name = uncompressed_name;
1699         }
1700       else                      /* try all possible suffixes */
1701         {
1702           for (compr = compressors; compr->suffix != NULL; compr++)
1703             {
1704               compressed_name = concat (file, ".", compr->suffix);
1705               if (stat (compressed_name, &stat_buf) != 0)
1706                 {
1707                   if (MSDOS)
1708                     {
1709                       char *suf = compressed_name + strlen (file);
1710                       size_t suflen = strlen (compr->suffix) + 1;
1711                       for ( ; suf[1]; suf++, suflen--)
1712                         {
1713                           memmove (suf, suf + 1, suflen);
1714                           if (stat (compressed_name, &stat_buf) == 0)
1715                             {
1716                               real_name = compressed_name;
1717                               break;
1718                             }
1719                         }
1720                       if (real_name != NULL)
1721                         break;
1722                     } /* MSDOS */
1723                   free (compressed_name);
1724                   compressed_name = NULL;
1725                 }
1726               else
1727                 {
1728                   real_name = compressed_name;
1729                   break;
1730                 }
1731             }
1732         }
1733       if (real_name == NULL)
1734         {
1735           perror (file);
1736           goto cleanup;
1737         }
1738     } /* try with a different name */
1739
1740   if (!S_ISREG (stat_buf.st_mode))
1741     {
1742       error ("skipping %s: it is not a regular file.", real_name);
1743       goto cleanup;
1744     }
1745   if (real_name == compressed_name)
1746     {
1747       char *cmd = concat (compr->command, " ", real_name);
1748       inf = (FILE *) popen (cmd, "r");
1749       free (cmd);
1750     }
1751   else
1752     inf = fopen (real_name, "r");
1753   if (inf == NULL)
1754     {
1755       perror (real_name);
1756       goto cleanup;
1757     }
1758
1759   process_file (inf, uncompressed_name, lang);
1760
1761   if (real_name == compressed_name)
1762     retval = pclose (inf);
1763   else
1764     retval = fclose (inf);
1765   if (retval < 0)
1766     pfatal (file);
1767
1768  cleanup:
1769   if (compressed_name) free (compressed_name);
1770   if (uncompressed_name) free (uncompressed_name);
1771   last_node = NULL;
1772   curfdp = NULL;
1773   return;
1774 }
1775
1776 static void
1777 process_file (fh, fn, lang)
1778      FILE *fh;
1779      char *fn;
1780      language *lang;
1781 {
1782   static const fdesc emptyfdesc;
1783   fdesc *fdp;
1784
1785   /* Create a new input file description entry. */
1786   fdp = xnew (1, fdesc);
1787   *fdp = emptyfdesc;
1788   fdp->next = fdhead;
1789   fdp->infname = savestr (fn);
1790   fdp->lang = lang;
1791   fdp->infabsname = absolute_filename (fn, cwd);
1792   fdp->infabsdir = absolute_dirname (fn, cwd);
1793   if (filename_is_absolute (fn))
1794     {
1795       /* An absolute file name.  Canonicalize it. */
1796       fdp->taggedfname = absolute_filename (fn, NULL);
1797     }
1798   else
1799     {
1800       /* A file name relative to cwd.  Make it relative
1801          to the directory of the tags file. */
1802       fdp->taggedfname = relative_filename (fn, tagfiledir);
1803     }
1804   fdp->usecharno = TRUE;        /* use char position when making tags */
1805   fdp->prop = NULL;
1806   fdp->written = FALSE;         /* not written on tags file yet */
1807
1808   fdhead = fdp;
1809   curfdp = fdhead;              /* the current file description */
1810
1811   find_entries (fh);
1812
1813   /* If not Ctags, and if this is not metasource and if it contained no #line
1814      directives, we can write the tags and free all nodes pointing to
1815      curfdp. */
1816   if (!CTAGS
1817       && curfdp->usecharno      /* no #line directives in this file */
1818       && !curfdp->lang->metasource)
1819     {
1820       node *np, *prev;
1821
1822       /* Look for the head of the sublist relative to this file.  See add_node
1823          for the structure of the node tree. */
1824       prev = NULL;
1825       for (np = nodehead; np != NULL; prev = np, np = np->left)
1826         if (np->fdp == curfdp)
1827           break;
1828
1829       /* If we generated tags for this file, write and delete them. */
1830       if (np != NULL)
1831         {
1832           /* This is the head of the last sublist, if any.  The following
1833              instructions depend on this being true. */
1834           assert (np->left == NULL);
1835
1836           assert (fdhead == curfdp);
1837           assert (last_node->fdp == curfdp);
1838           put_entries (np);     /* write tags for file curfdp->taggedfname */
1839           free_tree (np);       /* remove the written nodes */
1840           if (prev == NULL)
1841             nodehead = NULL;    /* no nodes left */
1842           else
1843             prev->left = NULL;  /* delete the pointer to the sublist */
1844         }
1845     }
1846 }
1847
1848 /*
1849  * This routine sets up the boolean pseudo-functions which work
1850  * by setting boolean flags dependent upon the corresponding character.
1851  * Every char which is NOT in that string is not a white char.  Therefore,
1852  * all of the array "_wht" is set to FALSE, and then the elements
1853  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1854  * of a char is TRUE if it is the string "white", else FALSE.
1855  */
1856 static void
1857 init ()
1858 {
1859   register char *sp;
1860   register int i;
1861
1862   for (i = 0; i < CHARS; i++)
1863     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1864   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1865   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1866   notinname('\0') = notinname('\n');
1867   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1868   begtoken('\0') = begtoken('\n');
1869   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1870   intoken('\0') = intoken('\n');
1871   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1872   endtoken('\0') = endtoken('\n');
1873 }
1874
1875 /*
1876  * This routine opens the specified file and calls the function
1877  * which finds the function and type definitions.
1878  */
1879 static void
1880 find_entries (inf)
1881      FILE *inf;
1882 {
1883   char *cp;
1884   language *lang = curfdp->lang;
1885   Lang_function *parser = NULL;
1886
1887   /* If user specified a language, use it. */
1888   if (lang != NULL && lang->function != NULL)
1889     {
1890       parser = lang->function;
1891     }
1892
1893   /* Else try to guess the language given the file name. */
1894   if (parser == NULL)
1895     {
1896       lang = get_language_from_filename (curfdp->infname, TRUE);
1897       if (lang != NULL && lang->function != NULL)
1898         {
1899           curfdp->lang = lang;
1900           parser = lang->function;
1901         }
1902     }
1903
1904   /* Else look for sharp-bang as the first two characters. */
1905   if (parser == NULL
1906       && readline_internal (&lb, inf) > 0
1907       && lb.len >= 2
1908       && lb.buffer[0] == '#'
1909       && lb.buffer[1] == '!')
1910     {
1911       char *lp;
1912
1913       /* Set lp to point at the first char after the last slash in the
1914          line or, if no slashes, at the first nonblank.  Then set cp to
1915          the first successive blank and terminate the string. */
1916       lp = etags_strrchr (lb.buffer+2, '/');
1917       if (lp != NULL)
1918         lp += 1;
1919       else
1920         lp = skip_spaces (lb.buffer + 2);
1921       cp = skip_non_spaces (lp);
1922       *cp = '\0';
1923
1924       if (strlen (lp) > 0)
1925         {
1926           lang = get_language_from_interpreter (lp);
1927           if (lang != NULL && lang->function != NULL)
1928             {
1929               curfdp->lang = lang;
1930               parser = lang->function;
1931             }
1932         }
1933     }
1934
1935   /* We rewind here, even if inf may be a pipe.  We fail if the
1936      length of the first line is longer than the pipe block size,
1937      which is unlikely. */
1938   rewind (inf);
1939
1940   /* Else try to guess the language given the case insensitive file name. */
1941   if (parser == NULL)
1942     {
1943       lang = get_language_from_filename (curfdp->infname, FALSE);
1944       if (lang != NULL && lang->function != NULL)
1945         {
1946           curfdp->lang = lang;
1947           parser = lang->function;
1948         }
1949     }
1950
1951   /* Else try Fortran or C. */
1952   if (parser == NULL)
1953     {
1954       node *old_last_node = last_node;
1955
1956       curfdp->lang = get_language_from_langname ("fortran");
1957       find_entries (inf);
1958
1959       if (old_last_node == last_node)
1960         /* No Fortran entries found.  Try C. */
1961         {
1962           /* We do not tag if rewind fails.
1963              Only the file name will be recorded in the tags file. */
1964           rewind (inf);
1965           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1966           find_entries (inf);
1967         }
1968       return;
1969     }
1970
1971   if (!no_line_directive
1972       && curfdp->lang != NULL && curfdp->lang->metasource)
1973     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1974        file, or anyway we parsed a file that is automatically generated from
1975        this one.  If this is the case, the bingo.c file contained #line
1976        directives that generated tags pointing to this file.  Let's delete
1977        them all before parsing this file, which is the real source. */
1978     {
1979       fdesc **fdpp = &fdhead;
1980       while (*fdpp != NULL)
1981         if (*fdpp != curfdp
1982             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1983           /* We found one of those!  We must delete both the file description
1984              and all tags referring to it. */
1985           {
1986             fdesc *badfdp = *fdpp;
1987
1988             /* Delete the tags referring to badfdp->taggedfname
1989                that were obtained from badfdp->infname. */
1990             invalidate_nodes (badfdp, &nodehead);
1991
1992             *fdpp = badfdp->next; /* remove the bad description from the list */
1993             free_fdesc (badfdp);
1994           }
1995         else
1996           fdpp = &(*fdpp)->next; /* advance the list pointer */
1997     }
1998
1999   assert (parser != NULL);
2000
2001   /* Generic initialisations before reading from file. */
2002   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2003
2004   /* Generic initialisations before parsing file with readline. */
2005   lineno = 0;                  /* reset global line number */
2006   charno = 0;                  /* reset global char number */
2007   linecharno = 0;              /* reset global char number of line start */
2008
2009   parser (inf);
2010
2011   regex_tag_multiline ();
2012 }
2013
2014 \f
2015 /*
2016  * Check whether an implicitly named tag should be created,
2017  * then call `pfnote'.
2018  * NAME is a string that is internally copied by this function.
2019  *
2020  * TAGS format specification
2021  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2022  * The following is explained in some more detail in etc/ETAGS.EBNF.
2023  *
2024  * make_tag creates tags with "implicit tag names" (unnamed tags)
2025  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2026  *  1. NAME does not contain any of the characters in NONAM;
2027  *  2. LINESTART contains name as either a rightmost, or rightmost but
2028  *     one character, substring;
2029  *  3. the character, if any, immediately before NAME in LINESTART must
2030  *     be a character in NONAM;
2031  *  4. the character, if any, immediately after NAME in LINESTART must
2032  *     also be a character in NONAM.
2033  *
2034  * The implementation uses the notinname() macro, which recognises the
2035  * characters stored in the string `nonam'.
2036  * etags.el needs to use the same characters that are in NONAM.
2037  */
2038 static void
2039 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2040      char *name;                /* tag name, or NULL if unnamed */
2041      int namelen;               /* tag length */
2042      bool is_func;              /* tag is a function */
2043      char *linestart;           /* start of the line where tag is */
2044      int linelen;               /* length of the line where tag is */
2045      int lno;                   /* line number */
2046      long cno;                  /* character number */
2047 {
2048   bool named = (name != NULL && namelen > 0);
2049
2050   if (!CTAGS && named)          /* maybe set named to false */
2051     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2052        such that etags.el can guess a name from it. */
2053     {
2054       int i;
2055       register char *cp = name;
2056
2057       for (i = 0; i < namelen; i++)
2058         if (notinname (*cp++))
2059           break;
2060       if (i == namelen)                         /* rule #1 */
2061         {
2062           cp = linestart + linelen - namelen;
2063           if (notinname (linestart[linelen-1]))
2064             cp -= 1;                            /* rule #4 */
2065           if (cp >= linestart                   /* rule #2 */
2066               && (cp == linestart
2067                   || notinname (cp[-1]))        /* rule #3 */
2068               && strneq (name, cp, namelen))    /* rule #2 */
2069             named = FALSE;      /* use implicit tag name */
2070         }
2071     }
2072
2073   if (named)
2074     name = savenstr (name, namelen);
2075   else
2076     name = NULL;
2077   pfnote (name, is_func, linestart, linelen, lno, cno);
2078 }
2079
2080 /* Record a tag. */
2081 static void
2082 pfnote (name, is_func, linestart, linelen, lno, cno)
2083      char *name;                /* tag name, or NULL if unnamed */
2084      bool is_func;              /* tag is a function */
2085      char *linestart;           /* start of the line where tag is */
2086      int linelen;               /* length of the line where tag is */
2087      int lno;                   /* line number */
2088      long cno;                  /* character number */
2089 {
2090   register node *np;
2091
2092   assert (name == NULL || name[0] != '\0');
2093   if (CTAGS && name == NULL)
2094     return;
2095
2096   np = xnew (1, node);
2097
2098   /* If ctags mode, change name "main" to M<thisfilename>. */
2099   if (CTAGS && !cxref_style && streq (name, "main"))
2100     {
2101       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2102       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2103       fp = etags_strrchr (np->name, '.');
2104       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2105         fp[0] = '\0';
2106     }
2107   else
2108     np->name = name;
2109   np->valid = TRUE;
2110   np->been_warned = FALSE;
2111   np->fdp = curfdp;
2112   np->is_func = is_func;
2113   np->lno = lno;
2114   if (np->fdp->usecharno)
2115     /* Our char numbers are 0-base, because of C language tradition?
2116        ctags compatibility?  old versions compatibility?   I don't know.
2117        Anyway, since emacs's are 1-base we expect etags.el to take care
2118        of the difference.  If we wanted to have 1-based numbers, we would
2119        uncomment the +1 below. */
2120     np->cno = cno /* + 1 */ ;
2121   else
2122     np->cno = invalidcharno;
2123   np->left = np->right = NULL;
2124   if (CTAGS && !cxref_style)
2125     {
2126       if (strlen (linestart) < 50)
2127         np->regex = concat (linestart, "$", "");
2128       else
2129         np->regex = savenstr (linestart, 50);
2130     }
2131   else
2132     np->regex = savenstr (linestart, linelen);
2133
2134   add_node (np, &nodehead);
2135 }
2136
2137 /*
2138  * free_tree ()
2139  *      recurse on left children, iterate on right children.
2140  */
2141 static void
2142 free_tree (np)
2143      register node *np;
2144 {
2145   while (np)
2146     {
2147       register node *node_right = np->right;
2148       free_tree (np->left);
2149       if (np->name != NULL)
2150         free (np->name);
2151       free (np->regex);
2152       free (np);
2153       np = node_right;
2154     }
2155 }
2156
2157 /*
2158  * free_fdesc ()
2159  *      delete a file description
2160  */
2161 static void
2162 free_fdesc (fdp)
2163      register fdesc *fdp;
2164 {
2165   if (fdp->infname != NULL) free (fdp->infname);
2166   if (fdp->infabsname != NULL) free (fdp->infabsname);
2167   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2168   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2169   if (fdp->prop != NULL) free (fdp->prop);
2170   free (fdp);
2171 }
2172
2173 /*
2174  * add_node ()
2175  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2176  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2177  *      balancing.
2178  *
2179  *      add_node is the only function allowed to add nodes, so it can
2180  *      maintain state.
2181  */
2182 static void
2183 add_node (np, cur_node_p)
2184      node *np, **cur_node_p;
2185 {
2186   register int dif;
2187   register node *cur_node = *cur_node_p;
2188
2189   if (cur_node == NULL)
2190     {
2191       *cur_node_p = np;
2192       last_node = np;
2193       return;
2194     }
2195
2196   if (!CTAGS)
2197     /* Etags Mode */
2198     {
2199       /* For each file name, tags are in a linked sublist on the right
2200          pointer.  The first tags of different files are a linked list
2201          on the left pointer.  last_node points to the end of the last
2202          used sublist. */
2203       if (last_node != NULL && last_node->fdp == np->fdp)
2204         {
2205           /* Let's use the same sublist as the last added node. */
2206           assert (last_node->right == NULL);
2207           last_node->right = np;
2208           last_node = np;
2209         }
2210       else if (cur_node->fdp == np->fdp)
2211         {
2212           /* Scanning the list we found the head of a sublist which is
2213              good for us.  Let's scan this sublist. */
2214           add_node (np, &cur_node->right);
2215         }
2216       else
2217         /* The head of this sublist is not good for us.  Let's try the
2218            next one. */
2219         add_node (np, &cur_node->left);
2220     } /* if ETAGS mode */
2221
2222   else
2223     {
2224       /* Ctags Mode */
2225       dif = strcmp (np->name, cur_node->name);
2226
2227       /*
2228        * If this tag name matches an existing one, then
2229        * do not add the node, but maybe print a warning.
2230        */
2231       if (no_duplicates && !dif)
2232         {
2233           if (np->fdp == cur_node->fdp)
2234             {
2235               if (!no_warnings)
2236                 {
2237                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2238                            np->fdp->infname, lineno, np->name);
2239                   fprintf (stderr, "Second entry ignored\n");
2240                 }
2241             }
2242           else if (!cur_node->been_warned && !no_warnings)
2243             {
2244               fprintf
2245                 (stderr,
2246                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2247                  np->fdp->infname, cur_node->fdp->infname, np->name);
2248               cur_node->been_warned = TRUE;
2249             }
2250           return;
2251         }
2252
2253       /* Actually add the node */
2254       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2255     } /* if CTAGS mode */
2256 }
2257
2258 /*
2259  * invalidate_nodes ()
2260  *      Scan the node tree and invalidate all nodes pointing to the
2261  *      given file description (CTAGS case) or free them (ETAGS case).
2262  */
2263 static void
2264 invalidate_nodes (badfdp, npp)
2265      fdesc *badfdp;
2266      node **npp;
2267 {
2268   node *np = *npp;
2269
2270   if (np == NULL)
2271     return;
2272
2273   if (CTAGS)
2274     {
2275       if (np->left != NULL)
2276         invalidate_nodes (badfdp, &np->left);
2277       if (np->fdp == badfdp)
2278         np->valid = FALSE;
2279       if (np->right != NULL)
2280         invalidate_nodes (badfdp, &np->right);
2281     }
2282   else
2283     {
2284       assert (np->fdp != NULL);
2285       if (np->fdp == badfdp)
2286         {
2287           *npp = np->left;      /* detach the sublist from the list */
2288           np->left = NULL;      /* isolate it */
2289           free_tree (np);       /* free it */
2290           invalidate_nodes (badfdp, npp);
2291         }
2292       else
2293         invalidate_nodes (badfdp, &np->left);
2294     }
2295 }
2296
2297 \f
2298 static int total_size_of_entries __P((node *));
2299 static int number_len __P((long));
2300
/* Return how many decimal digits are needed to print the non-negative
   number NUM.  Zero takes one digit. */
static int
number_len (num)
     long num;
{
  int digits;

  /* Every division by ten that still leaves something to print
     accounts for one more digit. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2311
2312 /*
2313  * Return total number of characters that put_entries will output for
2314  * the nodes in the linked list at the right of the specified node.
2315  * This count is irrelevant with etags.el since emacs 19.34 at least,
2316  * but is still supplied for backward compatibility.
2317  */
2318 static int
2319 total_size_of_entries (np)
2320      register node *np;
2321 {
2322   register int total = 0;
2323
2324   for (; np != NULL; np = np->right)
2325     if (np->valid)
2326       {
2327         total += strlen (np->regex) + 1;                /* pat\177 */
2328         if (np->name != NULL)
2329           total += strlen (np->name) + 1;               /* name\001 */
2330         total += number_len ((long) np->lno) + 1;       /* lno, */
2331         if (np->cno != invalidcharno)                   /* cno */
2332           total += number_len (np->cno);
2333         total += 1;                                     /* newline */
2334       }
2335
2336   return total;
2337 }
2338
2339 static void
2340 put_entries (np)
2341      register node *np;
2342 {
2343   register char *sp;
2344   static fdesc *fdp = NULL;
2345
2346   if (np == NULL)
2347     return;
2348
2349   /* Output subentries that precede this one */
2350   if (CTAGS)
2351     put_entries (np->left);
2352
2353   /* Output this entry */
2354   if (np->valid)
2355     {
2356       if (!CTAGS)
2357         {
2358           /* Etags mode */
2359           if (fdp != np->fdp)
2360             {
2361               fdp = np->fdp;
2362               fprintf (tagf, "\f\n%s,%d\n",
2363                        fdp->taggedfname, total_size_of_entries (np));
2364               fdp->written = TRUE;
2365             }
2366           fputs (np->regex, tagf);
2367           fputc ('\177', tagf);
2368           if (np->name != NULL)
2369             {
2370               fputs (np->name, tagf);
2371               fputc ('\001', tagf);
2372             }
2373           fprintf (tagf, "%d,", np->lno);
2374           if (np->cno != invalidcharno)
2375             fprintf (tagf, "%ld", np->cno);
2376           fputs ("\n", tagf);
2377         }
2378       else
2379         {
2380           /* Ctags mode */
2381           if (np->name == NULL)
2382             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2383
2384           if (cxref_style)
2385             {
2386               if (vgrind_style)
2387                 fprintf (stdout, "%s %s %d\n",
2388                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2389               else
2390                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2391                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2392             }
2393           else
2394             {
2395               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2396
2397               if (np->is_func)
2398                 {               /* function or #define macro with args */
2399                   putc (searchar, tagf);
2400                   putc ('^', tagf);
2401
2402                   for (sp = np->regex; *sp; sp++)
2403                     {
2404                       if (*sp == '\\' || *sp == searchar)
2405                         putc ('\\', tagf);
2406                       putc (*sp, tagf);
2407                     }
2408                   putc (searchar, tagf);
2409                 }
2410               else
2411                 {               /* anything else; text pattern inadequate */
2412                   fprintf (tagf, "%d", np->lno);
2413                 }
2414               putc ('\n', tagf);
2415             }
2416         }
2417     } /* if this node contains a valid tag */
2418
2419   /* Output subentries that follow this one */
2420   put_entries (np->right);
2421   if (!CTAGS)
2422     put_entries (np->left);
2423 }
2424
2425 \f
/* C extensions: flags naming the C dialect being parsed.  The keyword
   table further down uses them in its c_ext column. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2434
/*
 * The C symbol tables.
 * sym_type is the kind attached to each keyword of the gperf table
 * below; st_none is the first enumerator and so has the value 0.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2449
2450 static unsigned int hash __P((const char *, unsigned int));
2451 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2452 static enum sym_type C_symtype __P((char *, int, int));
2453
2454 /* Feed stuff between (but not including) %[ and %] lines to:
2455      gperf -m 5
2456 %[
2457 %compare-strncmp
2458 %enum
2459 %struct-type
2460 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2461 %%
2462 if,             0,                      st_C_ignore
2463 for,            0,                      st_C_ignore
2464 while,          0,                      st_C_ignore
2465 switch,         0,                      st_C_ignore
2466 return,         0,                      st_C_ignore
2467 __attribute__,  0,                      st_C_attribute
2468 GTY,            0,                      st_C_attribute
2469 @interface,     0,                      st_C_objprot
2470 @protocol,      0,                      st_C_objprot
2471 @implementation,0,                      st_C_objimpl
2472 @end,           0,                      st_C_objend
2473 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2474 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2475 friend,         C_PLPL,                 st_C_ignore
2476 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2477 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2478 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2479 class,          0,                      st_C_class
2480 namespace,      C_PLPL,                 st_C_struct
2481 domain,         C_STAR,                 st_C_struct
2482 union,          0,                      st_C_struct
2483 struct,         0,                      st_C_struct
2484 extern,         0,                      st_C_extern
2485 enum,           0,                      st_C_enum
2486 typedef,        0,                      st_C_typedef
2487 define,         0,                      st_C_define
2488 undef,          0,                      st_C_define
2489 operator,       C_PLPL,                 st_C_operator
2490 template,       0,                      st_C_template
2491 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2492 DEFUN,          0,                      st_C_gnumacro
2493 SYSCALL,        0,                      st_C_gnumacro
2494 ENTRY,          0,                      st_C_gnumacro
2495 PSEUDO,         0,                      st_C_gnumacro
2496 # These are defined inside C functions, so currently they are not met.
2497 # EXFUN used in glibc, DEFVAR_* in emacs.
2498 #EXFUN,         0,                      st_C_gnumacro
2499 #DEFVAR_,       0,                      st_C_gnumacro
2500 %]
2501 and replace lines between %< and %> with its output, then:
2502  - remove the #if characterset check
2503  - make in_word_set static and not inline. */
2504 /*%<*/
2505 /* C code produced by gperf version 3.0.1 */
2506 /* Command-line: gperf -m 5  */
2507 /* Computed positions: -k'2-3' */
2508
2509 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2510 /* maximum key range = 33, duplicates = 0 */
2511
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Perfect-hash function for the keyword table: the hash is LEN plus the
   association values of the second and (when LEN is not 2) third
   characters of STR.  The only caller, in_word_set, never passes a LEN
   below 2, so STR[1] is always readable there.  */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Association value for every possible unsigned char; 35 marks
     characters that never appear at a hashed position of a keyword.  */
  static const unsigned char asso_values[] =
    {
      /*   0 -   9 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  10 -  19 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  20 -  29 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  30 -  39 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  40 -  49 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  50 -  59 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  60 -  69 */ 35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      /*  70 -  79 */ 26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      /*  80 -  89 */ 35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      /*  90 -  99 */ 35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      /* 100 - 109 */ 23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
      /* 110 - 119 */  0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
      /* 120 - 129 */  4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 130 - 139 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 140 - 149 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 150 - 159 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 160 - 169 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 170 - 179 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 180 - 189 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 190 - 199 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 200 - 209 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 210 - 219 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 220 - 229 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 230 - 239 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 240 - 249 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 250 - 255 */ 35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Two-character words contribute only their second character.  */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2566
2567 static struct C_stab_entry *
2568 in_word_set (str, len)
2569      register const char *str;
2570      register unsigned int len;
2571 {
2572   enum
2573     {
2574       TOTAL_KEYWORDS = 32,
2575       MIN_WORD_LENGTH = 2,
2576       MAX_WORD_LENGTH = 15,
2577       MIN_HASH_VALUE = 2,
2578       MAX_HASH_VALUE = 34
2579     };
2580
2581   static struct C_stab_entry wordlist[] =
2582     {
2583       {""}, {""},
2584       {"if",            0,                      st_C_ignore},
2585       {"GTY",           0,                      st_C_attribute},
2586       {"@end",          0,                      st_C_objend},
2587       {"union",         0,                      st_C_struct},
2588       {"define",                0,                      st_C_define},
2589       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2590       {"template",      0,                      st_C_template},
2591       {"operator",      C_PLPL,                 st_C_operator},
2592       {"@interface",    0,                      st_C_objprot},
2593       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2594       {"friend",                C_PLPL,                 st_C_ignore},
2595       {"typedef",       0,                      st_C_typedef},
2596       {"return",                0,                      st_C_ignore},
2597       {"@implementation",0,                     st_C_objimpl},
2598       {"@protocol",     0,                      st_C_objprot},
2599       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2600       {"extern",                0,                      st_C_extern},
2601       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2602       {"struct",                0,                      st_C_struct},
2603       {"domain",                C_STAR,                 st_C_struct},
2604       {"switch",                0,                      st_C_ignore},
2605       {"enum",          0,                      st_C_enum},
2606       {"for",           0,                      st_C_ignore},
2607       {"namespace",     C_PLPL,                 st_C_struct},
2608       {"class",         0,                      st_C_class},
2609       {"while",         0,                      st_C_ignore},
2610       {"undef",         0,                      st_C_define},
2611       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2612       {"__attribute__", 0,                      st_C_attribute},
2613       {"SYSCALL",       0,                      st_C_gnumacro},
2614       {"ENTRY",         0,                      st_C_gnumacro},
2615       {"PSEUDO",                0,                      st_C_gnumacro},
2616       {"DEFUN",         0,                      st_C_gnumacro}
2617     };
2618
2619   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2620     {
2621       register int key = hash (str, len);
2622
2623       if (key <= MAX_HASH_VALUE && key >= 0)
2624         {
2625           register const char *s = wordlist[key].name;
2626
2627           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2628             return &wordlist[key];
2629         }
2630     }
2631   return 0;
2632 }
2633 /*%>*/
2634
2635 static enum sym_type
2636 C_symtype (str, len, c_ext)
2637      char *str;
2638      int len;
2639      int c_ext;
2640 {
2641   register struct C_stab_entry *se = in_word_set (str, len);
2642
2643   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2644     return st_none;
2645   return se->type;
2646 }
2647
2648 \f
/*
 * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it meets a keyword classified as
 * st_C_attribute (__attribute__ and GTY in the keyword table).
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* GNU macro keyword seen (DEFUN, SYSCALL,
                                   ENTRY or PSEUDO in the keyword table) */
  fdefunname,                   /* name following a GNU macro keyword seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag;
                                   consider_token also enters this state
                                   on Java `extends' / `implements' */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token replaces it with a freshly saved copy of the class name
 * and never frees the previous value.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2737
2738
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* TRUE while the token may be turned into
                                   a tag; when FALSE, make_C_tag creates no
                                   tag (unless DEBUG).  make_C_tag clears
                                   it after tagging, and the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2761
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 *
 * cstack is a stack of the enclosing struct-like scopes: entry I holds
 * the scope name (possibly NULL) and the brace level it was pushed at.
 * pushclass_above doubles both arrays when they are full, and
 * popclass_above frees the names it discards.
 */
static void pushclass_above __P((int, char *, int));
static void popclass_above __P((int));
static void write_classname __P((linebuffer *, char *qualifier));

static struct {
  char **cname;                 /* nested class names; an entry may be NULL */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that reads a variable named `bracelev', which
   must be in scope wherever the macro is used. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2781
2782 static void
2783 pushclass_above (bracelev, str, len)
2784      int bracelev;
2785      char *str;
2786      int len;
2787 {
2788   int nl;
2789
2790   popclass_above (bracelev);
2791   nl = cstack.nl;
2792   if (nl >= cstack.size)
2793     {
2794       int size = cstack.size *= 2;
2795       xrnew (cstack.cname, size, char *);
2796       xrnew (cstack.bracelev, size, int);
2797     }
2798   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2799   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2800   cstack.bracelev[nl] = bracelev;
2801   cstack.nl = nl + 1;
2802 }
2803
2804 static void
2805 popclass_above (bracelev)
2806      int bracelev;
2807 {
2808   int nl;
2809
2810   for (nl = cstack.nl - 1;
2811        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2812        nl--)
2813     {
2814       if (cstack.cname[nl] != NULL)
2815         free (cstack.cname[nl]);
2816       cstack.nl = nl;
2817     }
2818 }
2819
2820 static void
2821 write_classname (cn, qualifier)
2822      linebuffer *cn;
2823      char *qualifier;
2824 {
2825   int i, len;
2826   int qlen = strlen (qualifier);
2827
2828   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2829     {
2830       len = 0;
2831       cn->len = 0;
2832       cn->buffer[0] = '\0';
2833     }
2834   else
2835     {
2836       len = strlen (cstack.cname[0]);
2837       linebuffer_setlen (cn, len);
2838       strcpy (cn->buffer, cstack.cname[0]);
2839     }
2840   for (i = 1; i < cstack.nl; i++)
2841     {
2842       char *s;
2843       int slen;
2844
2845       s = cstack.cname[i];
2846       if (s == NULL)
2847         continue;
2848       slen = strlen (s);
2849       len += slen + qlen;
2850       linebuffer_setlen (cn, len);
2851       strncat (cn->buffer, qualifier, qlen);
2852       strncat (cn->buffer, s, slen);
2853     }
2854 }
2855
2856 \f
2857 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2858 static void make_C_tag __P((bool));
2859
/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function or variable, or corresponds to a typedef, or
 *      is a struct/union/enum tag, or #define, or an enum constant.
 *
 *      Returns TRUE when the token is such a candidate, FALSE when the
 *      token only advances one of the state machines or is ignored.
 *
 *      *IS_FUNC_OR_VAR is written on some paths only: for a #define
 *      name it gets TRUE exactly when the next character is '(';
 *      it gets TRUE for Objective C class and category names, for
 *      `operator', for the name after a GNU macro keyword and for a
 *      function or variable name.  On every other path it is left
 *      untouched.
 *
 *      C_EXTP points to the language mask.  When the mask has C_AUTO
 *      set and a C++ construct is recognized, the mask is rewritten
 *      with C_PLPL set and C_AUTO cleared.
 *
 * Globals
 *      fvdef                   IN OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      inattribute             OUT
 *      fvextern                OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names)
 */

static bool
consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
     register char *str;        /* IN: token pointer */
     register int len;          /* IN: token length */
     register int c;            /* IN: first char after the token */
     int *c_extp;               /* IN, OUT: C extensions mask */
     int bracelev;              /* IN: brace level */
     int parlev;                /* IN: parenthesis level */
     bool *is_func_or_var;      /* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  /* Recomputed on every call, so nothing is carried over between calls. */
  static enum sym_type toktype;


  /* Classify the token; st_none means it is not a keyword that applies
     to the current language mask. */
  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = TRUE;
      return FALSE;
     }

   /*
    * Advance the definedef state machine.
    */
   switch (definedef)
     {
     case dnone:
       /* We're not on a preprocessor line. */
       if (toktype == st_C_gnumacro)
         {
           fvdef = fdefunkey;
           return FALSE;
         }
       break;
     case dsharpseen:
       /* First token after '#': only define/undef keep the line alive. */
       if (toktype == st_C_define)
         {
           definedef = ddefineseen;
         }
       else
         {
           definedef = dignorerest;
         }
       return FALSE;
     case ddefineseen:
       /*
        * Make a tag for any macro, unless it is a constant
        * and constantypedefs is FALSE.
        */
       definedef = dignorerest;
       /* A '(' right after the name means a macro with arguments. */
       *is_func_or_var = (c == '(');
       if (!*is_func_or_var && !constantypedefs)
         return FALSE;
       else
         return TRUE;
     case dignorerest:
       return FALSE;
     default:
       error ("internal error: definedef value.", (char *)NULL);
     }

   /*
    * Now typedefs
    */
   switch (typdef)
     {
     case tnone:
       if (toktype == st_C_typedef)
         {
           /* The state only advances when typedefs are being tagged;
              the function/variable machine is reset either way. */
           if (typedefs)
             typdef = tkeyseen;
           fvextern = FALSE;
           fvdef = fvnone;
           return FALSE;
         }
       break;
     case tkeyseen:
       switch (toktype)
         {
         case st_none:
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           typdef = ttypeseen;
         }
       break;
     case ttypeseen:
       if (structdef == snone && fvdef == fvnone)
         {
           fvdef = fvnameseen;
           return TRUE;
         }
       break;
     case tend:
       /* Just before the typedef tag: anything but a struct-like
          keyword is taken as the tag. */
       switch (toktype)
         {
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           return FALSE;
         }
       return TRUE;
     }

   /* Struct-like keywords and their relatives. */
   switch (toktype)
     {
     case st_C_javastruct:
       if (structdef == stagseen)
         structdef = scolonseen;
       return FALSE;
     case st_C_template:
     case st_C_class:
       if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
           && bracelev == 0
           && definedef == dnone && structdef == snone
           && typdef == tnone && fvdef == fvnone)
         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
       if (toktype == st_C_template)
         break;
       /* FALLTHRU */
     case st_C_struct:
     case st_C_enum:
       if (parlev == 0
           && fvdef != vignore
           && (typdef == tkeyseen
               || (typedefs_or_cplusplus && structdef == snone)))
         {
           /* Remember which keyword opened the construct and at which
              brace level, for the enum constant test further down. */
           structdef = skeyseen;
           structtype = toktype;
           structbracelev = bracelev;
           if (fvdef == fvnameseen)
             fvdef = fvnone;
         }
       return FALSE;
     }

   /* The token right after a struct-like keyword is its tag. */
   if (structdef == skeyseen)
     {
       structdef = stagseen;
       return TRUE;
     }

   if (typdef != tnone)
     definedef = dnone;

   /* Detect Objective C constructs. */
   switch (objdef)
     {
     case onone:
       switch (toktype)
         {
         case st_C_objprot:
           objdef = oprotocol;
           return FALSE;
         case st_C_objimpl:
           objdef = oimplementation;
           return FALSE;
         }
       break;
     case oimplementation:
       /* Save the class tag for functions or variables defined inside. */
       objtag = savenstr (str, len);
       objdef = oinbody;
       return FALSE;
     case oprotocol:
       /* Save the class tag for categories. */
       objtag = savenstr (str, len);
       objdef = otagseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oparenseen:
       objdef = ocatseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oinbody:
       break;
     case omethodsign:
       if (parlev == 0)
         {
           /* First part of a method name: start token_name afresh. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, len);
           strncpy (token_name.buffer, str, len);
           token_name.buffer[len] = '\0';
           return TRUE;
         }
       return FALSE;
     case omethodcolon:
       if (parlev == 0)
         objdef = omethodparm;
       return FALSE;
     case omethodparm:
       if (parlev == 0)
         {
           /* Further part of a method name: append it to token_name. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, token_name.len + len);
           strncat (token_name.buffer, str, len);
           return TRUE;
         }
       return FALSE;
     case oignore:
       if (toktype == st_C_objend)
         {
           /* Memory leakage here: the string pointed by objtag is
              never released, because many tests would be needed to
              avoid breaking on incorrect input code.  The amount of
              memory leaked here is the sum of the lengths of the
              class tags.
           free (objtag); */
           objdef = onone;
         }
       return FALSE;
     }

   /* A function, variable or enum constant? */
   switch (toktype)
     {
     case st_C_extern:
       fvextern = TRUE;
       /* Leave fvdef alone inside or after a parameter list and while
          ignoring; otherwise restart the function/variable machine. */
       switch  (fvdef)
         {
         case finlist:
         case flistseen:
         case fignore:
         case vignore:
           break;
         default:
           fvdef = fvnone;
         }
       return FALSE;
     case st_C_ignore:
       fvextern = FALSE;
       fvdef = vignore;
       return FALSE;
     case st_C_operator:
       fvdef = foperator;
       *is_func_or_var = TRUE;
       return TRUE;
     case st_none:
       if (constantypedefs
           && structdef == snone
           && structtype == st_C_enum && bracelev > structbracelev)
         return TRUE;           /* enum constant */
       switch (fvdef)
         {
         case fdefunkey:
           if (bracelev > 0)
             break;
           fvdef = fdefunname;  /* GNU macro */
           *is_func_or_var = TRUE;
           return TRUE;
         case fvnone:
           switch (typdef)
             {
             case ttypeseen:
               return FALSE;
             case tnone:
               /* An asm statement is skipped like a variable. */
               if ((strneq (str, "asm", 3) && endtoken (str[3]))
                   || (strneq (str, "__asm__", 7) && endtoken (str[7])))
                 {
                   fvdef = vignore;
                   return FALSE;
                 }
               break;
             }
          /* FALLTHRU */
          case fvnameseen:
          /* 10 is the length of "::operator". */
          if (len >= 10 && strneq (str+len-10, "::operator", 10))
            {
              if (*c_extp & C_AUTO) /* automatic detection of C++ */
                *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
              fvdef = foperator;
              *is_func_or_var = TRUE;
              return TRUE;
            }
          /* Inside braces, only names directly in a struct body count. */
          if (bracelev > 0 && !instruct)
            break;
          fvdef = fvnameseen;   /* function or variable */
          *is_func_or_var = TRUE;
          return TRUE;
        }
      break;
    }

  return FALSE;
}
3172
3173 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are written for use inside C_entries: they refer by
 * name to variables that must be in scope at the point of use (curndx,
 * newndx, c_ext, lp, inf, quotednl, savetoken) as well as to charno,
 * token and definedef.
 */
static struct
{
  long linepos;
  linebuffer lb;
} lbs[2];

/* True when the line most recently read lives in the current buffer. */
#define current_lb_is_new (newndx == curndx)
/* Make the other of the two buffers the current one. */
#define switch_line_buffers() (curndx = 1 - curndx)

/* The current and the most recently read line buffers, and the
   positions recorded for them. */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Language tests on the C extensions mask c_ext. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Go to the next line without touching definedef: record charno as the
   position of the current buffer, call readline on it, point lp at the
   start of the buffer, clear quotednl and mark the current buffer as
   the new one. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but the line end also terminates any
   preprocessor line: a valid savetoken is restored into token and
   definedef is reset to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3216
3217
3218 static void
3219 make_C_tag (isfun)
3220      bool isfun;
3221 {
3222   /* This function is never called when token.valid is FALSE, but
3223      we must protect against invalid input or internal errors. */
3224   if (!DEBUG && !token.valid)
3225     return;
3226
3227   if (token.valid)
3228     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3229               token.offset+token.length+1, token.lineno, token.linepos);
3230   else                          /* this case is optimised away if !DEBUG */
3231     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3232               token_name.len + 17, isfun, token.line,
3233               token.offset+token.length+1, token.lineno, token.linepos);
3234
3235   token.valid = FALSE;
3236 }
3237
3238
3239 /*
3240  * C_entries ()
3241  *      This routine finds functions, variables, typedefs,
3242  *      #define's, enum constants and struct/union/enum definitions in
3243  *      C syntax and adds them to the list.
3244  */
3245 static void
3246 C_entries (c_ext, inf)
3247      int c_ext;                 /* extension of C */
3248      FILE *inf;                 /* input file */
3249 {
3250   register char c;              /* latest char read; '\0' for end of line */
3251   register char *lp;            /* pointer one beyond the character `c' */
3252   int curndx, newndx;           /* indices for current and new lb */
3253   register int tokoff;          /* offset in line of start of current token */
3254   register int toklen;          /* length of current token */
3255   char *qualifier;              /* string used to qualify names */
3256   int qlen;                     /* length of qualifier */
3257   int bracelev;                 /* current brace level */
3258   int bracketlev;               /* current bracket level */
3259   int parlev;                   /* current parenthesis level */
3260   int attrparlev;               /* __attribute__ parenthesis level */
3261   int templatelev;              /* current template level */
3262   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3263   bool incomm, inquote, inchar, quotednl, midtoken;
3264   bool yacc_rules;              /* in the rules part of a yacc file */
3265   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3266
3267
3268   linebuffer_init (&lbs[0].lb);
3269   linebuffer_init (&lbs[1].lb);
3270   if (cstack.size == 0)
3271     {
3272       cstack.size = (DEBUG) ? 1 : 4;
3273       cstack.nl = 0;
3274       cstack.cname = xnew (cstack.size, char *);
3275       cstack.bracelev = xnew (cstack.size, int);
3276     }
3277
3278   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3279   curndx = newndx = 0;
3280   lp = curlb.buffer;
3281   *lp = 0;
3282
3283   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3284   structdef = snone; definedef = dnone; objdef = onone;
3285   yacc_rules = FALSE;
3286   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3287   token.valid = savetoken.valid = FALSE;
3288   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3289   if (cjava)
3290     { qualifier = "."; qlen = 1; }
3291   else
3292     { qualifier = "::"; qlen = 2; }
3293
3294
3295   while (!feof (inf))
3296     {
3297       c = *lp++;
3298       if (c == '\\')
3299         {
3300           /* If we are at the end of the line, the next character is a
3301              '\0'; do not skip it, because it is what tells us
3302              to read the next line.  */
3303           if (*lp == '\0')
3304             {
3305               quotednl = TRUE;
3306               continue;
3307             }
3308           lp++;
3309           c = ' ';
3310         }
3311       else if (incomm)
3312         {
3313           switch (c)
3314             {
3315             case '*':
3316               if (*lp == '/')
3317                 {
3318                   c = *lp++;
3319                   incomm = FALSE;
3320                 }
3321               break;
3322             case '\0':
3323               /* Newlines inside comments do not end macro definitions in
3324                  traditional cpp. */
3325               CNL_SAVE_DEFINEDEF ();
3326               break;
3327             }
3328           continue;
3329         }
3330       else if (inquote)
3331         {
3332           switch (c)
3333             {
3334             case '"':
3335               inquote = FALSE;
3336               break;
3337             case '\0':
3338               /* Newlines inside strings do not end macro definitions
3339                  in traditional cpp, even though compilers don't
3340                  usually accept them. */
3341               CNL_SAVE_DEFINEDEF ();
3342               break;
3343             }
3344           continue;
3345         }
3346       else if (inchar)
3347         {
3348           switch (c)
3349             {
3350             case '\0':
3351               /* Hmmm, something went wrong. */
3352               CNL ();
3353               /* FALLTHRU */
3354             case '\'':
3355               inchar = FALSE;
3356               break;
3357             }
3358           continue;
3359         }
3360       else if (bracketlev > 0)
3361         {
3362           switch (c)
3363             {
3364             case ']':
3365               if (--bracketlev > 0)
3366                 continue;
3367               break;
3368             case '\0':
3369               CNL_SAVE_DEFINEDEF ();
3370               break;
3371             }
3372           continue;
3373         }
3374       else switch (c)
3375         {
3376         case '"':
3377           inquote = TRUE;
3378           if (inattribute)
3379             break;
3380           switch (fvdef)
3381             {
3382             case fdefunkey:
3383             case fstartlist:
3384             case finlist:
3385             case fignore:
3386             case vignore:
3387               break;
3388             default:
3389               fvextern = FALSE;
3390               fvdef = fvnone;
3391             }
3392           continue;
3393         case '\'':
3394           inchar = TRUE;
3395           if (inattribute)
3396             break;
3397           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3398             {
3399               fvextern = FALSE;
3400               fvdef = fvnone;
3401             }
3402           continue;
3403         case '/':
3404           if (*lp == '*')
3405             {
3406               incomm = TRUE;
3407               lp++;
3408               c = ' ';
3409             }
3410           else if (/* cplpl && */ *lp == '/')
3411             {
3412               c = '\0';
3413             }
3414           break;
3415         case '%':
3416           if ((c_ext & YACC) && *lp == '%')
3417             {
3418               /* Entering or exiting rules section in yacc file. */
3419               lp++;
3420               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3421               typdef = tnone; structdef = snone;
3422               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3423               bracelev = 0;
3424               yacc_rules = !yacc_rules;
3425               continue;
3426             }
3427           else
3428             break;
3429         case '#':
3430           if (definedef == dnone)
3431             {
3432               char *cp;
3433               bool cpptoken = TRUE;
3434
3435               /* Look back on this line.  If all blanks, or nonblanks
3436                  followed by an end of comment, this is a preprocessor
3437                  token. */
3438               for (cp = newlb.buffer; cp < lp-1; cp++)
3439                 if (!iswhite (*cp))
3440                   {
3441                     if (*cp == '*' && *(cp+1) == '/')
3442                       {
3443                         cp++;
3444                         cpptoken = TRUE;
3445                       }
3446                     else
3447                       cpptoken = FALSE;
3448                   }
3449               if (cpptoken)
3450                 definedef = dsharpseen;
3451             } /* if (definedef == dnone) */
3452           continue;
3453         case '[':
3454           bracketlev++;
3455             continue;
3456         } /* switch (c) */
3457
3458
3459       /* Consider token only if some involved conditions are satisfied. */
3460       if (typdef != tignore
3461           && definedef != dignorerest
3462           && fvdef != finlist
3463           && templatelev == 0
3464           && (definedef != dnone
3465               || structdef != scolonseen)
3466           && !inattribute)
3467         {
3468           if (midtoken)
3469             {
3470               if (endtoken (c))
3471                 {
3472                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3473                     /* This handles :: in the middle,
3474                        but not at the beginning of an identifier.
3475                        Also, space-separated :: is not recognised. */
3476                     {
3477                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3478                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3479                       lp += 2;
3480                       toklen += 2;
3481                       c = lp[-1];
3482                       goto still_in_token;
3483                     }
3484                   else
3485                     {
3486                       bool funorvar = FALSE;
3487
3488                       if (yacc_rules
3489                           || consider_token (newlb.buffer + tokoff, toklen, c,
3490                                              &c_ext, bracelev, parlev,
3491                                              &funorvar))
3492                         {
3493                           if (fvdef == foperator)
3494                             {
3495                               char *oldlp = lp;
3496                               lp = skip_spaces (lp-1);
3497                               if (*lp != '\0')
3498                                 lp += 1;
3499                               while (*lp != '\0'
3500                                      && !iswhite (*lp) && *lp != '(')
3501                                 lp += 1;
3502                               c = *lp++;
3503                               toklen += lp - oldlp;
3504                             }
3505                           token.named = FALSE;
3506                           if (!plainc
3507                               && nestlev > 0 && definedef == dnone)
3508                             /* in struct body */
3509                             {
3510                               write_classname (&token_name, qualifier);
3511                               linebuffer_setlen (&token_name,
3512                                                  token_name.len+qlen+toklen);
3513                               strcat (token_name.buffer, qualifier);
3514                               strncat (token_name.buffer,
3515                                        newlb.buffer + tokoff, toklen);
3516                               token.named = TRUE;
3517                             }
3518                           else if (objdef == ocatseen)
3519                             /* Objective C category */
3520                             {
3521                               int len = strlen (objtag) + 2 + toklen;
3522                               linebuffer_setlen (&token_name, len);
3523                               strcpy (token_name.buffer, objtag);
3524                               strcat (token_name.buffer, "(");
3525                               strncat (token_name.buffer,
3526                                        newlb.buffer + tokoff, toklen);
3527                               strcat (token_name.buffer, ")");
3528                               token.named = TRUE;
3529                             }
3530                           else if (objdef == omethodtag
3531                                    || objdef == omethodparm)
3532                             /* Objective C method */
3533                             {
3534                               token.named = TRUE;
3535                             }
3536                           else if (fvdef == fdefunname)
3537                             /* GNU DEFUN and similar macros */
3538                             {
3539                               bool defun = (newlb.buffer[tokoff] == 'F');
3540                               int off = tokoff;
3541                               int len = toklen;
3542
3543                               /* Rewrite the tag so that emacs lisp DEFUNs
3544                                  can be found by their elisp name */
3545                               if (defun)
3546                                 {
3547                                   off += 1;
3548                                   len -= 1;
3549                                 }
3550                               linebuffer_setlen (&token_name, len);
3551                               strncpy (token_name.buffer,
3552                                        newlb.buffer + off, len);
3553                               token_name.buffer[len] = '\0';
3554                               if (defun)
3555                                 while (--len >= 0)
3556                                   if (token_name.buffer[len] == '_')
3557                                     token_name.buffer[len] = '-';
3558                               token.named = defun;
3559                             }
3560                           else
3561                             {
3562                               linebuffer_setlen (&token_name, toklen);
3563                               strncpy (token_name.buffer,
3564                                        newlb.buffer + tokoff, toklen);
3565                               token_name.buffer[toklen] = '\0';
3566                               /* Name macros and members. */
3567                               token.named = (structdef == stagseen
3568                                              || typdef == ttypeseen
3569                                              || typdef == tend
3570                                              || (funorvar
3571                                                  && definedef == dignorerest)
3572                                              || (funorvar
3573                                                  && definedef == dnone
3574                                                  && structdef == snone
3575                                                  && bracelev > 0));
3576                             }
3577                           token.lineno = lineno;
3578                           token.offset = tokoff;
3579                           token.length = toklen;
3580                           token.line = newlb.buffer;
3581                           token.linepos = newlinepos;
3582                           token.valid = TRUE;
3583
3584                           if (definedef == dnone
3585                               && (fvdef == fvnameseen
3586                                   || fvdef == foperator
3587                                   || structdef == stagseen
3588                                   || typdef == tend
3589                                   || typdef == ttypeseen
3590                                   || objdef != onone))
3591                             {
3592                               if (current_lb_is_new)
3593                                 switch_line_buffers ();
3594                             }
3595                           else if (definedef != dnone
3596                                    || fvdef == fdefunname
3597                                    || instruct)
3598                             make_C_tag (funorvar);
3599                         }
3600                       else /* not yacc and consider_token failed */
3601                         {
3602                           if (inattribute && fvdef == fignore)
3603                             {
3604                               /* We have just met __attribute__ after a
3605                                  function parameter list: do not tag the
3606                                  function again. */
3607                               fvdef = fvnone;
3608                             }
3609                         }
3610                       midtoken = FALSE;
3611                     }
3612                 } /* if (endtoken (c)) */
3613               else if (intoken (c))
3614                 still_in_token:
3615                 {
3616                   toklen++;
3617                   continue;
3618                 }
3619             } /* if (midtoken) */
3620           else if (begtoken (c))
3621             {
3622               switch (definedef)
3623                 {
3624                 case dnone:
3625                   switch (fvdef)
3626                     {
3627                     case fstartlist:
3628                       /* This prevents tagging fb in
3629                          void (__attribute__((noreturn)) *fb) (void);
3630                          Fixing this is not easy and not very important. */
3631                       fvdef = finlist;
3632                       continue;
3633                     case flistseen:
3634                       if (plainc || declarations)
3635                         {
3636                           make_C_tag (TRUE); /* a function */
3637                           fvdef = fignore;
3638                         }
3639                       break;
3640                     }
3641                   if (structdef == stagseen && !cjava)
3642                     {
3643                       popclass_above (bracelev);
3644                       structdef = snone;
3645                     }
3646                   break;
3647                 case dsharpseen:
3648                   savetoken = token;
3649                   break;
3650                 }
3651               if (!yacc_rules || lp == newlb.buffer + 1)
3652                 {
3653                   tokoff = lp - 1 - newlb.buffer;
3654                   toklen = 1;
3655                   midtoken = TRUE;
3656                 }
3657               continue;
3658             } /* if (begtoken) */
3659         } /* if must look at token */
3660
3661
3662       /* Detect end of line, colon, comma, semicolon and various braces
3663          after having handled a token.*/
3664       switch (c)
3665         {
3666         case ':':
3667           if (inattribute)
3668             break;
3669           if (yacc_rules && token.offset == 0 && token.valid)
3670             {
3671               make_C_tag (FALSE); /* a yacc function */
3672               break;
3673             }
3674           if (definedef != dnone)
3675             break;
3676           switch (objdef)
3677             {
3678             case  otagseen:
3679               objdef = oignore;
3680               make_C_tag (TRUE); /* an Objective C class */
3681               break;
3682             case omethodtag:
3683             case omethodparm:
3684               objdef = omethodcolon;
3685               linebuffer_setlen (&token_name, token_name.len + 1);
3686               strcat (token_name.buffer, ":");
3687               break;
3688             }
3689           if (structdef == stagseen)
3690             {
3691               structdef = scolonseen;
3692               break;
3693             }
3694           /* Should be useless, but may be work as a safety net. */
3695           if (cplpl && fvdef == flistseen)
3696             {
3697               make_C_tag (TRUE); /* a function */
3698               fvdef = fignore;
3699               break;
3700             }
3701           break;
3702         case ';':
3703           if (definedef != dnone || inattribute)
3704             break;
3705           switch (typdef)
3706             {
3707             case tend:
3708             case ttypeseen:
3709               make_C_tag (FALSE); /* a typedef */
3710               typdef = tnone;
3711               fvdef = fvnone;
3712               break;
3713             case tnone:
3714             case tinbody:
3715             case tignore:
3716               switch (fvdef)
3717                 {
3718                 case fignore:
3719                   if (typdef == tignore || cplpl)
3720                     fvdef = fvnone;
3721                   break;
3722                 case fvnameseen:
3723                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3724                       || (members && instruct))
3725                     make_C_tag (FALSE); /* a variable */
3726                   fvextern = FALSE;
3727                   fvdef = fvnone;
3728                   token.valid = FALSE;
3729                   break;
3730                 case flistseen:
3731                   if ((declarations
3732                        && (cplpl || !instruct)
3733                        && (typdef == tnone || (typdef != tignore && instruct)))
3734                       || (members
3735                           && plainc && instruct))
3736                     make_C_tag (TRUE);  /* a function */
3737                   /* FALLTHRU */
3738                 default:
3739                   fvextern = FALSE;
3740                   fvdef = fvnone;
3741                   if (declarations
3742                        && cplpl && structdef == stagseen)
3743                     make_C_tag (FALSE); /* forward declaration */
3744                   else
3745                     token.valid = FALSE;
3746                 } /* switch (fvdef) */
3747               /* FALLTHRU */
3748             default:
3749               if (!instruct)
3750                 typdef = tnone;
3751             }
3752           if (structdef == stagseen)
3753             structdef = snone;
3754           break;
3755         case ',':
3756           if (definedef != dnone || inattribute)
3757             break;
3758           switch (objdef)
3759             {
3760             case omethodtag:
3761             case omethodparm:
3762               make_C_tag (TRUE); /* an Objective C method */
3763               objdef = oinbody;
3764               break;
3765             }
3766           switch (fvdef)
3767             {
3768             case fdefunkey:
3769             case foperator:
3770             case fstartlist:
3771             case finlist:
3772             case fignore:
3773             case vignore:
3774               break;
3775             case fdefunname:
3776               fvdef = fignore;
3777               break;
3778             case fvnameseen:
3779               if (parlev == 0
3780                   && ((globals
3781                        && bracelev == 0
3782                        && templatelev == 0
3783                        && (!fvextern || declarations))
3784                       || (members && instruct)))
3785                   make_C_tag (FALSE); /* a variable */
3786               break;
3787             case flistseen:
3788               if ((declarations && typdef == tnone && !instruct)
3789                   || (members && typdef != tignore && instruct))
3790                 {
3791                   make_C_tag (TRUE); /* a function */
3792                   fvdef = fvnameseen;
3793                 }
3794               else if (!declarations)
3795                 fvdef = fvnone;
3796               token.valid = FALSE;
3797               break;
3798             default:
3799               fvdef = fvnone;
3800             }
3801           if (structdef == stagseen)
3802             structdef = snone;
3803           break;
3804         case ']':
3805           if (definedef != dnone || inattribute)
3806             break;
3807           if (structdef == stagseen)
3808             structdef = snone;
3809           switch (typdef)
3810             {
3811             case ttypeseen:
3812             case tend:
3813               typdef = tignore;
3814               make_C_tag (FALSE);       /* a typedef */
3815               break;
3816             case tnone:
3817             case tinbody:
3818               switch (fvdef)
3819                 {
3820                 case foperator:
3821                 case finlist:
3822                 case fignore:
3823                 case vignore:
3824                   break;
3825                 case fvnameseen:
3826                   if ((members && bracelev == 1)
3827                       || (globals && bracelev == 0
3828                           && (!fvextern || declarations)))
3829                     make_C_tag (FALSE); /* a variable */
3830                   /* FALLTHRU */
3831                 default:
3832                   fvdef = fvnone;
3833                 }
3834               break;
3835             }
3836           break;
3837         case '(':
3838           if (inattribute)
3839             {
3840               attrparlev++;
3841               break;
3842             }
3843           if (definedef != dnone)
3844             break;
3845           if (objdef == otagseen && parlev == 0)
3846             objdef = oparenseen;
3847           switch (fvdef)
3848             {
3849             case fvnameseen:
3850               if (typdef == ttypeseen
3851                   && *lp != '*'
3852                   && !instruct)
3853                 {
3854                   /* This handles constructs like:
3855                      typedef void OperatorFun (int fun); */
3856                   make_C_tag (FALSE);
3857                   typdef = tignore;
3858                   fvdef = fignore;
3859                   break;
3860                 }
3861               /* FALLTHRU */
3862             case foperator:
3863               fvdef = fstartlist;
3864               break;
3865             case flistseen:
3866               fvdef = finlist;
3867               break;
3868             }
3869           parlev++;
3870           break;
3871         case ')':
3872           if (inattribute)
3873             {
3874               if (--attrparlev == 0)
3875                 inattribute = FALSE;
3876               break;
3877             }
3878           if (definedef != dnone)
3879             break;
3880           if (objdef == ocatseen && parlev == 1)
3881             {
3882               make_C_tag (TRUE); /* an Objective C category */
3883               objdef = oignore;
3884             }
3885           if (--parlev == 0)
3886             {
3887               switch (fvdef)
3888                 {
3889                 case fstartlist:
3890                 case finlist:
3891                   fvdef = flistseen;
3892                   break;
3893                 }
3894               if (!instruct
3895                   && (typdef == tend
3896                       || typdef == ttypeseen))
3897                 {
3898                   typdef = tignore;
3899                   make_C_tag (FALSE); /* a typedef */
3900                 }
3901             }
3902           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3903             parlev = 0;
3904           break;
3905         case '{':
3906           if (definedef != dnone)
3907             break;
3908           if (typdef == ttypeseen)
3909             {
3910               /* Whenever typdef is set to tinbody (currently only
3911                  here), typdefbracelev should be set to bracelev. */
3912               typdef = tinbody;
3913               typdefbracelev = bracelev;
3914             }
3915           switch (fvdef)
3916             {
3917             case flistseen:
3918               make_C_tag (TRUE);    /* a function */
3919               /* FALLTHRU */
3920             case fignore:
3921               fvdef = fvnone;
3922               break;
3923             case fvnone:
3924               switch (objdef)
3925                 {
3926                 case otagseen:
3927                   make_C_tag (TRUE); /* an Objective C class */
3928                   objdef = oignore;
3929                   break;
3930                 case omethodtag:
3931                 case omethodparm:
3932                   make_C_tag (TRUE); /* an Objective C method */
3933                   objdef = oinbody;
3934                   break;
3935                 default:
3936                   /* Neutralize `extern "C" {' grot. */
3937                   if (bracelev == 0 && structdef == snone && nestlev == 0
3938                       && typdef == tnone)
3939                     bracelev = -1;
3940                 }
3941               break;
3942             }
3943           switch (structdef)
3944             {
3945             case skeyseen:         /* unnamed struct */
3946               pushclass_above (bracelev, NULL, 0);
3947               structdef = snone;
3948               break;
3949             case stagseen:         /* named struct or enum */
3950             case scolonseen:       /* a class */
3951               pushclass_above (bracelev,token.line+token.offset, token.length);
3952               structdef = snone;
3953               make_C_tag (FALSE);  /* a struct or enum */
3954               break;
3955             }
3956           bracelev += 1;
3957           break;
3958         case '*':
3959           if (definedef != dnone)
3960             break;
3961           if (fvdef == fstartlist)
3962             {
3963               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3964               token.valid = FALSE;
3965             }
3966           break;
3967         case '}':
3968           if (definedef != dnone)
3969             break;
3970           bracelev -= 1;
3971           if (!ignoreindent && lp == newlb.buffer + 1)
3972             {
3973               if (bracelev != 0)
3974                 token.valid = FALSE; /* unexpected value, token unreliable */
3975               bracelev = 0;     /* reset brace level if first column */
3976               parlev = 0;       /* also reset paren level, just in case... */
3977             }
3978           else if (bracelev < 0)
3979             {
3980             token.valid = FALSE; /* something gone amiss, token unreliable */
3981               bracelev = 0;
3982             }
3983           if (bracelev == 0 && fvdef == vignore)
3984             fvdef = fvnone;             /* end of function */
3985           popclass_above (bracelev);
3986           structdef = snone;
3987           /* Only if typdef == tinbody is typdefbracelev significant. */
3988           if (typdef == tinbody && bracelev <= typdefbracelev)
3989             {
3990               assert (bracelev == typdefbracelev);
3991               typdef = tend;
3992             }
3993           break;
3994         case '=':
3995           if (definedef != dnone)
3996             break;
3997           switch (fvdef)
3998             {
3999             case foperator:
4000             case finlist:
4001             case fignore:
4002             case vignore:
4003               break;
4004             case fvnameseen:
4005               if ((members && bracelev == 1)
4006                   || (globals && bracelev == 0 && (!fvextern || declarations)))
4007                 make_C_tag (FALSE); /* a variable */
4008               /* FALLTHRU */
4009             default:
4010               fvdef = vignore;
4011             }
4012           break;
4013         case '<':
4014           if (cplpl
4015               && (structdef == stagseen || fvdef == fvnameseen))
4016             {
4017               templatelev++;
4018               break;
4019             }
4020           goto resetfvdef;
4021         case '>':
4022           if (templatelev > 0)
4023             {
4024               templatelev--;
4025               break;
4026             }
4027           goto resetfvdef;
4028         case '+':
4029         case '-':
4030           if (objdef == oinbody && bracelev == 0)
4031             {
4032               objdef = omethodsign;
4033               break;
4034             }
4035           /* FALLTHRU */
4036         resetfvdef:
4037         case '#': case '~': case '&': case '%': case '/':
4038         case '|': case '^': case '!': case '.': case '?':
4039           if (definedef != dnone)
4040             break;
4041           /* These surely cannot follow a function tag in C. */
4042           switch (fvdef)
4043             {
4044             case foperator:
4045             case finlist:
4046             case fignore:
4047             case vignore:
4048               break;
4049             default:
4050               fvdef = fvnone;
4051             }
4052           break;
4053         case '\0':
4054           if (objdef == otagseen)
4055             {
4056               make_C_tag (TRUE); /* an Objective C class */
4057               objdef = oignore;
4058             }
4059           /* If a macro spans multiple lines don't reset its state. */
4060           if (quotednl)
4061             CNL_SAVE_DEFINEDEF ();
4062           else
4063             CNL ();
4064           break;
4065         } /* switch (c) */
4066
4067     } /* while not eof */
4068
4069   free (lbs[0].lb.buffer);
4070   free (lbs[1].lb.buffer);
4071 }
4072
4073 /*
4074  * Process either a C++ file or a C file depending on the setting
4075  * of a global flag.
4076  */
4077 static void
4078 default_C_entries (inf)
4079      FILE *inf;
4080 {
4081   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4082 }
4083
/* Treat INF as plain C: C_entries is called with no flag bits set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no language-extension flags */

  C_entries (c_ext, inf);
}
4091
4092 /* Always do C++. */
4093 static void
4094 Cplusplus_entries (inf)
4095      FILE *inf;
4096 {
4097   C_entries (C_PLPL, inf);
4098 }
4099
4100 /* Always do Java. */
4101 static void
4102 Cjava_entries (inf)
4103      FILE *inf;
4104 {
4105   C_entries (C_JAVA, inf);
4106 }
4107
4108 /* Always do C*. */
4109 static void
4110 Cstar_entries (inf)
4111      FILE *inf;
4112 {
4113   C_entries (C_STAR, inf);
4114 }
4115
4116 /* Always do Yacc. */
4117 static void
4118 Yacc_entries (inf)
4119      FILE *inf;
4120 {
4121   C_entries (YACC, inf);
4122 }
4123
4124 \f
/* Useful macros. */

/* Loop header; the statement that follows the invocation is the loop
   body.  Before every pass, and only while feof (FILE_POINTER) is
   false, readline is called to fill LINE_BUFFER from FILE_POINTER and
   CHAR_POINTER is set to LINE_BUFFER.buffer.  Because feof is tested
   before the read, the body also runs for the read that first hits
   end of file.
   Every argument is parenthesized in the expansion, so any suitable
   expression may be passed, not only a plain identifier.
   LINE_BUFFER and FILE_POINTER are evaluated more than once.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), (file_pointer)),                   \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
4134
/* True when CP points at the keyword KW and the character right after
   KW satisfies notinname.  In that case CP is moved past KW and on to
   wherever skip_spaces stops; otherwise CP is left unchanged.
   CP must be a modifiable lvalue and is evaluated several times.  */
#define LOOKING_AT(cp, kw)      /* KW must be a string literal */       \
  ((assert ("" kw), TRUE)       /* "" KW only parses for a literal */   \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* CP starts with KW */ \
   && notinname ((cp)[sizeof (kw) - 1])         /* KW ends there */     \
   && ((cp) = skip_spaces (&(cp)[sizeof (kw) - 1])))    /* consume it */
4140
/* Relative of LOOKING_AT that compares with strncaseeq, applies no
   notinname test to the character after KW, and skips no spaces.
   On a match CP is moved just past KW; otherwise it is left alone.
   CP must be a modifiable lvalue and is evaluated several times.  */
#define LOOKING_AT_NOCASE(cp, kw)       /* KW must be a string literal */ \
  ((assert ("" kw), TRUE)       /* "" KW only parses for a literal */   \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* CP starts with KW */ \
   && ((cp) = &(cp)[sizeof (kw) - 1]))          /* step over KW */
4146
4147 /*
4148  * Read a file, but do no processing.  This is used to do regexp
4149  * matching on files that have no language defined.
4150  */
4151 static void
4152 just_read_file (inf)
4153      FILE *inf;
4154 {
4155   register char *dummy;
4156
4157   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4158     continue;
4159 }
4160
4161 \f
4162 /* Fortran parsing */
4163
4164 static void F_takeprec __P((void));
4165 static void F_getit __P((FILE *));
4166
4167 static void
4168 F_takeprec ()
4169 {
4170   dbp = skip_spaces (dbp);
4171   if (*dbp != '*')
4172     return;
4173   dbp++;
4174   dbp = skip_spaces (dbp);
4175   if (strneq (dbp, "(*)", 3))
4176     {
4177       dbp += 3;
4178       return;
4179     }
4180   if (!ISDIGIT (*dbp))
4181     {
4182       --dbp;                    /* force failure */
4183       return;
4184     }
4185   do
4186     dbp++;
4187   while (ISDIGIT (*dbp));
4188 }
4189
4190 static void
4191 F_getit (inf)
4192      FILE *inf;
4193 {
4194   register char *cp;
4195
4196   dbp = skip_spaces (dbp);
4197   if (*dbp == '\0')
4198     {
4199       readline (&lb, inf);
4200       dbp = lb.buffer;
4201       if (dbp[5] != '&')
4202         return;
4203       dbp += 6;
4204       dbp = skip_spaces (dbp);
4205     }
4206   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4207     return;
4208   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4209     continue;
4210   make_tag (dbp, cp-dbp, TRUE,
4211             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4212 }
4213
4214
4215 static void
4216 Fortran_functions (inf)
4217      FILE *inf;
4218 {
4219   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4220     {
4221       if (*dbp == '%')
4222         dbp++;                  /* Ratfor escape to fortran */
4223       dbp = skip_spaces (dbp);
4224       if (*dbp == '\0')
4225         continue;
4226       switch (lowcase (*dbp))
4227         {
4228         case 'i':
4229           if (nocase_tail ("integer"))
4230             F_takeprec ();
4231           break;
4232         case 'r':
4233           if (nocase_tail ("real"))
4234             F_takeprec ();
4235           break;
4236         case 'l':
4237           if (nocase_tail ("logical"))
4238             F_takeprec ();
4239           break;
4240         case 'c':
4241           if (nocase_tail ("complex") || nocase_tail ("character"))
4242             F_takeprec ();
4243           break;
4244         case 'd':
4245           if (nocase_tail ("double"))
4246             {
4247               dbp = skip_spaces (dbp);
4248               if (*dbp == '\0')
4249                 continue;
4250               if (nocase_tail ("precision"))
4251                 break;
4252               continue;
4253             }
4254           break;
4255         }
4256       dbp = skip_spaces (dbp);
4257       if (*dbp == '\0')
4258         continue;
4259       switch (lowcase (*dbp))
4260         {
4261         case 'f':
4262           if (nocase_tail ("function"))
4263             F_getit (inf);
4264           continue;
4265         case 's':
4266           if (nocase_tail ("subroutine"))
4267             F_getit (inf);
4268           continue;
4269         case 'e':
4270           if (nocase_tail ("entry"))
4271             F_getit (inf);
4272           continue;
4273         case 'b':
4274           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4275             {
4276               dbp = skip_spaces (dbp);
4277               if (*dbp == '\0') /* assume un-named */
4278                 make_tag ("blockdata", 9, TRUE,
4279                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4280               else
4281                 F_getit (inf);  /* look for name */
4282             }
4283           continue;
4284         }
4285     }
4286 }
4287
4288 \f
4289 /*
4290  * Ada parsing
4291  * Original code by
4292  * Philippe Waroquiers (1998)
4293  */
4294
4295 static void Ada_getit __P((FILE *, char *));
4296
4297 /* Once we are positioned after an "interesting" keyword, let's get
4298    the real tag value necessary. */
4299 static void
4300 Ada_getit (inf, name_qualifier)
4301      FILE *inf;
4302      char *name_qualifier;
4303 {
4304   register char *cp;
4305   char *name;
4306   char c;
4307
4308   while (!feof (inf))
4309     {
4310       dbp = skip_spaces (dbp);
4311       if (*dbp == '\0'
4312           || (dbp[0] == '-' && dbp[1] == '-'))
4313         {
4314           readline (&lb, inf);
4315           dbp = lb.buffer;
4316         }
4317       switch (lowcase(*dbp))
4318         {
4319         case 'b':
4320           if (nocase_tail ("body"))
4321             {
4322               /* Skipping body of   procedure body   or   package body or ....
4323                  resetting qualifier to body instead of spec. */
4324               name_qualifier = "/b";
4325               continue;
4326             }
4327           break;
4328         case 't':
4329           /* Skipping type of   task type   or   protected type ... */
4330           if (nocase_tail ("type"))
4331             continue;
4332           break;
4333         }
4334       if (*dbp == '"')
4335         {
4336           dbp += 1;
4337           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4338             continue;
4339         }
4340       else
4341         {
4342           dbp = skip_spaces (dbp);
4343           for (cp = dbp;
4344                (*cp != '\0'
4345                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4346                cp++)
4347             continue;
4348           if (cp == dbp)
4349             return;
4350         }
4351       c = *cp;
4352       *cp = '\0';
4353       name = concat (dbp, name_qualifier, "");
4354       *cp = c;
4355       make_tag (name, strlen (name), TRUE,
4356                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4357       free (name);
4358       if (c == '"')
4359         dbp = cp + 1;
4360       return;
4361     }
4362 }
4363
4364 static void
4365 Ada_funcs (inf)
4366      FILE *inf;
4367 {
4368   bool inquote = FALSE;
4369   bool skip_till_semicolumn = FALSE;
4370
4371   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4372     {
4373       while (*dbp != '\0')
4374         {
4375           /* Skip a string i.e. "abcd". */
4376           if (inquote || (*dbp == '"'))
4377             {
4378               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4379               if (dbp != NULL)
4380                 {
4381                   inquote = FALSE;
4382                   dbp += 1;
4383                   continue;     /* advance char */
4384                 }
4385               else
4386                 {
4387                   inquote = TRUE;
4388                   break;        /* advance line */
4389                 }
4390             }
4391
4392           /* Skip comments. */
4393           if (dbp[0] == '-' && dbp[1] == '-')
4394             break;              /* advance line */
4395
4396           /* Skip character enclosed in single quote i.e. 'a'
4397              and skip single quote starting an attribute i.e. 'Image. */
4398           if (*dbp == '\'')
4399             {
4400               dbp++ ;
4401               if (*dbp != '\0')
4402                 dbp++;
4403               continue;
4404             }
4405
4406           if (skip_till_semicolumn)
4407             {
4408               if (*dbp == ';')
4409                 skip_till_semicolumn = FALSE;
4410               dbp++;
4411               continue;         /* advance char */
4412             }
4413
4414           /* Search for beginning of a token.  */
4415           if (!begtoken (*dbp))
4416             {
4417               dbp++;
4418               continue;         /* advance char */
4419             }
4420
4421           /* We are at the beginning of a token. */
4422           switch (lowcase(*dbp))
4423             {
4424             case 'f':
4425               if (!packages_only && nocase_tail ("function"))
4426                 Ada_getit (inf, "/f");
4427               else
4428                 break;          /* from switch */
4429               continue;         /* advance char */
4430             case 'p':
4431               if (!packages_only && nocase_tail ("procedure"))
4432                 Ada_getit (inf, "/p");
4433               else if (nocase_tail ("package"))
4434                 Ada_getit (inf, "/s");
4435               else if (nocase_tail ("protected")) /* protected type */
4436                 Ada_getit (inf, "/t");
4437               else
4438                 break;          /* from switch */
4439               continue;         /* advance char */
4440
4441             case 'u':
4442               if (typedefs && !packages_only && nocase_tail ("use"))
4443                 {
4444                   /* when tagging types, avoid tagging  use type Pack.Typename;
4445                      for this, we will skip everything till a ; */
4446                   skip_till_semicolumn = TRUE;
4447                   continue;     /* advance char */
4448                 }
4449
4450             case 't':
4451               if (!packages_only && nocase_tail ("task"))
4452                 Ada_getit (inf, "/k");
4453               else if (typedefs && !packages_only && nocase_tail ("type"))
4454                 {
4455                   Ada_getit (inf, "/t");
4456                   while (*dbp != '\0')
4457                     dbp += 1;
4458                 }
4459               else
4460                 break;          /* from switch */
4461               continue;         /* advance char */
4462             }
4463
4464           /* Look for the end of the token. */
4465           while (!endtoken (*dbp))
4466             dbp++;
4467
4468         } /* advance char */
4469     } /* advance line */
4470 }
4471
4472 \f
4473 /*
4474  * Unix and microcontroller assembly tag handling
4475  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4476  * Idea by Bob Weiner, Motorola Inc. (1994)
4477  */
4478 static void
4479 Asm_labels (inf)
4480      FILE *inf;
4481 {
4482   register char *cp;
4483
4484   LOOP_ON_INPUT_LINES (inf, lb, cp)
4485     {
4486       /* If first char is alphabetic or one of [_.$], test for colon
4487          following identifier. */
4488       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4489         {
4490           /* Read past label. */
4491           cp++;
4492           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4493             cp++;
4494           if (*cp == ':' || iswhite (*cp))
4495             /* Found end of label, so copy it and add it to the table. */
4496             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4497                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4498         }
4499     }
4500 }
4501
4502 \f
4503 /*
4504  * Perl support
4505  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4506  * Perl variable names: /^(my|local).../
4507  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4508  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4509  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4510  */
4511 static void
4512 Perl_functions (inf)
4513      FILE *inf;
4514 {
4515   char *package = savestr ("main"); /* current package name */
4516   register char *cp;
4517
4518   LOOP_ON_INPUT_LINES (inf, lb, cp)
4519     {
4520       skip_spaces(cp);
4521
4522       if (LOOKING_AT (cp, "package"))
4523         {
4524           free (package);
4525           get_tag (cp, &package);
4526         }
4527       else if (LOOKING_AT (cp, "sub"))
4528         {
4529           char *pos;
4530           char *sp = cp;
4531
4532           while (!notinname (*cp))
4533             cp++;
4534           if (cp == sp)
4535             continue;           /* nothing found */
4536           if ((pos = etags_strchr (sp, ':')) != NULL
4537               && pos < cp && pos[1] == ':')
4538             /* The name is already qualified. */
4539             make_tag (sp, cp - sp, TRUE,
4540                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4541           else
4542             /* Qualify it. */
4543             {
4544               char savechar, *name;
4545
4546               savechar = *cp;
4547               *cp = '\0';
4548               name = concat (package, "::", sp);
4549               *cp = savechar;
4550               make_tag (name, strlen(name), TRUE,
4551                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4552               free (name);
4553             }
4554         }
4555        else if (globals)        /* only if we are tagging global vars */
4556         {
4557           /* Skip a qualifier, if any. */
4558           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4559           /* After "my" or "local", but before any following paren or space. */
4560           char *varstart = cp;
4561
4562           if (qual              /* should this be removed?  If yes, how? */
4563               && (*cp == '$' || *cp == '@' || *cp == '%'))
4564             {
4565               varstart += 1;
4566               do
4567                 cp++;
4568               while (ISALNUM (*cp) || *cp == '_');
4569             }
4570           else if (qual)
4571             {
4572               /* Should be examining a variable list at this point;
4573                  could insist on seeing an open parenthesis. */
4574               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4575                 cp++;
4576             }
4577           else
4578             continue;
4579
4580           make_tag (varstart, cp - varstart, FALSE,
4581                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4582         }
4583     }
4584   free (package);
4585 }
4586
4587
4588 /*
4589  * Python support
4590  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4591  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4592  * More ideas by seb bacon <seb@jamkit.com> (2002)
4593  */
4594 static void
4595 Python_functions (inf)
4596      FILE *inf;
4597 {
4598   register char *cp;
4599
4600   LOOP_ON_INPUT_LINES (inf, lb, cp)
4601     {
4602       cp = skip_spaces (cp);
4603       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4604         {
4605           char *name = cp;
4606           while (!notinname (*cp) && *cp != ':')
4607             cp++;
4608           make_tag (name, cp - name, TRUE,
4609                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4610         }
4611     }
4612 }
4613
4614 \f
4615 /*
4616  * PHP support
4617  * Look for:
4618  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4619  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4620  *  - /^[ \t]*define\(\"[^\"]+/
4621  * Only with --members:
4622  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4623  * Idea by Diez B. Roggisch (2001)
4624  */
4625 static void
4626 PHP_functions (inf)
4627      FILE *inf;
4628 {
4629   register char *cp, *name;
4630   bool search_identifier = FALSE;
4631
4632   LOOP_ON_INPUT_LINES (inf, lb, cp)
4633     {
4634       cp = skip_spaces (cp);
4635       name = cp;
4636       if (search_identifier
4637           && *cp != '\0')
4638         {
4639           while (!notinname (*cp))
4640             cp++;
4641           make_tag (name, cp - name, TRUE,
4642                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4643           search_identifier = FALSE;
4644         }
4645       else if (LOOKING_AT (cp, "function"))
4646         {
4647           if(*cp == '&')
4648             cp = skip_spaces (cp+1);
4649           if(*cp != '\0')
4650             {
4651               name = cp;
4652               while (!notinname (*cp))
4653                 cp++;
4654               make_tag (name, cp - name, TRUE,
4655                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4656             }
4657           else
4658             search_identifier = TRUE;
4659         }
4660       else if (LOOKING_AT (cp, "class"))
4661         {
4662           if (*cp != '\0')
4663             {
4664               name = cp;
4665               while (*cp != '\0' && !iswhite (*cp))
4666                 cp++;
4667               make_tag (name, cp - name, FALSE,
4668                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4669             }
4670           else
4671             search_identifier = TRUE;
4672         }
4673       else if (strneq (cp, "define", 6)
4674                && (cp = skip_spaces (cp+6))
4675                && *cp++ == '('
4676                && (*cp == '"' || *cp == '\''))
4677         {
4678           char quote = *cp++;
4679           name = cp;
4680           while (*cp != quote && *cp != '\0')
4681             cp++;
4682           make_tag (name, cp - name, FALSE,
4683                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4684         }
4685       else if (members
4686                && LOOKING_AT (cp, "var")
4687                && *cp == '$')
4688         {
4689           name = cp;
4690           while (!notinname(*cp))
4691             cp++;
4692           make_tag (name, cp - name, FALSE,
4693                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4694         }
4695     }
4696 }
4697
4698 \f
4699 /*
4700  * Cobol tag functions
4701  * We could look for anything that could be a paragraph name.
4702  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4703  * Idea by Corny de Souza (1993)
4704  */
4705 static void
4706 Cobol_paragraphs (inf)
4707      FILE *inf;
4708 {
4709   register char *bp, *ep;
4710
4711   LOOP_ON_INPUT_LINES (inf, lb, bp)
4712     {
4713       if (lb.len < 9)
4714         continue;
4715       bp += 8;
4716
4717       /* If eoln, compiler option or comment ignore whole line. */
4718       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4719         continue;
4720
4721       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4722         continue;
4723       if (*ep++ == '.')
4724         make_tag (bp, ep - bp, TRUE,
4725                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4726     }
4727 }
4728
4729 \f
4730 /*
4731  * Makefile support
4732  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4733  */
4734 static void
4735 Makefile_targets (inf)
4736      FILE *inf;
4737 {
4738   register char *bp;
4739
4740   LOOP_ON_INPUT_LINES (inf, lb, bp)
4741     {
4742       if (*bp == '\t' || *bp == '#')
4743         continue;
4744       while (*bp != '\0' && *bp != '=' && *bp != ':')
4745         bp++;
4746       if (*bp == ':' || (globals && *bp == '='))
4747         {
4748           /* We should detect if there is more than one tag, but we do not.
4749              We just skip initial and final spaces. */
4750           char * namestart = skip_spaces (lb.buffer);
4751           while (--bp > namestart)
4752             if (!notinname (*bp))
4753               break;
4754           make_tag (namestart, bp - namestart + 1, TRUE,
4755                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4756         }
4757     }
4758 }
4759
4760 \f
4761 /*
4762  * Pascal parsing
4763  * Original code by Mosur K. Mohan (1989)
4764  *
4765  *  Locates tags for procedures & functions.  Doesn't do any type- or
4766  *  var-definitions.  It does look for the keyword "extern" or
4767  *  "forward" immediately following the procedure statement; if found,
4768  *  the tag is skipped.
4769  */
4770 static void
4771 Pascal_functions (inf)
4772      FILE *inf;
4773 {
4774   linebuffer tline;             /* mostly copied from C_entries */
4775   long save_lcno;
4776   int save_lineno, namelen, taglen;
4777   char c, *name;
4778
4779   bool                          /* each of these flags is TRUE if: */
4780     incomment,                  /* point is inside a comment */
4781     inquote,                    /* point is inside '..' string */
4782     get_tagname,                /* point is after PROCEDURE/FUNCTION
4783                                    keyword, so next item = potential tag */
4784     found_tag,                  /* point is after a potential tag */
4785     inparms,                    /* point is within parameter-list */
4786     verify_tag;                 /* point has passed the parm-list, so the
4787                                    next token will determine whether this
4788                                    is a FORWARD/EXTERN to be ignored, or
4789                                    whether it is a real tag */
4790
4791   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4792   name = NULL;                  /* keep compiler quiet */
4793   dbp = lb.buffer;
4794   *dbp = '\0';
4795   linebuffer_init (&tline);
4796
4797   incomment = inquote = FALSE;
4798   found_tag = FALSE;            /* have a proc name; check if extern */
4799   get_tagname = FALSE;          /* found "procedure" keyword         */
4800   inparms = FALSE;              /* found '(' after "proc"            */
4801   verify_tag = FALSE;           /* check if "extern" is ahead        */
4802
4803
4804   while (!feof (inf))           /* long main loop to get next char */
4805     {
4806       c = *dbp++;
4807       if (c == '\0')            /* if end of line */
4808         {
4809           readline (&lb, inf);
4810           dbp = lb.buffer;
4811           if (*dbp == '\0')
4812             continue;
4813           if (!((found_tag && verify_tag)
4814                 || get_tagname))
4815             c = *dbp++;         /* only if don't need *dbp pointing
4816                                    to the beginning of the name of
4817                                    the procedure or function */
4818         }
4819       if (incomment)
4820         {
4821           if (c == '}')         /* within { } comments */
4822             incomment = FALSE;
4823           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4824             {
4825               dbp++;
4826               incomment = FALSE;
4827             }
4828           continue;
4829         }
4830       else if (inquote)
4831         {
4832           if (c == '\'')
4833             inquote = FALSE;
4834           continue;
4835         }
4836       else
4837         switch (c)
4838           {
4839           case '\'':
4840             inquote = TRUE;     /* found first quote */
4841             continue;
4842           case '{':             /* found open { comment */
4843             incomment = TRUE;
4844             continue;
4845           case '(':
4846             if (*dbp == '*')    /* found open (* comment */
4847               {
4848                 incomment = TRUE;
4849                 dbp++;
4850               }
4851             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4852               inparms = TRUE;
4853             continue;
4854           case ')':             /* end of parms list */
4855             if (inparms)
4856               inparms = FALSE;
4857             continue;
4858           case ';':
4859             if (found_tag && !inparms) /* end of proc or fn stmt */
4860               {
4861                 verify_tag = TRUE;
4862                 break;
4863               }
4864             continue;
4865           }
4866       if (found_tag && verify_tag && (*dbp != ' '))
4867         {
4868           /* Check if this is an "extern" declaration. */
4869           if (*dbp == '\0')
4870             continue;
4871           if (lowcase (*dbp == 'e'))
4872             {
4873               if (nocase_tail ("extern")) /* superfluous, really! */
4874                 {
4875                   found_tag = FALSE;
4876                   verify_tag = FALSE;
4877                 }
4878             }
4879           else if (lowcase (*dbp) == 'f')
4880             {
4881               if (nocase_tail ("forward")) /* check for forward reference */
4882                 {
4883                   found_tag = FALSE;
4884                   verify_tag = FALSE;
4885                 }
4886             }
4887           if (found_tag && verify_tag) /* not external proc, so make tag */
4888             {
4889               found_tag = FALSE;
4890               verify_tag = FALSE;
4891               make_tag (name, namelen, TRUE,
4892                         tline.buffer, taglen, save_lineno, save_lcno);
4893               continue;
4894             }
4895         }
4896       if (get_tagname)          /* grab name of proc or fn */
4897         {
4898           char *cp;
4899
4900           if (*dbp == '\0')
4901             continue;
4902
4903           /* Find block name. */
4904           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4905             continue;
4906
4907           /* Save all values for later tagging. */
4908           linebuffer_setlen (&tline, lb.len);
4909           strcpy (tline.buffer, lb.buffer);
4910           save_lineno = lineno;
4911           save_lcno = linecharno;
4912           name = tline.buffer + (dbp - lb.buffer);
4913           namelen = cp - dbp;
4914           taglen = cp - lb.buffer + 1;
4915
4916           dbp = cp;             /* set dbp to e-o-token */
4917           get_tagname = FALSE;
4918           found_tag = TRUE;
4919           continue;
4920
4921           /* And proceed to check for "extern". */
4922         }
4923       else if (!incomment && !inquote && !found_tag)
4924         {
4925           /* Check for proc/fn keywords. */
4926           switch (lowcase (c))
4927             {
4928             case 'p':
4929               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4930                 get_tagname = TRUE;
4931               continue;
4932             case 'f':
4933               if (nocase_tail ("unction"))
4934                 get_tagname = TRUE;
4935               continue;
4936             }
4937         }
4938     } /* while not eof */
4939
4940   free (tline.buffer);
4941 }
4942
4943 \f
4944 /*
4945  * Lisp tag functions
4946  *  look for (def or (DEF, quote or QUOTE
4947  */
4948
4949 static void L_getit __P((void));
4950
4951 static void
4952 L_getit ()
4953 {
4954   if (*dbp == '\'')             /* Skip prefix quote */
4955     dbp++;
4956   else if (*dbp == '(')
4957   {
4958     dbp++;
4959     /* Try to skip "(quote " */
4960     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4961       /* Ok, then skip "(" before name in (defstruct (foo)) */
4962       dbp = skip_spaces (dbp);
4963   }
4964   get_tag (dbp, NULL);
4965 }
4966
4967 static void
4968 Lisp_functions (inf)
4969      FILE *inf;
4970 {
4971   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4972     {
4973       if (dbp[0] != '(')
4974         continue;
4975
4976       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4977         {
4978           dbp = skip_non_spaces (dbp);
4979           dbp = skip_spaces (dbp);
4980           L_getit ();
4981         }
4982       else
4983         {
4984           /* Check for (foo::defmumble name-defined ... */
4985           do
4986             dbp++;
4987           while (!notinname (*dbp) && *dbp != ':');
4988           if (*dbp == ':')
4989             {
4990               do
4991                 dbp++;
4992               while (*dbp == ':');
4993
4994               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4995                 {
4996                   dbp = skip_non_spaces (dbp);
4997                   dbp = skip_spaces (dbp);
4998                   L_getit ();
4999                 }
5000             }
5001         }
5002     }
5003 }
5004
5005 \f
5006 /*
5007  * Lua script language parsing
5008  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5009  *
5010  *  "function" and "local function" are tags if they start at column 1.
5011  */
5012 static void
5013 Lua_functions (inf)
5014      FILE *inf;
5015 {
5016   register char *bp;
5017
5018   LOOP_ON_INPUT_LINES (inf, lb, bp)
5019     {
5020       if (bp[0] != 'f' && bp[0] != 'l')
5021         continue;
5022
5023       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5024
5025       if (LOOKING_AT (bp, "function"))
5026         get_tag (bp, NULL);
5027     }
5028 }
5029
5030 \f
5031 /*
5032  * Postscript tags
5033  * Just look for lines where the first character is '/'
5034  * Also look at "defineps" for PSWrap
5035  * Ideas by:
5036  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5037  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5038  */
5039 static void
5040 PS_functions (inf)
5041      FILE *inf;
5042 {
5043   register char *bp, *ep;
5044
5045   LOOP_ON_INPUT_LINES (inf, lb, bp)
5046     {
5047       if (bp[0] == '/')
5048         {
5049           for (ep = bp+1;
5050                *ep != '\0' && *ep != ' ' && *ep != '{';
5051                ep++)
5052             continue;
5053           make_tag (bp, ep - bp, TRUE,
5054                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5055         }
5056       else if (LOOKING_AT (bp, "defineps"))
5057         get_tag (bp, NULL);
5058     }
5059 }
5060
5061 \f
5062 /*
5063  * Forth tags
5064  * Ignore anything after \ followed by space or in ( )
5065  * Look for words defined by :
5066  * Look for constant, code, create, defer, value, and variable
5067  * OBP extensions:  Look for buffer:, field,
5068  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5069  */
5070 static void
5071 Forth_words (inf)
5072      FILE *inf;
5073 {
5074   register char *bp;
5075
5076   LOOP_ON_INPUT_LINES (inf, lb, bp)
5077     while ((bp = skip_spaces (bp))[0] != '\0')
5078       if (bp[0] == '\\' && iswhite(bp[1]))
5079         break;                  /* read next line */
5080       else if (bp[0] == '(' && iswhite(bp[1]))
5081         do                      /* skip to ) or eol */
5082           bp++;
5083         while (*bp != ')' && *bp != '\0');
5084       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5085                || LOOKING_AT_NOCASE (bp, "constant")
5086                || LOOKING_AT_NOCASE (bp, "code")
5087                || LOOKING_AT_NOCASE (bp, "create")
5088                || LOOKING_AT_NOCASE (bp, "defer")
5089                || LOOKING_AT_NOCASE (bp, "value")
5090                || LOOKING_AT_NOCASE (bp, "variable")
5091                || LOOKING_AT_NOCASE (bp, "buffer:")
5092                || LOOKING_AT_NOCASE (bp, "field"))
5093         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5094       else
5095         bp = skip_non_spaces (bp);
5096 }
5097
5098 \f
5099 /*
5100  * Scheme tag functions
5101  * look for (def... xyzzy
5102  *          (def... (xyzzy
5103  *          (def ... ((...(xyzzy ....
5104  *          (set! xyzzy
5105  * Original code by Ken Haase (1985?)
5106  */
5107 static void
5108 Scheme_functions (inf)
5109      FILE *inf;
5110 {
5111   register char *bp;
5112
5113   LOOP_ON_INPUT_LINES (inf, lb, bp)
5114     {
5115       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5116         {
5117           bp = skip_non_spaces (bp+4);
5118           /* Skip over open parens and white space */
5119           while (notinname (*bp))
5120             bp++;
5121           get_tag (bp, NULL);
5122         }
5123       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5124         get_tag (bp, NULL);
5125     }
5126 }
5127
5128 \f
5129 /* Find tags in TeX and LaTeX input files.  */
5130
5131 /* TEX_toktab is a table of TeX control sequences that define tags.
5132  * Each entry records one such control sequence.
5133  *
5134  * Original code from who knows whom.
5135  * Ideas by:
5136  *   Stefan Monnier (2002)
5137  */
5138
5139 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5140
5141 /* Default set of control sequences to put into TEX_toktab.
5142    The value of environment var TEXTAGS is prepended to this.  */
5143 static char *TEX_defenv = "\
5144 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5145 :part:appendix:entry:index:def\
5146 :newcommand:renewcommand:newenvironment:renewenvironment";
5147
5148 static void TEX_mode __P((FILE *));
5149 static void TEX_decode_env __P((char *, char *));
5150
5151 static char TEX_esc = '\\';
5152 static char TEX_opgrp = '{';
5153 static char TEX_clgrp = '}';
5154
5155 /*
5156  * TeX/LaTeX scanning loop.
5157  */
5158 static void
5159 TeX_commands (inf)
5160      FILE *inf;
5161 {
5162   char *cp;
5163   linebuffer *key;
5164
5165   /* Select either \ or ! as escape character.  */
5166   TEX_mode (inf);
5167
5168   /* Initialize token table once from environment. */
5169   if (TEX_toktab == NULL)
5170     TEX_decode_env ("TEXTAGS", TEX_defenv);
5171
5172   LOOP_ON_INPUT_LINES (inf, lb, cp)
5173     {
5174       /* Look at each TEX keyword in line. */
5175       for (;;)
5176         {
5177           /* Look for a TEX escape. */
5178           while (*cp++ != TEX_esc)
5179             if (cp[-1] == '\0' || cp[-1] == '%')
5180               goto tex_next_line;
5181
5182           for (key = TEX_toktab; key->buffer != NULL; key++)
5183             if (strneq (cp, key->buffer, key->len))
5184               {
5185                 register char *p;
5186                 int namelen, linelen;
5187                 bool opgrp = FALSE;
5188
5189                 cp = skip_spaces (cp + key->len);
5190                 if (*cp == TEX_opgrp)
5191                   {
5192                     opgrp = TRUE;
5193                     cp++;
5194                   }
5195                 for (p = cp;
5196                      (!iswhite (*p) && *p != '#' &&
5197                       *p != TEX_opgrp && *p != TEX_clgrp);
5198                      p++)
5199                   continue;
5200                 namelen = p - cp;
5201                 linelen = lb.len;
5202                 if (!opgrp || *p == TEX_clgrp)
5203                   {
5204                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5205                       p++;
5206                     linelen = p - lb.buffer + 1;
5207                   }
5208                 make_tag (cp, namelen, TRUE,
5209                           lb.buffer, linelen, lineno, linecharno);
5210                 goto tex_next_line; /* We only tag a line once */
5211               }
5212         }
5213     tex_next_line:
5214       ;
5215     }
5216 }
5217
5218 #define TEX_LESC '\\'
5219 #define TEX_SESC '!'
5220
5221 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5222    chars accordingly. */
5223 static void
5224 TEX_mode (inf)
5225      FILE *inf;
5226 {
5227   int c;
5228
5229   while ((c = getc (inf)) != EOF)
5230     {
5231       /* Skip to next line if we hit the TeX comment char. */
5232       if (c == '%')
5233         while (c != '\n' && c != EOF)
5234           c = getc (inf);
5235       else if (c == TEX_LESC || c == TEX_SESC )
5236         break;
5237     }
5238
5239   if (c == TEX_LESC)
5240     {
5241       TEX_esc = TEX_LESC;
5242       TEX_opgrp = '{';
5243       TEX_clgrp = '}';
5244     }
5245   else
5246     {
5247       TEX_esc = TEX_SESC;
5248       TEX_opgrp = '<';
5249       TEX_clgrp = '>';
5250     }
5251   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5252      No attempt is made to correct the situation. */
5253   rewind (inf);
5254 }
5255
5256 /* Read environment and prepend it to the default string.
5257    Build token table. */
5258 static void
5259 TEX_decode_env (evarname, defenv)
5260      char *evarname;
5261      char *defenv;
5262 {
5263   register char *env, *p;
5264   int i, len;
5265
5266   /* Append default string to environment. */
5267   env = getenv (evarname);
5268   if (!env)
5269     env = defenv;
5270   else
5271     {
5272       char *oldenv = env;
5273       env = concat (oldenv, defenv, "");
5274     }
5275
5276   /* Allocate a token table */
5277   for (len = 1, p = env; p;)
5278     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5279       len++;
5280   TEX_toktab = xnew (len, linebuffer);
5281
5282   /* Unpack environment string into token table. Be careful about */
5283   /* zero-length strings (leading ':', "::" and trailing ':') */
5284   for (i = 0; *env != '\0';)
5285     {
5286       p = etags_strchr (env, ':');
5287       if (!p)                   /* End of environment string. */
5288         p = env + strlen (env);
5289       if (p - env > 0)
5290         {                       /* Only non-zero strings. */
5291           TEX_toktab[i].buffer = savenstr (env, p - env);
5292           TEX_toktab[i].len = p - env;
5293           i++;
5294         }
5295       if (*p)
5296         env = p + 1;
5297       else
5298         {
5299           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5300           TEX_toktab[i].len = 0;
5301           break;
5302         }
5303     }
5304 }
5305
5306 \f
5307 /* Texinfo support.  Dave Love, Mar. 2000.  */
5308 static void
5309 Texinfo_nodes (inf)
5310      FILE * inf;
5311 {
5312   char *cp, *start;
5313   LOOP_ON_INPUT_LINES (inf, lb, cp)
5314     if (LOOKING_AT (cp, "@node"))
5315       {
5316         start = cp;
5317         while (*cp != '\0' && *cp != ',')
5318           cp++;
5319         make_tag (start, cp - start, TRUE,
5320                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5321       }
5322 }
5323
5324 \f
5325 /*
5326  * HTML support.
5327  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5328  * Contents of <a name=xxx> are tags with name xxx.
5329  *
5330  * Francesco Potortì, 2002.
5331  */
5332 static void
5333 HTML_labels (inf)
5334      FILE * inf;
5335 {
5336   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5337   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5338   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5339   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5340   char *end;
5341
5342
5343   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5344
5345   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5346     for (;;)                    /* loop on the same line */
5347       {
5348         if (skiptag)            /* skip HTML tag */
5349           {
5350             while (*dbp != '\0' && *dbp != '>')
5351               dbp++;
5352             if (*dbp == '>')
5353               {
5354                 dbp += 1;
5355                 skiptag = FALSE;
5356                 continue;       /* look on the same line */
5357               }
5358             break;              /* go to next line */
5359           }
5360
5361         else if (intag) /* look for "name=" or "id=" */
5362           {
5363             while (*dbp != '\0' && *dbp != '>'
5364                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5365               dbp++;
5366             if (*dbp == '\0')
5367               break;            /* go to next line */
5368             if (*dbp == '>')
5369               {
5370                 dbp += 1;
5371                 intag = FALSE;
5372                 continue;       /* look on the same line */
5373               }
5374             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5375                 || LOOKING_AT_NOCASE (dbp, "id="))
5376               {
5377                 bool quoted = (dbp[0] == '"');
5378
5379                 if (quoted)
5380                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5381                     continue;
5382                 else
5383                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5384                     continue;
5385                 linebuffer_setlen (&token_name, end - dbp);
5386                 strncpy (token_name.buffer, dbp, end - dbp);
5387                 token_name.buffer[end - dbp] = '\0';
5388
5389                 dbp = end;
5390                 intag = FALSE;  /* we found what we looked for */
5391                 skiptag = TRUE; /* skip to the end of the tag */
5392                 getnext = TRUE; /* then grab the text */
5393                 continue;       /* look on the same line */
5394               }
5395             dbp += 1;
5396           }
5397
5398         else if (getnext)       /* grab next tokens and tag them */
5399           {
5400             dbp = skip_spaces (dbp);
5401             if (*dbp == '\0')
5402               break;            /* go to next line */
5403             if (*dbp == '<')
5404               {
5405                 intag = TRUE;
5406                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5407                 continue;       /* look on the same line */
5408               }
5409
5410             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5411               continue;
5412             make_tag (token_name.buffer, token_name.len, TRUE,
5413                       dbp, end - dbp, lineno, linecharno);
5414             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5415             getnext = FALSE;
5416             break;              /* go to next line */
5417           }
5418
5419         else                    /* look for an interesting HTML tag */
5420           {
5421             while (*dbp != '\0' && *dbp != '<')
5422               dbp++;
5423             if (*dbp == '\0')
5424               break;            /* go to next line */
5425             intag = TRUE;
5426             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5427               {
5428                 inanchor = TRUE;
5429                 continue;       /* look on the same line */
5430               }
5431             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5432                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5433                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5434                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5435               {
5436                 intag = FALSE;
5437                 getnext = TRUE;
5438                 continue;       /* look on the same line */
5439               }
5440             dbp += 1;
5441           }
5442       }
5443 }
5444
5445 \f
5446 /*
5447  * Prolog support
5448  *
5449  * Assumes that the predicate or rule starts at column 0.
5450  * Only the first clause of a predicate or rule is added.
5451  * Original code by Sunichirou Sugou (1989)
5452  * Rewritten by Anders Lindgren (1996)
5453  */
5454 static int prolog_pr __P((char *, char *));
5455 static void prolog_skip_comment __P((linebuffer *, FILE *));
5456 static int prolog_atom __P((char *, int));
5457
5458 static void
5459 Prolog_functions (inf)
5460      FILE *inf;
5461 {
5462   char *cp, *last;
5463   int len;
5464   int allocated;
5465
5466   allocated = 0;
5467   len = 0;
5468   last = NULL;
5469
5470   LOOP_ON_INPUT_LINES (inf, lb, cp)
5471     {
5472       if (cp[0] == '\0')        /* Empty line */
5473         continue;
5474       else if (iswhite (cp[0])) /* Not a predicate */
5475         continue;
5476       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5477         prolog_skip_comment (&lb, inf);
5478       else if ((len = prolog_pr (cp, last)) > 0)
5479         {
5480           /* Predicate or rule.  Store the function name so that we
5481              only generate a tag for the first clause.  */
5482           if (last == NULL)
5483             last = xnew(len + 1, char);
5484           else if (len + 1 > allocated)
5485             xrnew (last, len + 1, char);
5486           allocated = len + 1;
5487           strncpy (last, cp, len);
5488           last[len] = '\0';
5489         }
5490     }
5491   if (last != NULL)
5492     free (last);
5493 }
5494
5495
5496 static void
5497 prolog_skip_comment (plb, inf)
5498      linebuffer *plb;
5499      FILE *inf;
5500 {
5501   char *cp;
5502
5503   do
5504     {
5505       for (cp = plb->buffer; *cp != '\0'; cp++)
5506         if (cp[0] == '*' && cp[1] == '/')
5507           return;
5508       readline (plb, inf);
5509     }
5510   while (!feof(inf));
5511 }
5512
5513 /*
5514  * A predicate or rule definition is added if it matches:
5515  *     <beginning of line><Prolog Atom><whitespace>(
5516  * or  <beginning of line><Prolog Atom><whitespace>:-
5517  *
5518  * It is added to the tags database if it doesn't match the
5519  * name of the previous clause header.
5520  *
5521  * Return the size of the name of the predicate or rule, or 0 if no
5522  * header was found.
5523  */
5524 static int
5525 prolog_pr (s, last)
5526      char *s;
5527      char *last;                /* Name of last clause. */
5528 {
5529   int pos;
5530   int len;
5531
5532   pos = prolog_atom (s, 0);
5533   if (pos < 1)
5534     return 0;
5535
5536   len = pos;
5537   pos = skip_spaces (s + pos) - s;
5538
5539   if ((s[pos] == '.'
5540        || (s[pos] == '(' && (pos += 1))
5541        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5542       && (last == NULL          /* save only the first clause */
5543           || len != (int)strlen (last)
5544           || !strneq (s, last, len)))
5545         {
5546           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5547           return len;
5548         }
5549   else
5550     return 0;
5551 }
5552
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there is no atom.
 *
 * An atom, in this context, is one of:
 * - A lower case letter or underscore, followed by any number of
 *   alphanumeric characters and underscores.
 * - A string in single quotes.  Inside it, two single quotes in a row
 *   stand for one, and a backslash quotes the following character.
 *   A quoted atom that is not closed on the same line is rejected.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (ISALNUM(s[pos]) || (s[pos] == '_'));
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (s[pos+1] == '\0')
          return -1;
        pos += 2;
        break;

      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* That was the closing quote. */
        pos++;                  /* A double quote */
        break;

      default:
        pos++;
        break;
      }
}
5611
5612 \f
5613 /*
5614  * Support for Erlang
5615  *
5616  * Generates tags for functions, defines, and records.
5617  * Assumes that Erlang functions start at column 0.
5618  * Original code by Anders Lindgren (1996)
5619  */
5620 static int erlang_func __P((char *, char *));
5621 static void erlang_attribute __P((char *));
5622 static int erlang_atom __P((char *));
5623
5624 static void
5625 Erlang_functions (inf)
5626      FILE *inf;
5627 {
5628   char *cp, *last;
5629   int len;
5630   int allocated;
5631
5632   allocated = 0;
5633   len = 0;
5634   last = NULL;
5635
5636   LOOP_ON_INPUT_LINES (inf, lb, cp)
5637     {
5638       if (cp[0] == '\0')        /* Empty line */
5639         continue;
5640       else if (iswhite (cp[0])) /* Not function nor attribute */
5641         continue;
5642       else if (cp[0] == '%')    /* comment */
5643         continue;
5644       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5645         continue;
5646       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5647         {
5648           erlang_attribute (cp);
5649           if (last != NULL)
5650             {
5651               free (last);
5652               last = NULL;
5653             }
5654         }
5655       else if ((len = erlang_func (cp, last)) > 0)
5656         {
5657           /*
5658            * Function.  Store the function name so that we only
5659            * generates a tag for the first clause.
5660            */
5661           if (last == NULL)
5662             last = xnew (len + 1, char);
5663           else if (len + 1 > allocated)
5664             xrnew (last, len + 1, char);
5665           allocated = len + 1;
5666           strncpy (last, cp, len);
5667           last[len] = '\0';
5668         }
5669     }
5670   if (last != NULL)
5671     free (last);
5672 }
5673
5674
5675 /*
5676  * A function definition is added if it matches:
5677  *     <beginning of line><Erlang Atom><whitespace>(
5678  *
5679  * It is added to the tags database if it doesn't match the
5680  * name of the previous clause header.
5681  *
5682  * Return the size of the name of the function, or 0 if no function
5683  * was found.
5684  */
5685 static int
5686 erlang_func (s, last)
5687      char *s;
5688      char *last;                /* Name of last clause. */
5689 {
5690   int pos;
5691   int len;
5692
5693   pos = erlang_atom (s);
5694   if (pos < 1)
5695     return 0;
5696
5697   len = pos;
5698   pos = skip_spaces (s + pos) - s;
5699
5700   /* Save only the first clause. */
5701   if (s[pos++] == '('
5702       && (last == NULL
5703           || len != (int)strlen (last)
5704           || !strneq (s, last, len)))
5705         {
5706           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5707           return len;
5708         }
5709
5710   return 0;
5711 }
5712
5713
5714 /*
5715  * Handle attributes.  Currently, tags are generated for defines
5716  * and records.
5717  *
5718  * They are on the form:
5719  * -define(foo, bar).
5720  * -define(Foo(M, N), M+N).
5721  * -record(graph, {vtab = notable, cyclic = true}).
5722  */
5723 static void
5724 erlang_attribute (s)
5725      char *s;
5726 {
5727   char *cp = s;
5728
5729   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5730       && *cp++ == '(')
5731     {
5732       int len = erlang_atom (skip_spaces (cp));
5733       if (len > 0)
5734         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5735     }
5736   return;
5737 }
5738
5739
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom.
 *
 * An unquoted atom is a letter or underscore followed by alphanumeric
 * characters and underscores.  A quoted atom is enclosed in single
 * quotes, with backslash quoting the following character; a quoted
 * atom that is not closed on the same line is rejected.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
      return pos;
    }

  if (s[0] != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          pos++;
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;               /* count the closing quote as well */
}
5768
5769 \f
5770 static char *scan_separators __P((char *));
5771 static void add_regex __P((char *, language *));
5772 static char *substitute __P((char *, char *, struct re_registers *));
5773
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  A backslash before any other character is
 * kept together with that character.  Works in place.  Null terminates
 * name string.
 *
 * Returns pointer to terminating separator, or NULL for unterminated
 * regexps.  An empty string has no separator at all: it is left alone
 * and NULL is returned.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Without this check the scan below would start past the end of an
     empty string.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character.  A backslash that ends the
             string is dropped and leaves the regexp unterminated.  */
          ++name;
          if (*name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }

  /* Terminate copied string. */
  *copyto = '\0';

  /* NAME rests on the closing separator, or on the end of the string
     when the regexp is unterminated.  */
  return (*name == sep) ? name : NULL;
}
5833
5834 /* Look at the argument of --regex or --no-regex and do the right
5835    thing.  Same for each line of a regexp file. */
5836 static void
5837 analyse_regex (regex_arg)
5838      char *regex_arg;
5839 {
5840   if (regex_arg == NULL)
5841     {
5842       free_regexps ();          /* --no-regex: remove existing regexps */
5843       return;
5844     }
5845
5846   /* A real --regexp option or a line in a regexp file. */
5847   switch (regex_arg[0])
5848     {
5849       /* Comments in regexp file or null arg to --regex. */
5850     case '\0':
5851     case ' ':
5852     case '\t':
5853       break;
5854
5855       /* Read a regex file.  This is recursive and may result in a
5856          loop, which will stop when the file descriptors are exhausted. */
5857     case '@':
5858       {
5859         FILE *regexfp;
5860         linebuffer regexbuf;
5861         char *regexfile = regex_arg + 1;
5862
5863         /* regexfile is a file containing regexps, one per line. */
5864         regexfp = fopen (regexfile, "r");
5865         if (regexfp == NULL)
5866           {
5867             pfatal (regexfile);
5868             return;
5869           }
5870         linebuffer_init (&regexbuf);
5871         while (readline_internal (&regexbuf, regexfp) > 0)
5872           analyse_regex (regexbuf.buffer);
5873         free (regexbuf.buffer);
5874         fclose (regexfp);
5875       }
5876       break;
5877
5878       /* Regexp to be used for a specific language only. */
5879     case '{':
5880       {
5881         language *lang;
5882         char *lang_name = regex_arg + 1;
5883         char *cp;
5884
5885         for (cp = lang_name; *cp != '}'; cp++)
5886           if (*cp == '\0')
5887             {
5888               error ("unterminated language name in regex: %s", regex_arg);
5889               return;
5890             }
5891         *cp++ = '\0';
5892         lang = get_language_from_langname (lang_name);
5893         if (lang == NULL)
5894           return;
5895         add_regex (cp, lang);
5896       }
5897       break;
5898
5899       /* Regexp to be used for any language. */
5900     default:
5901       add_regex (regex_arg, NULL);
5902       break;
5903     }
5904 }
5905
5906 /* Separate the regexp pattern, compile it,
5907    and care for optional name and modifiers. */
5908 static void
5909 add_regex (regexp_pattern, lang)
5910      char *regexp_pattern;
5911      language *lang;
5912 {
5913   static struct re_pattern_buffer zeropattern;
5914   char sep, *pat, *name, *modifiers;
5915   const char *err;
5916   struct re_pattern_buffer *patbuf;
5917   regexp *rp;
5918   bool
5919     force_explicit_name = TRUE, /* do not use implicit tag names */
5920     ignore_case = FALSE,        /* case is significant */
5921     multi_line = FALSE,         /* matches are done one line at a time */
5922     single_line = FALSE;        /* dot does not match newline */
5923
5924
5925   if (strlen(regexp_pattern) < 3)
5926     {
5927       error ("null regexp", (char *)NULL);
5928       return;
5929     }
5930   sep = regexp_pattern[0];
5931   name = scan_separators (regexp_pattern);
5932   if (name == NULL)
5933     {
5934       error ("%s: unterminated regexp", regexp_pattern);
5935       return;
5936     }
5937   if (name[1] == sep)
5938     {
5939       error ("null name for regexp \"%s\"", regexp_pattern);
5940       return;
5941     }
5942   modifiers = scan_separators (name);
5943   if (modifiers == NULL)        /* no terminating separator --> no name */
5944     {
5945       modifiers = name;
5946       name = "";
5947     }
5948   else
5949     modifiers += 1;             /* skip separator */
5950
5951   /* Parse regex modifiers. */
5952   for (; modifiers[0] != '\0'; modifiers++)
5953     switch (modifiers[0])
5954       {
5955       case 'N':
5956         if (modifiers == name)
5957           error ("forcing explicit tag name but no name, ignoring", NULL);
5958         force_explicit_name = TRUE;
5959         break;
5960       case 'i':
5961         ignore_case = TRUE;
5962         break;
5963       case 's':
5964         single_line = TRUE;
5965         /* FALLTHRU */
5966       case 'm':
5967         multi_line = TRUE;
5968         need_filebuf = TRUE;
5969         break;
5970       default:
5971         {
5972           char wrongmod [2];
5973           wrongmod[0] = modifiers[0];
5974           wrongmod[1] = '\0';
5975           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5976         }
5977         break;
5978       }
5979
5980   patbuf = xnew (1, struct re_pattern_buffer);
5981   *patbuf = zeropattern;
5982   if (ignore_case)
5983     {
5984       static char lc_trans[CHARS];
5985       int i;
5986       for (i = 0; i < CHARS; i++)
5987         lc_trans[i] = lowcase (i);
5988       patbuf->translate = lc_trans;     /* translation table to fold case  */
5989     }
5990
5991   if (multi_line)
5992     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5993   else
5994     pat = regexp_pattern;
5995
5996   if (single_line)
5997     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5998   else
5999     re_set_syntax (RE_SYNTAX_EMACS);
6000
6001   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6002   if (multi_line)
6003     free (pat);
6004   if (err != NULL)
6005     {
6006       error ("%s while compiling pattern", err);
6007       return;
6008     }
6009
6010   rp = p_head;
6011   p_head = xnew (1, regexp);
6012   p_head->pattern = savestr (regexp_pattern);
6013   p_head->p_next = rp;
6014   p_head->lang = lang;
6015   p_head->pat = patbuf;
6016   p_head->name = savestr (name);
6017   p_head->error_signaled = FALSE;
6018   p_head->force_explicit_name = force_explicit_name;
6019   p_head->ignore_case = ignore_case;
6020   p_head->multi_line = multi_line;
6021 }
6022
6023 /*
6024  * Do the substitutions indicated by the regular expression and
6025  * arguments.
6026  */
6027 static char *
6028 substitute (in, out, regs)
6029      char *in, *out;
6030      struct re_registers *regs;
6031 {
6032   char *result, *t;
6033   int size, dig, diglen;
6034
6035   result = NULL;
6036   size = strlen (out);
6037
6038   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6039   if (out[size - 1] == '\\')
6040     fatal ("pattern error in \"%s\"", out);
6041   for (t = etags_strchr (out, '\\');
6042        t != NULL;
6043        t = etags_strchr (t + 2, '\\'))
6044     if (ISDIGIT (t[1]))
6045       {
6046         dig = t[1] - '0';
6047         diglen = regs->end[dig] - regs->start[dig];
6048         size += diglen - 2;
6049       }
6050     else
6051       size -= 1;
6052
6053   /* Allocate space and do the substitutions. */
6054   assert (size >= 0);
6055   result = xnew (size + 1, char);
6056
6057   for (t = result; *out != '\0'; out++)
6058     if (*out == '\\' && ISDIGIT (*++out))
6059       {
6060         dig = *out - '0';
6061         diglen = regs->end[dig] - regs->start[dig];
6062         strncpy (t, in + regs->start[dig], diglen);
6063         t += diglen;
6064       }
6065     else
6066       *t++ = *out;
6067   *t = '\0';
6068
6069   assert (t <= result + size);
6070   assert (t - result == (int)strlen (result));
6071
6072   return result;
6073 }
6074
6075 /* Deallocate all regexps. */
6076 static void
6077 free_regexps ()
6078 {
6079   regexp *rp;
6080   while (p_head != NULL)
6081     {
6082       rp = p_head->p_next;
6083       free (p_head->pattern);
6084       free (p_head->name);
6085       free (p_head);
6086       p_head = rp;
6087     }
6088   return;
6089 }
6090
6091 /*
6092  * Reads the whole file as a single string from `filebuf' and looks for
6093  * multi-line regular expressions, creating tags on matches.
6094  * readline already dealt with normal regexps.
6095  *
6096  * Idea by Ben Wing <ben@666.com> (2002).
6097  */
6098 static void
6099 regex_tag_multiline ()
6100 {
6101   char *buffer = filebuf.buffer;
6102   regexp *rp;
6103   char *name;
6104
6105   for (rp = p_head; rp != NULL; rp = rp->p_next)
6106     {
6107       int match = 0;
6108
6109       if (!rp->multi_line)
6110         continue;               /* skip normal regexps */
6111
6112       /* Generic initialisations before parsing file from memory. */
6113       lineno = 1;               /* reset global line number */
6114       charno = 0;               /* reset global char number */
6115       linecharno = 0;           /* reset global char number of line start */
6116
6117       /* Only use generic regexps or those for the current language. */
6118       if (rp->lang != NULL && rp->lang != curfdp->lang)
6119         continue;
6120
6121       while (match >= 0 && match < filebuf.len)
6122         {
6123           match = re_search (rp->pat, buffer, filebuf.len, charno,
6124                              filebuf.len - match, &rp->regs);
6125           switch (match)
6126             {
6127             case -2:
6128               /* Some error. */
6129               if (!rp->error_signaled)
6130                 {
6131                   error ("regexp stack overflow while matching \"%s\"",
6132                          rp->pattern);
6133                   rp->error_signaled = TRUE;
6134                 }
6135               break;
6136             case -1:
6137               /* No match. */
6138               break;
6139             default:
6140               if (match == rp->regs.end[0])
6141                 {
6142                   if (!rp->error_signaled)
6143                     {
6144                       error ("regexp matches the empty string: \"%s\"",
6145                              rp->pattern);
6146                       rp->error_signaled = TRUE;
6147                     }
6148                   match = -3;   /* exit from while loop */
6149                   break;
6150                 }
6151
6152               /* Match occurred.  Construct a tag. */
6153               while (charno < rp->regs.end[0])
6154                 if (buffer[charno++] == '\n')
6155                   lineno++, linecharno = charno;
6156               name = rp->name;
6157               if (name[0] == '\0')
6158                 name = NULL;
6159               else /* make a named tag */
6160                 name = substitute (buffer, rp->name, &rp->regs);
6161               if (rp->force_explicit_name)
6162                 /* Force explicit tag name, if a name is there. */
6163                 pfnote (name, TRUE, buffer + linecharno,
6164                         charno - linecharno + 1, lineno, linecharno);
6165               else
6166                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6167                           charno - linecharno + 1, lineno, linecharno);
6168               break;
6169             }
6170         }
6171     }
6172 }
6173
6174 \f
6175 static bool
6176 nocase_tail (cp)
6177      char *cp;
6178 {
6179   register int len = 0;
6180
6181   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6182     cp++, len++;
6183   if (*cp == '\0' && !intoken (dbp[len]))
6184     {
6185       dbp += len;
6186       return TRUE;
6187     }
6188   return FALSE;
6189 }
6190
6191 static void
6192 get_tag (bp, namepp)
6193      register char *bp;
6194      char **namepp;
6195 {
6196   register char *cp = bp;
6197
6198   if (*bp != '\0')
6199     {
6200       /* Go till you get to white space or a syntactic break */
6201       for (cp = bp + 1; !notinname (*cp); cp++)
6202         continue;
6203       make_tag (bp, cp - bp, TRUE,
6204                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6205     }
6206
6207   if (namepp != NULL)
6208     *namepp = savenstr (bp, cp - bp);
6209 }
6210
6211 /*
6212  * Read a line of text from `stream' into `lbp', excluding the
6213  * newline or CR-NL, if any.  Return the number of characters read from
6214  * `stream', which is the length of the line including the newline.
6215  *
6216  * On DOS or Windows we do not count the CR character, if any before the
6217  * NL, in the returned length; this mirrors the behavior of Emacs on those
6218  * platforms (for text files, it translates CR-NL to NL as it reads in the
6219  * file).
6220  *
6221  * If multi-line regular expressions are requested, each line read is
6222  * appended to `filebuf'.
6223  */
6224 static long
6225 readline_internal (lbp, stream)
6226      linebuffer *lbp;
6227      register FILE *stream;
6228 {
6229   char *buffer = lbp->buffer;
6230   register char *p = lbp->buffer;
6231   register char *pend;
6232   int chars_deleted;
6233
6234   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6235
6236   for (;;)
6237     {
6238       register int c = getc (stream);
6239       if (p == pend)
6240         {
6241           /* We're at the end of linebuffer: expand it. */
6242           lbp->size *= 2;
6243           xrnew (buffer, lbp->size, char);
6244           p += buffer - lbp->buffer;
6245           pend = buffer + lbp->size;
6246           lbp->buffer = buffer;
6247         }
6248       if (c == EOF)
6249         {
6250           *p = '\0';
6251           chars_deleted = 0;
6252           break;
6253         }
6254       if (c == '\n')
6255         {
6256           if (p > buffer && p[-1] == '\r')
6257             {
6258               p -= 1;
6259 #ifdef DOS_NT
6260              /* Assume CRLF->LF translation will be performed by Emacs
6261                 when loading this file, so CRs won't appear in the buffer.
6262                 It would be cleaner to compensate within Emacs;
6263                 however, Emacs does not know how many CRs were deleted
6264                 before any given point in the file.  */
6265               chars_deleted = 1;
6266 #else
6267               chars_deleted = 2;
6268 #endif
6269             }
6270           else
6271             {
6272               chars_deleted = 1;
6273             }
6274           *p = '\0';
6275           break;
6276         }
6277       *p++ = c;
6278     }
6279   lbp->len = p - buffer;
6280
6281   if (need_filebuf              /* we need filebuf for multi-line regexps */
6282       && chars_deleted > 0)     /* not at EOF */
6283     {
6284       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6285         {
6286           /* Expand filebuf. */
6287           filebuf.size *= 2;
6288           xrnew (filebuf.buffer, filebuf.size, char);
6289         }
6290       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6291       filebuf.len += lbp->len;
6292       filebuf.buffer[filebuf.len++] = '\n';
6293       filebuf.buffer[filebuf.len] = '\0';
6294     }
6295
6296   return lbp->len + chars_deleted;
6297 }
6298
6299 /*
6300  * Like readline_internal, above, but in addition try to match the
6301  * input line against relevant regular expressions and manage #line
6302  * directives.
6303  */
6304 static void
6305 readline (lbp, stream)
6306      linebuffer *lbp;
6307      FILE *stream;
6308 {
6309   long result;
6310
6311   linecharno = charno;          /* update global char number of line start */
6312   result = readline_internal (lbp, stream); /* read line */
6313   lineno += 1;                  /* increment global line number */
6314   charno += result;             /* increment global char number */
6315
6316   /* Honour #line directives. */
6317   if (!no_line_directive)
6318     {
6319       static bool discard_until_line_directive;
6320
6321       /* Check whether this is a #line directive. */
6322       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6323         {
6324           unsigned int lno;
6325           int start = 0;
6326
6327           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6328               && start > 0)     /* double quote character found */
6329             {
6330               char *endp = lbp->buffer + start;
6331
6332               while ((endp = etags_strchr (endp, '"')) != NULL
6333                      && endp[-1] == '\\')
6334                 endp++;
6335               if (endp != NULL)
6336                 /* Ok, this is a real #line directive.  Let's deal with it. */
6337                 {
6338                   char *taggedabsname;  /* absolute name of original file */
6339                   char *taggedfname;    /* name of original file as given */
6340                   char *name;           /* temp var */
6341
6342                   discard_until_line_directive = FALSE; /* found it */
6343                   name = lbp->buffer + start;
6344                   *endp = '\0';
6345                   canonicalize_filename (name); /* for DOS */
6346                   taggedabsname = absolute_filename (name, tagfiledir);
6347                   if (filename_is_absolute (name)
6348                       || filename_is_absolute (curfdp->infname))
6349                     taggedfname = savestr (taggedabsname);
6350                   else
6351                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6352
6353                   if (streq (curfdp->taggedfname, taggedfname))
6354                     /* The #line directive is only a line number change.  We
6355                        deal with this afterwards. */
6356                     free (taggedfname);
6357                   else
6358                     /* The tags following this #line directive should be
6359                        attributed to taggedfname.  In order to do this, set
6360                        curfdp accordingly. */
6361                     {
6362                       fdesc *fdp; /* file description pointer */
6363
6364                       /* Go look for a file description already set up for the
6365                          file indicated in the #line directive.  If there is
6366                          one, use it from now until the next #line
6367                          directive. */
6368                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6369                         if (streq (fdp->infname, curfdp->infname)
6370                             && streq (fdp->taggedfname, taggedfname))
6371                           /* If we remove the second test above (after the &&)
6372                              then all entries pertaining to the same file are
6373                              coalesced in the tags file.  If we use it, then
6374                              entries pertaining to the same file but generated
6375                              from different files (via #line directives) will
6376                              go into separate sections in the tags file.  These
6377                              alternatives look equivalent.  The first one
6378                              destroys some apparently useless information. */
6379                           {
6380                             curfdp = fdp;
6381                             free (taggedfname);
6382                             break;
6383                           }
6384                       /* Else, if we already tagged the real file, skip all
6385                          input lines until the next #line directive. */
6386                       if (fdp == NULL) /* not found */
6387                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6388                           if (streq (fdp->infabsname, taggedabsname))
6389                             {
6390                               discard_until_line_directive = TRUE;
6391                               free (taggedfname);
6392                               break;
6393                             }
6394                       /* Else create a new file description and use that from
6395                          now on, until the next #line directive. */
6396                       if (fdp == NULL) /* not found */
6397                         {
6398                           fdp = fdhead;
6399                           fdhead = xnew (1, fdesc);
6400                           *fdhead = *curfdp; /* copy curr. file description */
6401                           fdhead->next = fdp;
6402                           fdhead->infname = savestr (curfdp->infname);
6403                           fdhead->infabsname = savestr (curfdp->infabsname);
6404                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6405                           fdhead->taggedfname = taggedfname;
6406                           fdhead->usecharno = FALSE;
6407                           fdhead->prop = NULL;
6408                           fdhead->written = FALSE;
6409                           curfdp = fdhead;
6410                         }
6411                     }
6412                   free (taggedabsname);
6413                   lineno = lno - 1;
6414                   readline (lbp, stream);
6415                   return;
6416                 } /* if a real #line directive */
6417             } /* if #line is followed by a a number */
6418         } /* if line begins with "#line " */
6419
6420       /* If we are here, no #line directive was found. */
6421       if (discard_until_line_directive)
6422         {
6423           if (result > 0)
6424             {
6425               /* Do a tail recursion on ourselves, thus discarding the contents
6426                  of the line buffer. */
6427               readline (lbp, stream);
6428               return;
6429             }
6430           /* End of file. */
6431           discard_until_line_directive = FALSE;
6432           return;
6433         }
6434     } /* if #line directives should be considered */
6435
6436   {
6437     int match;
6438     regexp *rp;
6439     char *name;
6440
6441     /* Match against relevant regexps. */
6442     if (lbp->len > 0)
6443       for (rp = p_head; rp != NULL; rp = rp->p_next)
6444         {
6445           /* Only use generic regexps or those for the current language.
6446              Also do not use multiline regexps, which is the job of
6447              regex_tag_multiline. */
6448           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6449               || rp->multi_line)
6450             continue;
6451
6452           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6453           switch (match)
6454             {
6455             case -2:
6456               /* Some error. */
6457               if (!rp->error_signaled)
6458                 {
6459                   error ("regexp stack overflow while matching \"%s\"",
6460                          rp->pattern);
6461                   rp->error_signaled = TRUE;
6462                 }
6463               break;
6464             case -1:
6465               /* No match. */
6466               break;
6467             case 0:
6468               /* Empty string matched. */
6469               if (!rp->error_signaled)
6470                 {
6471                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6472                   rp->error_signaled = TRUE;
6473                 }
6474               break;
6475             default:
6476               /* Match occurred.  Construct a tag. */
6477               name = rp->name;
6478               if (name[0] == '\0')
6479                 name = NULL;
6480               else /* make a named tag */
6481                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6482               if (rp->force_explicit_name)
6483                 /* Force explicit tag name, if a name is there. */
6484                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6485               else
6486                 make_tag (name, strlen (name), TRUE,
6487                           lbp->buffer, match, lineno, linecharno);
6488               break;
6489             }
6490         }
6491   }
6492 }
6493
6494 \f
/*
 * Return a newly allocated, NUL-terminated copy of the whole string CP.
 * The copy occupies strlen(cp)+1 characters and is made by savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6505
6506 /*
6507  * Return a pointer to a space of size LEN+1 allocated with xnew where
6508  * the string CP has been copied for at most the first LEN characters.
6509  */
6510 static char *
6511 savenstr (cp, len)
6512      char *cp;
6513      int len;
6514 {
6515   register char *dp;
6516
6517   dp = xnew (len + 1, char);
6518   strncpy (dp, cp, len);
6519   dp[len] = '\0';
6520   return dp;
6521 }
6522
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is part
 * of the search, so looking for '\0' yields the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent match */
      if (*sp == '\0')
        break;                  /* the terminator has been examined too */
    }
  return (char *) last;
}
6544
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if C does not occur.  The terminating NUL is part
 * of the search, so looking for '\0' yields the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without a match */
    }
}
6563
/*
 * Compare the strings S1 and S2, ignoring case for alphabetic characters.
 * Return zero if they are equal, otherwise the difference between the
 * first pair of characters that differ (taken in lower case when both
 * are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common part of the two strings. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6584
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * ignoring case for alphabetic characters.  Return zero if those
 * characters are equal (always the case when N is not positive),
 * otherwise the difference between the first pair of characters that
 * differ (taken in lower case when both are alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters that may still be compared.  It is tested
     before looking at the strings and decremented only after a pair has
     matched, so that reaching the end of S1 exactly when the limit is
     exhausted is still recognized as "equal within N characters". */
  while (n > 0 && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;                   /* the first N characters all matched */
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6610
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  There is no explicit end-of-string test, so this
   relies on iswhite being false for the terminating NUL. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6620
/* Return a pointer to the first character at or after CP that is either
   the terminating NUL or one for which iswhite is true. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6630
/* Print an error message through `error' and terminate the program with
   status EXIT_FAILURE.  `s1' is the printf control string and `s2' the
   argument for it.  This function does not return.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6639
/* Report the current `errno' condition with perror, using `s1' as the
   message prefix, then terminate the program with status EXIT_FAILURE.
   This function does not return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6647
6648 static void
6649 suggest_asking_for_help ()
6650 {
6651   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6652            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6653   exit (EXIT_FAILURE);
6654 }
6655
6656 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6657 static void
6658 error (s1, s2)
6659      const char *s1, *s2;
6660 {
6661   fprintf (stderr, "%s: ", progname);
6662   fprintf (stderr, s1, s2);
6663   fprintf (stderr, "\n");
6664 }
6665
6666 /* Return a newly-allocated string whose contents
6667    concatenate those of s1, s2, s3.  */
6668 static char *
6669 concat (s1, s2, s3)
6670      char *s1, *s2, *s3;
6671 {
6672   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6673   char *result = xnew (len1 + len2 + len3 + 1, char);
6674
6675   strcpy (result, s1);
6676   strcpy (result + len1, s2);
6677   strcpy (result + len1 + len2, s3);
6678   result[len1 + len2 + len3] = '\0';
6679
6680   return result;
6681 }
6682
6683 \f
6684 /* Does the same work as the system V getcwd, but does not need to
6685    guess the buffer size in advance. */
6686 static char *
6687 etags_getcwd ()
6688 {
6689 #ifdef HAVE_GETCWD
6690   int bufsize = 200;
6691   char *path = xnew (bufsize, char);
6692
6693   while (getcwd (path, bufsize) == NULL)
6694     {
6695       if (errno != ERANGE)
6696         pfatal ("getcwd");
6697       bufsize *= 2;
6698       free (path);
6699       path = xnew (bufsize, char);
6700     }
6701
6702   canonicalize_filename (path);
6703   return path;
6704
6705 #else /* not HAVE_GETCWD */
6706 #if MSDOS
6707
6708   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6709
6710   getwd (path);
6711
6712   for (p = path; *p != '\0'; p++)
6713     if (*p == '\\')
6714       *p = '/';
6715     else
6716       *p = lowcase (*p);
6717
6718   return strdup (path);
6719 #else /* not MSDOS */
6720   linebuffer path;
6721   FILE *pipe;
6722
6723   linebuffer_init (&path);
6724   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6725   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6726     pfatal ("pwd");
6727   pclose (pipe);
6728
6729   return path.buffer;
6730 #endif /* not MSDOS */
6731 #endif /* not HAVE_GETCWD */
6732 }
6733
6734 /* Return a newly allocated string containing the file name of FILE
6735    relative to the absolute directory DIR (which should end with a slash). */
6736 static char *
6737 relative_filename (file, dir)
6738      char *file, *dir;
6739 {
6740   char *fp, *dp, *afn, *res;
6741   int i;
6742
6743   /* Find the common root of file and dir (with a trailing slash). */
6744   afn = absolute_filename (file, cwd);
6745   fp = afn;
6746   dp = dir;
6747   while (*fp++ == *dp++)
6748     continue;
6749   fp--, dp--;                   /* back to the first differing char */
6750 #ifdef DOS_NT
6751   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6752     return afn;
6753 #endif
6754   do                            /* look at the equal chars until '/' */
6755     fp--, dp--;
6756   while (*fp != '/');
6757
6758   /* Build a sequence of "../" strings for the resulting relative file name. */
6759   i = 0;
6760   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6761     i += 1;
6762   res = xnew (3*i + strlen (fp + 1) + 1, char);
6763   res[0] = '\0';
6764   while (i-- > 0)
6765     strcat (res, "../");
6766
6767   /* Add the file name relative to the common root of file and dir. */
6768   strcat (res, fp + 1);
6769   free (afn);
6770
6771   return res;
6772 }
6773
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   already absolute it is appended to DIR.  "/dirname/.." and "/."
   components are then removed from the result.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the
     string is moved down over the deleted part; source and destination
     overlap, so memmove is used (strcpy is undefined for overlapping
     strings). */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the slash that starts the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6837
/* Return a newly allocated string containing the absolute file name of
   the directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place; if it contains no slash, a
   copy of DIR is returned.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *after, *result;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily cut FILE right after its last slash, so that only the
     directory part is made absolute; then put the character back. */
  after = last_slash + 1;
  saved = *after;
  *after = '\0';
  result = absolute_filename (file, dir);
  *after = saved;

  return result;
}
6859
/* Return whether FN is an absolute file name, that is, whether it
   begins with a slash or, on DOS_NT, with a drive letter followed by
   ":/".  FN must have been canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6872
/* Bring the file name FN into canonical form, working in place.  On
   DOS_NT a lower case drive letter is converted to upper case and every
   backslash is translated into a slash; on other systems nothing is
   done.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6891
6892 \f
6893 /* Initialize a linebuffer for use */
6894 static void
6895 linebuffer_init (lbp)
6896      linebuffer *lbp;
6897 {
6898   lbp->size = (DEBUG) ? 3 : 200;
6899   lbp->buffer = xnew (lbp->size, char);
6900   lbp->buffer[0] = '\0';
6901   lbp->len = 0;
6902 }
6903
6904 /* Set the minimum size of a string contained in a linebuffer. */
6905 static void
6906 linebuffer_setlen (lbp, toksize)
6907      linebuffer *lbp;
6908      int toksize;
6909 {
6910   while (lbp->size <= toksize)
6911     {
6912       lbp->size *= 2;
6913       xrnew (lbp->buffer, lbp->size, char);
6914     }
6915   lbp->len = toksize;
6916 }
6917
6918 /* Like malloc but get fatal error if memory is exhausted. */
6919 static PTR
6920 xmalloc (size)
6921      unsigned int size;
6922 {
6923   PTR result = (PTR) malloc (size);
6924   if (result == NULL)
6925     fatal ("virtual memory exhausted", (char *)NULL);
6926   return result;
6927 }
6928
6929 static PTR
6930 xrealloc (ptr, size)
6931      char *ptr;
6932      unsigned int size;
6933 {
6934   PTR result = (PTR) realloc (ptr, size);
6935   if (result == NULL)
6936     fatal ("virtual memory exhausted", (char *)NULL);
6937   return result;
6938 }
6939
6940 /*
6941  * Local Variables:
6942  * indent-tabs-mode: t
6943  * tab-width: 8
6944  * fill-column: 79
6945  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6946  * c-file-style: "gnu"
6947  * End:
6948  */
6949
6950 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6951    (do not change this comment) */
6952
6953 /* etags.c ends here */