lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software; you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation; either version 2 of the License, or
  40 (at your option) any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program; if not, write to the Free Software Foundation,
  49 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  50
  51
  52 /* NB To comply with the above BSD license, copyright information is
  53 reproduced in etc/ETAGS.README.  That file should be updated when the
  54 above notices are.
  55
  56 To the best of our knowledge, this code was originally based on the
  57 ctags.c distributed with BSD4.2, which was copyrighted by the
  58 University of California, as described above. */
  59
  60
  61 /*
  62  * Authors:
  63  * 1983 Ctags originally by Ken Arnold.
  64  * 1984 Fortran added by Jim Kleckner.
  65  * 1984 Ed Pelegri-Llopart added C typedefs.
  66  * 1985 Emacs TAGS format by Richard Stallman.
  67  * 1989 Sam Kendall added C++.
  68  * 1992 Joseph B. Wells improved C and C++ parsing.
  69  * 1993 Francesco Potortì reorganised C and C++.
  70  * 1994 Line-by-line regexp tags by Tom Tromey.
  71  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  72  * 2002 #line directives by Francesco Potortì.
  73  *
  74  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  75  */
  76
  77 /*
  78  * If you want to add support for a new language, start by looking at the LUA
  79  * language, which is the simplest.  Alternatively, consider shipping a
  80  * configuration file containing regexp definitions for etags.
  81  */
  82
  83 char pot_etags_version[] = "@(#) pot revision number is 17.34";
  84
  85 #define TRUE    1
  86 #define FALSE   0
  87
  88 #ifdef DEBUG
  89 #  undef DEBUG
  90 #  define DEBUG TRUE
  91 #else
  92 #  define DEBUG  FALSE
  93 #  define NDEBUG                /* disable assert */
  94 #endif
  95
  96 #ifdef HAVE_CONFIG_H
  97 # include <config.h>
  98   /* On some systems, Emacs defines static as nothing for the sake
  99      of unexec.  We don't want that here since we don't use unexec. */
 100 # undef static
 101 # ifndef PTR                    /* for XEmacs */
 102 #   define PTR void *
 103 # endif
 104 # ifndef __P                    /* for XEmacs */
 105 #   define __P(args) args
 106 # endif
 107 #else  /* no config.h */
 108 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 109 #   define __P(args) args       /* use prototypes */
 110 #   define PTR void *           /* for generic pointers */
 111 # else /* not standard C */
 112 #   define __P(args) ()         /* no prototypes */
 113 #   define const                /* remove const for old compilers' sake */
 114 #   define PTR long *           /* don't use void* */
 115 # endif
 116 #endif /* !HAVE_CONFIG_H */
 117
 118 #ifndef _GNU_SOURCE
 119 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 120 #endif
 121
 122 /* WIN32_NATIVE is for XEmacs.
 123    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 124 #ifdef WIN32_NATIVE
 125 # undef MSDOS
 126 # undef  WINDOWSNT
 127 # define WINDOWSNT
 128 #endif /* WIN32_NATIVE */
 129
 130 #ifdef MSDOS
 131 # undef MSDOS
 132 # define MSDOS TRUE
 133 # include <fcntl.h>
 134 # include <sys/param.h>
 135 # include <io.h>
 136 # ifndef HAVE_CONFIG_H
 137 #   define DOS_NT
 138 #   include <sys/config.h>
 139 # endif
 140 #else
 141 # define MSDOS FALSE
 142 #endif /* MSDOS */
 143
 144 #ifdef WINDOWSNT
 145 # include <stdlib.h>
 146 # include <fcntl.h>
 147 # include <string.h>
 148 # include <direct.h>
 149 # include <io.h>
 150 # define MAXPATHLEN _MAX_PATH
 151 # undef HAVE_NTGUI
 152 # undef  DOS_NT
 153 # define DOS_NT
 154 # ifndef HAVE_GETCWD
 155 #   define HAVE_GETCWD
 156 # endif /* undef HAVE_GETCWD */
 157 #else /* not WINDOWSNT */
 158 # ifdef STDC_HEADERS
 159 #  include <stdlib.h>
 160 #  include <string.h>
 161 # else /* no standard C headers */
 162    extern char *getenv ();
 163    extern char *strcpy ();
 164    extern char *strncpy ();
 165    extern char *strcat ();
 166    extern char *strncat ();
 167    extern unsigned long strlen ();
 168    extern PTR malloc ();
 169    extern PTR realloc ();
 170 #  ifdef VMS
 171 #   define EXIT_SUCCESS 1
 172 #   define EXIT_FAILURE 0
 173 #  else /* no VMS */
 174 #   define EXIT_SUCCESS 0
 175 #   define EXIT_FAILURE 1
 176 #  endif
 177 # endif
 178 #endif /* !WINDOWSNT */
 179
 180 #ifdef HAVE_UNISTD_H
 181 # include <unistd.h>
 182 #else
 183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 184     extern char *getcwd (char *buf, size_t size);
 185 # endif
 186 #endif /* HAVE_UNISTD_H */
 187
 188 #include <stdio.h>
 189 #include <ctype.h>
 190 #include <errno.h>
 191 #ifndef errno
 192   extern int errno;
 193 #endif
 194 #include <sys/types.h>
 195 #include <sys/stat.h>
 196
 197 #include <assert.h>
 198 #ifdef NDEBUG
 199 # undef  assert                 /* some systems have a buggy assert.h */
 200 # define assert(x) ((void) 0)
 201 #endif
 202
 203 #if !defined (S_ISREG) && defined (S_IFREG)
 204 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 205 #endif
 206
 207 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 208 # define NO_LONG_OPTIONS TRUE
 209 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 210   extern char *optarg;
 211   extern int optind, opterr;
 212 #else
 213 # define NO_LONG_OPTIONS FALSE
 214 # include <getopt.h>
 215 #endif /* NO_LONG_OPTIONS */
 216
 217 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 218 # ifdef __CYGWIN__              /* compiling on Cygwin */
 219                              !!! NOTICE !!!
 220  the regex.h distributed with Cygwin is not compatible with etags, alas!
 221 If you want regular expression support, you should delete this notice and
 222               arrange to use the GNU regex.h and regex.c.
 223 # endif
 224 #endif
 225 #include <regex.h>
 226
 227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 228  Leave it undefined to make the program "etags", which makes emacs-style
 229  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 230 #ifdef CTAGS
 231 # undef  CTAGS
 232 # define CTAGS TRUE
 233 #else
 234 # define CTAGS FALSE
 235 #endif
 236
 237 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 238 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 239 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 240 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 241
 242 #define CHARS 256               /* number of char values (2^8); size of the class tables */
 243 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* x reduced to an index in 0..CHARS-1 */
     /* Character class tests; each one looks CHAR(c) up in a table. */
 244 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 245 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 246 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 247 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 248 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 249
     /* <ctype.h> wrappers: the argument goes through CHAR first, so the
        library function always receives a value in 0..CHARS-1. */
 250 #define ISALNUM(c)      isalnum (CHAR(c))
 251 #define ISALPHA(c)      isalpha (CHAR(c))
 252 #define ISDIGIT(c)      isdigit (CHAR(c))
 253 #define ISLOWER(c)      islower (CHAR(c))
 254
     /* Case conversion, with the same CHAR masking of the argument. */
 255 #define lowcase(c)      tolower (CHAR(c))
 256 #define upcase(c)       toupper (CHAR(c))
 257
 258
 259 /*
 260  *      xnew, xrnew -- allocate, reallocate storage
 261  *
 262  * SYNOPSIS:    Type *xnew (int n, Type);
 263  *              void xrnew (OldPointer, int n, Type);
      *
      * xnew requests room for N objects of type Type and casts the result
      * to Type *.  xrnew requests that the storage OldPointer refers to be
      * resized to N objects and assigns the result back to OldPointer,
      * which therefore has to be an lvalue.
      * When DEBUG is set both macros call trace_malloc/trace_realloc
      * (presumably declared in chkmalloc.h -- confirm) and pass the
      * __FILE__ and __LINE__ of the call site; otherwise they call
      * xmalloc/xrealloc.
 264  */
 265 #if DEBUG
 266 # include "chkmalloc.h"
 267 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 268                                                   (n) * sizeof (Type)))
 269 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 270                                         (char *) (op), (n) * sizeof (Type)))
 271 #else
 272 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 273 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 274                                         (char *) (op), (n) * sizeof (Type)))
 275 #endif
 276
     /* Booleans are plain ints in this file (see TRUE and FALSE); the long
        option table stores the addresses of several bool flags. */
 277 #define bool int
 278
     /* Type of a language parse function: it receives the FILE to read
        and returns nothing. */
 279 typedef void Lang_function __P((FILE *));
 280
     /* One supported compression format: the file name suffix that
        identifies it and the command that decompresses such a file. */
 281 typedef struct
 282 {
 283   char *suffix;                 /* file name suffix for this compressor */
 284   char *command;                /* takes one arg and decompresses to stdout */
 285 } compressor;
 286
     /* One language known to the program: its name and help text, the
        function that parses it, and the suffixes, file names and
        interpreters associated with its files. */
 287 typedef struct
 288 {
 289   char *name;                   /* language name */
 290   char *help;                   /* detailed help for the language */
 291   Lang_function *function;      /* parse function */
 292   char **suffixes;              /* name suffixes of this language's files */
 293   char **filenames;             /* names of this language's files */
 294   char **interpreters;          /* interpreters for this language */
 295   bool metasource;              /* source used to generate other sources */
 296 } language;
 297
     /* Description of one input file.  Descriptions are chained into a
        linked list through the `next' member. */
 298 typedef struct fdesc
 299 {
 300   struct fdesc *next;           /* for the linked list */
 301   char *infname;                /* uncompressed input file name */
 302   char *infabsname;             /* absolute uncompressed input file name */
 303   char *infabsdir;              /* absolute dir of input file */
 304   char *taggedfname;            /* file name to write in tagfile */
 305   language *lang;               /* language of file */
 306   char *prop;                   /* file properties to write in tagfile */
 307   bool usecharno;               /* etags tags shall contain char number */
 308   bool written;                 /* entry written in the tags file */
 309 } fdesc;
 310
     /* One tag, kept as a node of a binary tree: `left' and `right' link
        to the sons, `fdp' to the description of the file the tag is in. */
 311 typedef struct node_st
 312 {                               /* sorting structure */
 313   struct node_st *left, *right; /* left and right sons */
 314   fdesc *fdp;                   /* description of file to whom tag belongs */
 315   char *name;                   /* tag name */
 316   char *regex;                  /* search regexp */
 317   bool valid;                   /* write this tag on the tag file */
 318   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 319   bool been_warned;             /* warning already given for duplicated tag */
 320   int lno;                      /* line number tag is on */
 321   long cno;                     /* character number line starts on */
 322 } node;
 323
 324 /*
 325  * A `linebuffer' is a structure which holds a line of text.
 326  * `readline_internal' reads a line from a stream into a linebuffer
 327  * and works regardless of the length of the line.
 328  * SIZE is the size of BUFFER, LEN is the length of the string in
 329  * BUFFER after readline reads it.
 330  */
 331 typedef struct
 332 {
 333   long size;                    /* size of BUFFER */
 334   int len;                      /* length of the string held in BUFFER */
 335   char *buffer;                 /* the text itself */
 336 } linebuffer;
 337
 338 /* Used to support mixing of --lang and file names.  Each `argument'
        records the kind of one argument, the argument text, and the
        language associated with it. */
 339 typedef struct
 340 {
 341   enum {
 342     at_language,                /* a language specification */
 343     at_regexp,                  /* a regular expression */
 344     at_filename,                /* a file name */
 345     at_stdin,                   /* read from stdin here */
 346     at_end                      /* stop parsing the list */
 347   } arg_type;                   /* argument type */
 348   language *lang;               /* language associated with the argument */
 349   char *what;                   /* the argument itself */
 350 } argument;
 351
 352 /* Structure defining a regular expression.  The structures form a
        linked list through `p_next'. */
 353 typedef struct regexp
 354 {
 355   struct regexp *p_next;        /* pointer to next in list */
 356   language *lang;               /* if set, use only for this language */
 357   char *pattern;                /* the regexp pattern */
 358   char *name;                   /* tag name */
 359   struct re_pattern_buffer *pat; /* the compiled pattern */
 360   struct re_registers regs;     /* re registers */
 361   bool error_signaled;          /* already signaled for this regexp */
 362   bool force_explicit_name;     /* do not allow implicit tag name */
 363   bool ignore_case;             /* ignore case when matching */
 364   bool multi_line;              /* do a multi-line match on the whole file */
 365 } regexp;
 366
 367
 368 /* Many compilers barf on this:
 369         Lang_function Ada_funcs;
 370    so let's write it this way */
 371 static void Ada_funcs __P((FILE *));
 372 static void Asm_labels __P((FILE *));
 373 static void C_entries __P((int c_ext, FILE *));
 374 static void default_C_entries __P((FILE *));
 375 static void plain_C_entries __P((FILE *));
 376 static void Cjava_entries __P((FILE *));
 377 static void Cobol_paragraphs __P((FILE *));
 378 static void Cplusplus_entries __P((FILE *));
 379 static void Cstar_entries __P((FILE *));
 380 static void Erlang_functions __P((FILE *));
 381 static void Forth_words __P((FILE *));
 382 static void Fortran_functions __P((FILE *));
 383 static void HTML_labels __P((FILE *));
 384 static void Lisp_functions __P((FILE *));
 385 static void Lua_functions __P((FILE *));
 386 static void Makefile_targets __P((FILE *));
 387 static void Pascal_functions __P((FILE *));
 388 static void Perl_functions __P((FILE *));
 389 static void PHP_functions __P((FILE *));
 390 static void PS_functions __P((FILE *));
 391 static void Prolog_functions __P((FILE *));
 392 static void Python_functions __P((FILE *));
 393 static void Scheme_functions __P((FILE *));
 394 static void TeX_commands __P((FILE *));
 395 static void Texinfo_nodes __P((FILE *));
 396 static void Yacc_entries __P((FILE *));
 397 static void just_read_file __P((FILE *));
 398
 399 static void print_language_names __P((void));
 400 static void print_version __P((void));
 401 static void print_help __P((argument *));
 402 int main __P((int, char **));
 403
 404 static compressor *get_compressor_from_suffix __P((char *, char **));
 405 static language *get_language_from_langname __P((const char *));
 406 static language *get_language_from_interpreter __P((char *));
 407 static language *get_language_from_filename __P((char *, bool));
 408 static void readline __P((linebuffer *, FILE *));
 409 static long readline_internal __P((linebuffer *, FILE *));
 410 static bool nocase_tail __P((char *));
 411 static void get_tag __P((char *, char **));
 412
 413 static void analyse_regex __P((char *));
 414 static void free_regexps __P((void));
 415 static void regex_tag_multiline __P((void));
 416 static void error __P((const char *, const char *));
 417 static void suggest_asking_for_help __P((void));
 418 void fatal __P((char *, char *));
 419 static void pfatal __P((char *));
 420 static void add_node __P((node *, node **));
 421
 422 static void init __P((void));
 423 static void process_file_name __P((char *, language *));
 424 static void process_file __P((FILE *, char *, language *));
 425 static void find_entries __P((FILE *));
 426 static void free_tree __P((node *));
 427 static void free_fdesc __P((fdesc *));
 428 static void pfnote __P((char *, bool, char *, int, int, long));
 429 static void make_tag __P((char *, int, bool, char *, int, int, long));
 430 static void invalidate_nodes __P((fdesc *, node **));
 431 static void put_entries __P((node *));
 432
 433 static char *concat __P((char *, char *, char *));
 434 static char *skip_spaces __P((char *));
 435 static char *skip_non_spaces __P((char *));
 436 static char *savenstr __P((char *, int));
 437 static char *savestr __P((char *));
 438 static char *etags_strchr __P((const char *, int));
 439 static char *etags_strrchr __P((const char *, int));
 440 static int etags_strcasecmp __P((const char *, const char *));
 441 static int etags_strncasecmp __P((const char *, const char *, int));
 442 static char *etags_getcwd __P((void));
 443 static char *relative_filename __P((char *, char *));
 444 static char *absolute_filename __P((char *, char *));
 445 static char *absolute_dirname __P((char *, char *));
 446 static bool filename_is_absolute __P((char *f));
 447 static void canonicalize_filename __P((char *));
 448 static void linebuffer_init __P((linebuffer *));
 449 static void linebuffer_setlen __P((linebuffer *, int));
 450 static PTR xmalloc __P((unsigned int));
 451 static PTR xrealloc __P((char *, unsigned int));
 452
 453 \f
     /* File-scope state: output file, current input file and position,
        the tag tree, and the working line buffers. */
 454 static char searchar = '/';     /* use /.../ searches */
 455
 456 static char *tagfile;           /* output file */
 457 static char *progname;          /* name this program was invoked with */
 458 static char *cwd;               /* current working directory */
 459 static char *tagfiledir;        /* directory of tagfile */
 460 static FILE *tagf;              /* ioptr for tags file */
 461
 462 static fdesc *fdhead;           /* head of file description list */
 463 static fdesc *curfdp;           /* current file description */
 464 static int lineno;              /* line number of current line */
 465 static long charno;             /* current character number */
 466 static long linecharno;         /* charno of start of current line */
 467 static char *dbp;               /* pointer to start of current tag */
 468
     /* NOTE(review): presumably the character number that stands for
        "no valid position"; its uses are outside this chunk -- confirm. */
 469 static const int invalidcharno = -1;
 470
 471 static node *nodehead;          /* the head of the binary tree of tags */
 472 static node *last_node;         /* the last node created */
 473
 474 static linebuffer lb;           /* the current line */
 475 static linebuffer filebuf;      /* a buffer containing the whole file */
 476 static linebuffer token_name;   /* a buffer containing a tag name */
 477
 478 /* boolean "functions" (see init): lookup tables read through the
        iswhite, notinname, intoken, begtoken and endtoken macros.  The
        strings below list the characters of each class; _wht goes with
        white, _nin with nonam, _etk with endtk, _btk with begtk and
        _itk with midtk. */
 479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 480 static char
 481   /* white chars */
 482   *white = " \f\t\n\r\v",
 483   /* not in a name */
 484   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 485   /* token ending chars */
 486   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 487   /* token starting chars */
 488   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 489   /* valid in-token chars */
 490   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 491
     /* Command-line option flags.  The ones whose address appears in
        longopts are stored directly by getopt_long. */
 492 static bool append_to_tagfile;  /* -a: append to tags */
 493 /* The next five default to TRUE for etags, but to FALSE for ctags.  */
 494 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 495 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 496                                 /* 0 struct/enum/union decls, and C++ */
 497                                 /* member functions. */
 498 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 499                                 /* constants and variables. */
 500                                 /* -D: opposite of -d.  Default under ctags. */
 501 static bool globals;            /* --globals, --no-globals: create tags for global variables */
 502 static bool members;            /* --members, --no-members: create tags for C member variables */
 503 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 504 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 505 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 506 static bool update;             /* -u: update tags */
 507 static bool vgrind_style;       /* -v: create vgrind style index output */
 508 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 509 static bool cxref_style;        /* -x: create cxref style output */
 510 static bool cplusplus;          /* .[hc] means C++, not C */
 511 static bool ignoreindent;       /* -I: ignore indentation in C */
 512 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 513
 514 /* STDIN is defined in LynxOS system headers; drop any such definition
        before giving the name the meaning used in this file. */
 515 #ifdef STDIN
 516 # undef STDIN
 517 #endif
 518
     /* 0x1001 is larger than any single character, so it cannot be
        confused with one of the option letters used in longopts. */
 519 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 520 static bool parsing_stdin;      /* --parse-stdin used */
 521
 522 static regexp *p_head;          /* list of all regexps */
 523 static bool need_filebuf;       /* some regexes are multi-line */
 524
     /* Long option table for getopt_long.  An entry with a NULL third
        member makes getopt_long return the fourth member (an option
        letter, or STDIN); an entry with a flag address makes it store the
        fourth member into that flag instead.  The CTAGS setting selects
        which of the two trailing groups is compiled in.  The table ends
        at the { NULL } entry.
        NOTE(review): "help" is listed twice ('h' and 'H'); the second
        entry looks unreachable by name -- confirm it is only kept for
        symmetry with the short options. */
 525 static struct option longopts[] =
 526 {
 527   { "append",             no_argument,       NULL,               'a'   },
 528   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 529   { "c++",                no_argument,       NULL,               'C'   },
 530   { "declarations",       no_argument,       &declarations,      TRUE  },
 531   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 532   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 533   { "help",               no_argument,       NULL,               'h'   },
 534   { "help",               no_argument,       NULL,               'H'   },
 535   { "ignore-indentation", no_argument,       NULL,               'I'   },
 536   { "language",           required_argument, NULL,               'l'   },
 537   { "members",            no_argument,       &members,           TRUE  },
 538   { "no-members",         no_argument,       &members,           FALSE },
 539   { "output",             required_argument, NULL,               'o'   },
 540   { "regex",              required_argument, NULL,               'r'   },
 541   { "no-regex",           no_argument,       NULL,               'R'   },
 542   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 543   { "parse-stdin",        required_argument, NULL,               STDIN },
 544   { "version",            no_argument,       NULL,               'V'   },
 545
 546 #if CTAGS /* Ctags options */
 547   { "backward-search",    no_argument,       NULL,               'B'   },
 548   { "cxref",              no_argument,       NULL,               'x'   },
 549   { "defines",            no_argument,       NULL,               'd'   },
 550   { "globals",            no_argument,       &globals,           TRUE  },
 551   { "typedefs",           no_argument,       NULL,               't'   },
 552   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 553   { "update",             no_argument,       NULL,               'u'   },
 554   { "vgrind",             no_argument,       NULL,               'v'   },
 555   { "no-warn",            no_argument,       NULL,               'w'   },
 556
 557 #else /* Etags options */
 558   { "no-defines",         no_argument,       NULL,               'D'   },
 559   { "no-globals",         no_argument,       &globals,           FALSE },
 560   { "include",            required_argument, NULL,               'i'   },
 561 #endif
 562   { NULL }
 563 };
 564
     /* Known compression suffixes, each paired with the command that
        decompresses such a file.  The table ends at the { NULL } entry. */
 565 static compressor compressors[] =
 566 {
 567   { "z", "gzip -d -c"},
 568   { "Z", "gzip -d -c"},
 569   { "gz", "gzip -d -c"},
 570   { "GZ", "gzip -d -c"},
 571   { "bz2", "bzip2 -d -c" },
 572   { NULL }
 573 };
 574
 575 /*
 576  * Language stuff.
 577  */
 578
 579 /* Ada code: file name suffixes (NULL-terminated) and detailed help text */
 580 static char *Ada_suffixes [] =
 581   { "ads", "adb", "ada", NULL };
 582 static char Ada_help [] =
 583 "In Ada code, functions, procedures, packages, tasks and types are\n\
 584 tags.  Use the `--packages-only' option to create tags for\n\
 585 packages only.\n\
 586 Ada tag names have suffixes indicating the type of entity:\n\
 587         Entity type:    Qualifier:\n\
 588         ------------    ----------\n\
 589         function        /f\n\
 590         procedure       /p\n\
 591         package spec    /s\n\
 592         package body    /b\n\
 593         type            /t\n\
 594         task            /k\n\
 595 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 596 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 597 will just search for any tag `bidule'.";
 598
 599 /* Assembly code: file name suffixes (NULL-terminated) and detailed help text */
 600 static char *Asm_suffixes [] =
 601   { "a",        /* Unix assembler */
 602     "asm", /* Microcontroller assembly */
 603     "def", /* BSO/Tasking definition includes  */
 604     "inc", /* Microcontroller include files */
 605     "ins", /* Microcontroller include files */
 606     "s", "sa", /* Unix assembler */
 607     "S",   /* cpp-processed Unix assembler */
 608     "src", /* BSO/Tasking C compiler output */
 609     NULL
 610   };
 611 static char Asm_help [] =
 612 "In assembler code, labels appearing at the beginning of a line,\n\
 613 followed by a colon, are tags.";
 614
 615
 616 /* Note that .c and .h can be considered C++, if the --c++ flag was
 617    given, or if the `class' or `template' keywords are met inside the file.
 618    That is why default_C_entries is called for these. */
 619 static char *default_C_suffixes [] =
 620   { "c", "h", NULL };
 621 static char default_C_help [] =
 622 "In C code, any C function or typedef is a tag, and so are\n\
 623 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 624 definitions and `enum' constants are tags unless you specify\n\
 625 `--no-defines'.  Global variables are tags unless you specify\n\
 626 `--no-globals' and so are struct members unless you specify\n\
 627 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 628 `--no-members' can make the tags table file much smaller.\n\
 629 You can tag function declarations and external variables by\n\
 630 using `--declarations'.";
 631
     /* C++ code: file name suffixes (NULL-terminated) and detailed help text */
 632 static char *Cplusplus_suffixes [] =
 633   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 634     "M",                        /* Objective C++ */
 635     "pdb",                      /* Postscript with C syntax */
 636     NULL };
 637 static char Cplusplus_help [] =
 638 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 639 --help --lang=c --lang=c++ for full help.)\n\
 640 In addition to C tags, member functions are also recognized.  Member\n\
 641 variables are recognized unless you use the `--no-members' option.\n\
 642 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 643 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 644 `operator+'.";
 645
     /* Java code: file name suffixes (NULL-terminated) and detailed help text */
 646 static char *Cjava_suffixes [] =
 647   { "java", NULL };
 648 static char Cjava_help [] =
 649 "In Java code, all the tags constructs of C and C++ code are\n\
 650 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 651
 652
     /* Cobol code: file name suffixes (NULL-terminated) and detailed help text */
 653 static char *Cobol_suffixes [] =
 654   { "COB", "cob", NULL };
 655 static char Cobol_help [] =
 656 "In Cobol code, tags are paragraph names; that is, any word\n\
 657 starting in column 8 and followed by a period.";
 658
     /* C* code: file name suffixes (NULL-terminated); no help text is
        defined next to this table. */
 659 static char *Cstar_suffixes [] =
 660   { "cs", "hs", NULL };
 661
     /* Erlang code: file name suffixes (NULL-terminated) and detailed help text */
 662 static char *Erlang_suffixes [] =
 663   { "erl", "hrl", NULL };
 664 static char Erlang_help [] =
 665 "In Erlang code, the tags are the functions, records and macros\n\
 666 defined in the file.";
 667
 668 char *Forth_suffixes [] =
 669   { "fth", "tok", NULL };
 670 static char Forth_help [] =
 671 "In Forth code, tags are words defined by `:',\n\
 672 constant, code, create, defer, value, variable, buffer:, field.";
 673
     /* Fortran code: file name suffixes (NULL-terminated) and detailed help text */
 674 static char *Fortran_suffixes [] =
 675   { "F", "f", "f90", "for", NULL };
 676 static char Fortran_help [] =
 677 "In Fortran code, functions, subroutines and block data are tags.";
 678
     /* HTML files: file name suffixes (NULL-terminated) and detailed help text */
 679 static char *HTML_suffixes [] =
 680   { "htm", "html", "shtml", NULL };
 681 static char HTML_help [] =
 682 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 683 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 684 occurrences of `id='.";
 685
     /* Lisp code: file name suffixes (NULL-terminated) and detailed help text */
 686 static char *Lisp_suffixes [] =
 687   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 688 static char Lisp_help [] =
 689 "In Lisp code, any function defined with `defun', any variable\n\
 690 defined with `defvar' or `defconst', and in general the first\n\
 691 argument of any expression that starts with `(def' in column zero\n\
 692 is a tag.";
 693
     /* Lua scripts: file name suffixes (NULL-terminated) and detailed help text */
 694 static char *Lua_suffixes [] =
 695   { "lua", "LUA", NULL };
 696 static char Lua_help [] =
 697 "In Lua scripts, all functions are tags.";
 698
     /* Makefiles: these are listed by whole file name rather than by
        suffix (NULL-terminated), followed by the detailed help text. */
 699 static char *Makefile_filenames [] =
 700   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 701 static char Makefile_help [] =
 702 "In makefiles, targets are tags; additionally, variables are tags\n\
 703 unless you specify `--no-globals'.";
 704
     /* Objective C code: file name suffixes (NULL-terminated) and detailed
        help text.
        NOTE(review): the help text points at --lang=java; confirm that
        this is the intended cross-reference for Objective C. */
 705 static char *Objc_suffixes [] =
 706   { "lm",                       /* Objective lex file */
 707     "m",                        /* Objective C file */
 708      NULL };
 709 static char Objc_help [] =
 710 "In Objective C code, tags include Objective C definitions for classes,\n\
 711 class categories, methods and protocols.  Tags for variables and\n\
 712 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 713 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 714
     /* Pascal code: file name suffixes (NULL-terminated) and detailed help text */
 715 static char *Pascal_suffixes [] =
 716   { "p", "pas", NULL };
 717 static char Pascal_help [] =
 718 "In Pascal code, the tags are the functions and procedures defined\n\
 719 in the file.";
 720 /* " // this is for working around an Emacs highlighting bug... */
 721
     /* Perl code: file name suffixes, interpreter names (both
        NULL-terminated) and detailed help text */
 722 static char *Perl_suffixes [] =
 723   { "pl", "pm", NULL };
 724 static char *Perl_interpreters [] =
 725   { "perl", "@PERL@", NULL };
 726 static char Perl_help [] =
 727 "In Perl code, the tags are the packages, subroutines and variables\n\
 728 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 729 `--globals' if you want to tag global variables.  Tags for\n\
 730 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 731 defined in the default package is `main::SUB'.";
 732
 733 static char *PHP_suffixes [] =
 734   { "php", "php3", "php4", NULL };
 735 static char PHP_help [] =
 736 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 737 the `--no-members' option, vars are tags too.";
 738
 739 static char *plain_C_suffixes [] =
 740   { "pc",                       /* Pro*C file */
 741      NULL };
 742
 743 static char *PS_suffixes [] =
 744   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 745 static char PS_help [] =
 746 "In PostScript code, the tags are the functions.";
 747
 748 static char *Prolog_suffixes [] =
 749   { "prolog", NULL };
 750 static char Prolog_help [] =
 751 "In Prolog code, tags are predicates and rules at the beginning of\n\
 752 line.";
 753
 754 static char *Python_suffixes [] =
 755   { "py", NULL };
 756 static char Python_help [] =
 757 "In Python code, `def' or `class' at the beginning of a line\n\
 758 generate a tag.";
 759
 760 /* Can't do the `SCM' or `scm' prefix with a version number. */
 761 static char *Scheme_suffixes [] =
 762   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 763 static char Scheme_help [] =
 764 "In Scheme code, tags include anything defined with `def' or with a\n\
 765 construct whose name starts with `def'.  They also include\n\
 766 variables set with `set!' at top level in the file.";
 767
 768 static char *TeX_suffixes [] =
 769   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 770 static char TeX_help [] =
 771 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 772 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 773 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 774 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 775 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 776 \n\
 777 Other commands can be specified by setting the environment variable\n\
 778 `TEXTAGS' to a colon-separated list like, for example,\n\
 779      TEXTAGS=\"mycommand:myothercommand\".";
 780
 781
 782 static char *Texinfo_suffixes [] =
 783   { "texi", "texinfo", "txi", NULL };
 784 static char Texinfo_help [] =
 785 "for texinfo files, lines starting with @node are tagged.";
 786
 787 static char *Yacc_suffixes [] =
 788   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 789 static char Yacc_help [] =
 790 "In Bison or Yacc input files, each rule defines as a tag the\n\
 791 nonterminal it constructs.  The portions of the file that contain\n\
 792 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 793 for full help).";
 794
 795 static char auto_help [] =
 796 "`auto' is not a real language, it indicates to use\n\
 797 a default language for files base on file name suffix and file contents.";
 798
 799 static char none_help [] =
 800 "`none' is not a real language, it indicates to only do\n\
 801 regexp processing on files.";
 802
 803 static char no_lang_help [] =
 804 "No detailed help available for this language.";
 805
 806
 807 /*
 808  * Table of languages.
 809  *
 810  * It is ok for a given function to be listed under more than one
 811  * name.  I just didn't.
 812  */
 813
 814 static language lang_names [] =
 815 {
 816   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 817   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 818   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 819   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 820   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 821   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 822   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 823   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 824   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 825   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 826   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 827   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 828   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 829   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 830   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 831   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 832   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 833   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 834   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 835   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 836   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 837   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 838   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 839   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 840   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 841   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 842   { "auto",      auto_help },                      /* default guessing scheme */
 843   { "none",      none_help,      just_read_file }, /* regexp matching only */
 844   { NULL }                /* end of list */
 845 };
 846
 847 \f
 848 static void
 849 print_language_names ()
 850 {
 851   language *lang;
 852   char **name, **ext;
 853
 854   puts ("\nThese are the currently supported languages, along with the\n\
 855 default file names and dot suffixes:");
 856   for (lang = lang_names; lang->name != NULL; lang++)
 857     {
 858       printf ("  %-*s", 10, lang->name);
 859       if (lang->filenames != NULL)
 860         for (name = lang->filenames; *name != NULL; name++)
 861           printf (" %s", *name);
 862       if (lang->suffixes != NULL)
 863         for (ext = lang->suffixes; *ext != NULL; ext++)
 864           printf (" .%s", *ext);
 865       puts ("");
 866     }
 867   puts ("where `auto' means use default language for files based on file\n\
 868 name suffix, and `none' means only do regexp processing on files.\n\
 869 If no language is specified and no matching suffix is found,\n\
 870 the first line of the file is read for a sharp-bang (#!) sequence\n\
 871 followed by the name of an interpreter.  If no such sequence is found,\n\
 872 Fortran is tried first; if no tags are found, C is tried next.\n\
 873 When parsing any C file, a \"class\" or \"template\" keyword\n\
 874 switches to C++.");
 875   puts ("Compressed files are supported using gzip and bzip2.\n\
 876 \n\
 877 For detailed help on a given language use, for example,\n\
 878 etags --help --lang=ada.");
 879 }
 880
 881 #ifndef EMACS_NAME
 882 # define EMACS_NAME "standalone"
 883 #endif
 884 #ifndef VERSION
 885 # define VERSION "17.34"
 886 #endif
 887 static void
 888 print_version ()
 889 {
 890   /* Makes it easier to update automatically. */
 891   char emacs_copyright[] = "Copyright (C) 2007 Free Software Foundation, Inc.";
 892
 893   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 894   puts (emacs_copyright);
 895   puts ("This program is distributed under the terms in ETAGS.README");
 896
 897   exit (EXIT_SUCCESS);
 898 }
 899
 900 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 901 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 902 #endif
 903
 904 static void
 905 print_help (argbuffer)
 906      argument *argbuffer;
 907 {
 908   bool help_for_lang = FALSE;
 909
 910   for (; argbuffer->arg_type != at_end; argbuffer++)
 911     if (argbuffer->arg_type == at_language)
 912       {
 913         if (help_for_lang)
 914           puts ("");
 915         puts (argbuffer->lang->help);
 916         help_for_lang = TRUE;
 917       }
 918
 919   if (help_for_lang)
 920     exit (EXIT_SUCCESS);
 921
 922   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 923 \n\
 924 These are the options accepted by %s.\n", progname, progname);
 925   if (NO_LONG_OPTIONS)
 926     puts ("WARNING: long option names do not work with this executable,\n\
 927 as it is not linked with GNU getopt.");
 928   else
 929     puts ("You may use unambiguous abbreviations for the long option names.");
 930   puts ("  A - as file name means read names from stdin (one per line).\n\
 931 Absolute names are stored in the output file as they are.\n\
 932 Relative ones are stored relative to the output file's directory.\n");
 933
 934   puts ("-a, --append\n\
 935         Append tag entries to existing tags file.");
 936
 937   puts ("--packages-only\n\
 938         For Ada files, only generate tags for packages.");
 939
 940   if (CTAGS)
 941     puts ("-B, --backward-search\n\
 942         Write the search commands for the tag entries using '?', the\n\
 943         backward-search command instead of '/', the forward-search command.");
 944
 945   /* This option is mostly obsolete, because etags can now automatically
 946      detect C++.  Retained for backward compatibility and for debugging and
 947      experimentation.  In principle, we could want to tag as C++ even
 948      before any "class" or "template" keyword.
 949   puts ("-C, --c++\n\
 950         Treat files whose name suffix defaults to C language as C++ files.");
 951   */
 952
 953   puts ("--declarations\n\
 954         In C and derived languages, create tags for function declarations,");
 955   if (CTAGS)
 956     puts ("\tand create tags for extern variables if --globals is used.");
 957   else
 958     puts
 959       ("\tand create tags for extern variables unless --no-globals is used.");
 960
 961   if (CTAGS)
 962     puts ("-d, --defines\n\
 963         Create tag entries for C #define constants and enum constants, too.");
 964   else
 965     puts ("-D, --no-defines\n\
 966         Don't create tag entries for C #define constants and enum constants.\n\
 967         This makes the tags file smaller.");
 968
 969   if (!CTAGS)
 970     puts ("-i FILE, --include=FILE\n\
 971         Include a note in tag file indicating that, when searching for\n\
 972         a tag, one should also consult the tags file FILE after\n\
 973         checking the current file.");
 974
 975   puts ("-l LANG, --language=LANG\n\
 976         Force the following files to be considered as written in the\n\
 977         named language up to the next --language=LANG option.");
 978
 979   if (CTAGS)
 980     puts ("--globals\n\
 981         Create tag entries for global variables in some languages.");
 982   else
 983     puts ("--no-globals\n\
 984         Do not create tag entries for global variables in some\n\
 985         languages.  This makes the tags file smaller.");
 986
 987   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 988     puts ("--no-line-directive\n\
 989         Ignore #line preprocessor directives in C and derived languages.");
 990
 991   if (CTAGS)
 992     puts ("--members\n\
 993         Create tag entries for members of structures in some languages.");
 994   else
 995     puts ("--no-members\n\
 996         Do not create tag entries for members of structures\n\
 997         in some languages.");
 998
 999   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1000         Make a tag for each line matching a regular expression pattern\n\
1001         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1002         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
1003         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1004         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1005   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1006         For example Tcl named tags can be created with:\n\
1007           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1008         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1009         `m' means to allow multi-line matches, `s' implies `m' and\n\
1010         causes dot to match any character, including newline.");
1011
1012   puts ("-R, --no-regex\n\
1013         Don't create tags from regexps for the following files.");
1014
1015   puts ("-I, --ignore-indentation\n\
1016         In C and C++ do not assume that a closing brace in the first\n\
1017         column is the final brace of a function or structure definition.");
1018
1019   puts ("-o FILE, --output=FILE\n\
1020         Write the tags to FILE.");
1021
1022   puts ("--parse-stdin=NAME\n\
1023         Read from standard input and record tags as belonging to file NAME.");
1024
1025   if (CTAGS)
1026     {
1027       puts ("-t, --typedefs\n\
1028         Generate tag entries for C and Ada typedefs.");
1029       puts ("-T, --typedefs-and-c++\n\
1030         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1031         and C++ member functions.");
1032     }
1033
1034   if (CTAGS)
1035     puts ("-u, --update\n\
1036         Update the tag entries for the given files, leaving tag\n\
1037         entries for other files in place.  Currently, this is\n\
1038         implemented by deleting the existing entries for the given\n\
1039         files and then rewriting the new entries at the end of the\n\
1040         tags file.  It is often faster to simply rebuild the entire\n\
1041         tag file than to use this.");
1042
1043   if (CTAGS)
1044     {
1045       puts ("-v, --vgrind\n\
1046         Print on the standard output an index of items intended for\n\
1047         human consumption, similar to the output of vgrind.  The index\n\
1048         is sorted, and gives the page number of each item.");
1049
1050       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1051         puts ("-w, --no-duplicates\n\
1052         Do not create duplicate tag entries, for compatibility with\n\
1053         traditional ctags.");
1054
1055       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1056         puts ("-w, --no-warn\n\
1057         Suppress warning messages about duplicate tag entries.");
1058
1059       puts ("-x, --cxref\n\
1060         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1061         The output uses line numbers instead of page numbers, but\n\
1062         beyond that the differences are cosmetic; try both to see\n\
1063         which you like.");
1064     }
1065
1066   puts ("-V, --version\n\
1067         Print the version of the program.\n\
1068 -h, --help\n\
1069         Print this help message.\n\
1070         Followed by one or more `--language' options prints detailed\n\
1071         help about tag generation for the specified languages.");
1072
1073   print_language_names ();
1074
1075   puts ("");
1076   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1077
1078   exit (EXIT_SUCCESS);
1079 }
1080
1081 \f
1082 #ifdef VMS                      /* VMS specific functions */
1083
/* End-of-string character. */
#define EOS     '\0'

/* Longest file specification a vspec can hold.
   This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A file specification: a length word followed by the text, with
   room for a terminating EOS. */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1093
1094 /*
1095  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1096  returning in each successive call the next file name matching the input
1097  spec. The function expects that each in_spec passed
1098  to it will be processed to completion; in particular, up to and
1099  including the call following that in which the last matching name
1100  is returned, the function ignores the value of in_spec, and will
1101  only start processing a new spec with the following call.
1102  If an error occurs, on return out_spec contains the value
1103  of in_spec when the error occurred.
1104
1105  With each successive file name returned in out_spec, the
1106  function's return value is one. When there are no more matching
1107  names the function returns zero. If on the first call no file
1108  matches in_spec, or there is any other error, -1 is returned.
1109 */
1110
1111 #include        <rmsdef.h>
1112 #include        <descrip.h>
1113 #define         OUTSIZE MAX_FILE_SPEC_LEN
1114 static short
1115 fn_exp (out, in)
1116      vspec *out;
1117      char *in;
1118 {
1119   static long context = 0;
1120   static struct dsc$descriptor_s o;
1121   static struct dsc$descriptor_s i;
1122   static bool pass1 = TRUE;
1123   long status;
1124   short retval;
1125
1126   if (pass1)
1127     {
1128       pass1 = FALSE;
1129       o.dsc$a_pointer = (char *) out;
1130       o.dsc$w_length = (short)OUTSIZE;
1131       i.dsc$a_pointer = in;
1132       i.dsc$w_length = (short)strlen(in);
1133       i.dsc$b_dtype = DSC$K_DTYPE_T;
1134       i.dsc$b_class = DSC$K_CLASS_S;
1135       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1136       o.dsc$b_class = DSC$K_CLASS_VS;
1137     }
1138   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1139     {
1140       out->body[out->curlen] = EOS;
1141       return 1;
1142     }
1143   else if (status == RMS$_NMF)
1144     retval = 0;
1145   else
1146     {
1147       strcpy(out->body, in);
1148       retval = -1;
1149     }
1150   lib$find_file_end(&context);
1151   pass1 = TRUE;
1152   return retval;
1153 }
1154
1155 /*
1156   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1157   name of each file specified by the provided arg expanding wildcards.
1158 */
1159 static char *
1160 gfnames (arg, p_error)
1161      char *arg;
1162      bool *p_error;
1163 {
1164   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1165
1166   switch (fn_exp (&filename, arg))
1167     {
1168     case 1:
1169       *p_error = FALSE;
1170       return filename.body;
1171     case 0:
1172       *p_error = FALSE;
1173       return NULL;
1174     default:
1175       *p_error = TRUE;
1176       return filename.body;
1177     }
1178 }
1179
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/* Stand-in for `system': report that commands cannot be run and
   return a failure status.  The explicit return matters because the
   caller inspects the result. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1187
1188 #define VERSION_DELIM   ';'
1189 char *massage_name (s)
1190      char *s;
1191 {
1192   char *start = s;
1193
1194   for ( ; *s; s++)
1195     if (*s == VERSION_DELIM)
1196       {
1197         *s = EOS;
1198         break;
1199       }
1200     else
1201       *s = lowcase (*s);
1202   return start;
1203 }
1204 #endif /* VMS */
1205
1206 \f
1207 int
1208 main (argc, argv)
1209      int argc;
1210      char *argv[];
1211 {
1212   int i;
1213   unsigned int nincluded_files;
1214   char **included_files;
1215   argument *argbuffer;
1216   int current_arg, file_count;
1217   linebuffer filename_lb;
1218   bool help_asked = FALSE;
1219 #ifdef VMS
1220   bool got_err;
1221 #endif
1222  char *optstring;
1223  int opt;
1224
1225
1226 #ifdef DOS_NT
1227   _fmode = O_BINARY;   /* all of files are treated as binary files */
1228 #endif /* DOS_NT */
1229
1230   progname = argv[0];
1231   nincluded_files = 0;
1232   included_files = xnew (argc, char *);
1233   current_arg = 0;
1234   file_count = 0;
1235
1236   /* Allocate enough no matter what happens.  Overkill, but each one
1237      is small. */
1238   argbuffer = xnew (argc, argument);
1239
1240   /*
1241    * If etags, always find typedefs and structure tags.  Why not?
1242    * Also default to find macro constants, enum constants, struct
1243    * members and global variables.
1244    */
1245   if (!CTAGS)
1246     {
1247       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1248       globals = members = TRUE;
1249     }
1250
1251   /* When the optstring begins with a '-' getopt_long does not rearrange the
1252      non-options arguments to be at the end, but leaves them alone. */
1253   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1254                       "ac:Cf:Il:o:r:RSVhH",
1255                       (CTAGS) ? "BxdtTuvw" : "Di:");
1256
1257   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1258     switch (opt)
1259       {
1260       case 0:
1261         /* If getopt returns 0, then it has already processed a
1262            long-named option.  We should do nothing.  */
1263         break;
1264
1265       case 1:
1266         /* This means that a file name has been seen.  Record it. */
1267         argbuffer[current_arg].arg_type = at_filename;
1268         argbuffer[current_arg].what     = optarg;
1269         ++current_arg;
1270         ++file_count;
1271         break;
1272
1273       case STDIN:
1274         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1275         argbuffer[current_arg].arg_type = at_stdin;
1276         argbuffer[current_arg].what     = optarg;
1277         ++current_arg;
1278         ++file_count;
1279         if (parsing_stdin)
1280           fatal ("cannot parse standard input more than once", (char *)NULL);
1281         parsing_stdin = TRUE;
1282         break;
1283
1284         /* Common options. */
1285       case 'a': append_to_tagfile = TRUE;       break;
1286       case 'C': cplusplus = TRUE;               break;
1287       case 'f':         /* for compatibility with old makefiles */
1288       case 'o':
1289         if (tagfile)
1290           {
1291             error ("-o option may only be given once.", (char *)NULL);
1292             suggest_asking_for_help ();
1293             /* NOTREACHED */
1294           }
1295         tagfile = optarg;
1296         break;
1297       case 'I':
1298       case 'S':         /* for backward compatibility */
1299         ignoreindent = TRUE;
1300         break;
1301       case 'l':
1302         {
1303           language *lang = get_language_from_langname (optarg);
1304           if (lang != NULL)
1305             {
1306               argbuffer[current_arg].lang = lang;
1307               argbuffer[current_arg].arg_type = at_language;
1308               ++current_arg;
1309             }
1310         }
1311         break;
1312       case 'c':
1313         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1314         optarg = concat (optarg, "i", ""); /* memory leak here */
1315         /* FALLTHRU */
1316       case 'r':
1317         argbuffer[current_arg].arg_type = at_regexp;
1318         argbuffer[current_arg].what = optarg;
1319         ++current_arg;
1320         break;
1321       case 'R':
1322         argbuffer[current_arg].arg_type = at_regexp;
1323         argbuffer[current_arg].what = NULL;
1324         ++current_arg;
1325         break;
1326       case 'V':
1327         print_version ();
1328         break;
1329       case 'h':
1330       case 'H':
1331         help_asked = TRUE;
1332         break;
1333
1334         /* Etags options */
1335       case 'D': constantypedefs = FALSE;                        break;
1336       case 'i': included_files[nincluded_files++] = optarg;     break;
1337
1338         /* Ctags options. */
1339       case 'B': searchar = '?';                                 break;
1340       case 'd': constantypedefs = TRUE;                         break;
1341       case 't': typedefs = TRUE;                                break;
1342       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1343       case 'u': update = TRUE;                                  break;
1344       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1345       case 'x': cxref_style = TRUE;                             break;
1346       case 'w': no_warnings = TRUE;                             break;
1347       default:
1348         suggest_asking_for_help ();
1349         /* NOTREACHED */
1350       }
1351
1352   /* No more options.  Store the rest of arguments. */
1353   for (; optind < argc; optind++)
1354     {
1355       argbuffer[current_arg].arg_type = at_filename;
1356       argbuffer[current_arg].what = argv[optind];
1357       ++current_arg;
1358       ++file_count;
1359     }
1360
1361   argbuffer[current_arg].arg_type = at_end;
1362
1363   if (help_asked)
1364     print_help (argbuffer);
1365     /* NOTREACHED */
1366
1367   if (nincluded_files == 0 && file_count == 0)
1368     {
1369       error ("no input files specified.", (char *)NULL);
1370       suggest_asking_for_help ();
1371       /* NOTREACHED */
1372     }
1373
1374   if (tagfile == NULL)
1375     tagfile = CTAGS ? "tags" : "TAGS";
1376   cwd = etags_getcwd ();        /* the current working directory */
1377   if (cwd[strlen (cwd) - 1] != '/')
1378     {
1379       char *oldcwd = cwd;
1380       cwd = concat (oldcwd, "/", "");
1381       free (oldcwd);
1382     }
1383   /* Relative file names are made relative to the current directory. */
1384   if (streq (tagfile, "-")
1385       || strneq (tagfile, "/dev/", 5))
1386     tagfiledir = cwd;
1387   else
1388     tagfiledir = absolute_dirname (tagfile, cwd);
1389
1390   init ();                      /* set up boolean "functions" */
1391
1392   linebuffer_init (&lb);
1393   linebuffer_init (&filename_lb);
1394   linebuffer_init (&filebuf);
1395   linebuffer_init (&token_name);
1396
1397   if (!CTAGS)
1398     {
1399       if (streq (tagfile, "-"))
1400         {
1401           tagf = stdout;
1402 #ifdef DOS_NT
1403           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1404              doesn't take effect until after `stdout' is already open). */
1405           if (!isatty (fileno (stdout)))
1406             setmode (fileno (stdout), O_BINARY);
1407 #endif /* DOS_NT */
1408         }
1409       else
1410         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1411       if (tagf == NULL)
1412         pfatal (tagfile);
1413     }
1414
1415   /*
1416    * Loop through files finding functions.
1417    */
1418   for (i = 0; i < current_arg; i++)
1419     {
1420       static language *lang;    /* non-NULL if language is forced */
1421       char *this_file;
1422
1423       switch (argbuffer[i].arg_type)
1424         {
1425         case at_language:
1426           lang = argbuffer[i].lang;
1427           break;
1428         case at_regexp:
1429           analyse_regex (argbuffer[i].what);
1430           break;
1431         case at_filename:
1432 #ifdef VMS
1433           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1434             {
1435               if (got_err)
1436                 {
1437                   error ("can't find file %s\n", this_file);
1438                   argc--, argv++;
1439                 }
1440               else
1441                 {
1442                   this_file = massage_name (this_file);
1443                 }
1444 #else
1445               this_file = argbuffer[i].what;
1446 #endif
1447               /* Input file named "-" means read file names from stdin
1448                  (one per line) and use them. */
1449               if (streq (this_file, "-"))
1450                 {
1451                   if (parsing_stdin)
1452                     fatal ("cannot parse standard input AND read file names from it",
1453                            (char *)NULL);
1454                   while (readline_internal (&filename_lb, stdin) > 0)
1455                     process_file_name (filename_lb.buffer, lang);
1456                 }
1457               else
1458                 process_file_name (this_file, lang);
1459 #ifdef VMS
1460             }
1461 #endif
1462           break;
1463         case at_stdin:
1464           this_file = argbuffer[i].what;
1465           process_file (stdin, this_file, lang);
1466           break;
1467         }
1468     }
1469
1470   free_regexps ();
1471   free (lb.buffer);
1472   free (filebuf.buffer);
1473   free (token_name.buffer);
1474
1475   if (!CTAGS || cxref_style)
1476     {
1477       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1478       put_entries (nodehead);
1479       free_tree (nodehead);
1480       nodehead = NULL;
1481       if (!CTAGS)
1482         {
1483           fdesc *fdp;
1484
1485           /* Output file entries that have no tags. */
1486           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1487             if (!fdp->written)
1488               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1489
1490           while (nincluded_files-- > 0)
1491             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1492
1493           if (fclose (tagf) == EOF)
1494             pfatal (tagfile);
1495         }
1496
1497       exit (EXIT_SUCCESS);
1498     }
1499
1500   if (update)
1501     {
1502       char cmd[BUFSIZ];
1503       for (i = 0; i < current_arg; ++i)
1504         {
1505           switch (argbuffer[i].arg_type)
1506             {
1507             case at_filename:
1508             case at_stdin:
1509               break;
1510             default:
1511               continue;         /* the for loop */
1512             }
1513           sprintf (cmd,
1514                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1515                    tagfile, argbuffer[i].what, tagfile);
1516           if (system (cmd) != EXIT_SUCCESS)
1517             fatal ("failed to execute shell command", (char *)NULL);
1518         }
1519       append_to_tagfile = TRUE;
1520     }
1521
1522   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1523   if (tagf == NULL)
1524     pfatal (tagfile);
1525   put_entries (nodehead);       /* write all the tags (CTAGS) */
1526   free_tree (nodehead);
1527   nodehead = NULL;
1528   if (fclose (tagf) == EOF)
1529     pfatal (tagfile);
1530
1531   if (CTAGS)
1532     if (append_to_tagfile || update)
1533       {
1534         char cmd[2*BUFSIZ+20];
1535         /* Maybe these should be used:
1536            setenv ("LC_COLLATE", "C", 1);
1537            setenv ("LC_ALL", "C", 1); */
1538         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1539         exit (system (cmd));
1540       }
1541   return EXIT_SUCCESS;
1542 }
1543
1544
1545 /*
1546  * Return a compressor given the file name.  If EXTPTR is non-zero,
1547  * return a pointer into FILE where the compressor-specific
1548  * extension begins.  If no compressor is found, NULL is returned
1549  * and EXTPTR is not significant.
1550  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1551  */
1552 static compressor *
1553 get_compressor_from_suffix (file, extptr)
1554      char *file;
1555      char **extptr;
1556 {
1557   compressor *compr;
1558   char *slash, *suffix;
1559
1560   /* This relies on FN to be after canonicalize_filename,
1561      so we don't need to consider backslashes on DOS_NT.  */
1562   slash = etags_strrchr (file, '/');
1563   suffix = etags_strrchr (file, '.');
1564   if (suffix == NULL || suffix < slash)
1565     return NULL;
1566   if (extptr != NULL)
1567     *extptr = suffix;
1568   suffix += 1;
1569   /* Let those poor souls who live with DOS 8+3 file name limits get
1570      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1571      Only the first do loop is run if not MSDOS */
1572   do
1573     {
1574       for (compr = compressors; compr->suffix != NULL; compr++)
1575         if (streq (compr->suffix, suffix))
1576           return compr;
1577       if (!MSDOS)
1578         break;                  /* do it only once: not really a loop */
1579       if (extptr != NULL)
1580         *extptr = ++suffix;
1581     } while (*suffix != '\0');
1582   return NULL;
1583 }
1584
1585
1586
1587 /*
1588  * Return a language given the name.
1589  */
1590 static language *
1591 get_language_from_langname (name)
1592      const char *name;
1593 {
1594   language *lang;
1595
1596   if (name == NULL)
1597     error ("empty language name", (char *)NULL);
1598   else
1599     {
1600       for (lang = lang_names; lang->name != NULL; lang++)
1601         if (streq (name, lang->name))
1602           return lang;
1603       error ("unknown language \"%s\"", name);
1604     }
1605
1606   return NULL;
1607 }
1608
1609
1610 /*
1611  * Return a language given the interpreter name.
1612  */
1613 static language *
1614 get_language_from_interpreter (interpreter)
1615      char *interpreter;
1616 {
1617   language *lang;
1618   char **iname;
1619
1620   if (interpreter == NULL)
1621     return NULL;
1622   for (lang = lang_names; lang->name != NULL; lang++)
1623     if (lang->interpreters != NULL)
1624       for (iname = lang->interpreters; *iname != NULL; iname++)
1625         if (streq (*iname, interpreter))
1626             return lang;
1627
1628   return NULL;
1629 }
1630
1631
1632
1633 /*
1634  * Return a language given the file name.
1635  */
1636 static language *
1637 get_language_from_filename (file, case_sensitive)
1638      char *file;
1639      bool case_sensitive;
1640 {
1641   language *lang;
1642   char **name, **ext, *suffix;
1643
1644   /* Try whole file name first. */
1645   for (lang = lang_names; lang->name != NULL; lang++)
1646     if (lang->filenames != NULL)
1647       for (name = lang->filenames; *name != NULL; name++)
1648         if ((case_sensitive)
1649             ? streq (*name, file)
1650             : strcaseeq (*name, file))
1651           return lang;
1652
1653   /* If not found, try suffix after last dot. */
1654   suffix = etags_strrchr (file, '.');
1655   if (suffix == NULL)
1656     return NULL;
1657   suffix += 1;
1658   for (lang = lang_names; lang->name != NULL; lang++)
1659     if (lang->suffixes != NULL)
1660       for (ext = lang->suffixes; *ext != NULL; ext++)
1661         if ((case_sensitive)
1662             ? streq (*ext, suffix)
1663             : strcaseeq (*ext, suffix))
1664           return lang;
1665   return NULL;
1666 }
1667
1668 \f
1669 /*
1670  * This routine is called on each file argument.
1671  */
1672 static void
1673 process_file_name (file, lang)
1674      char *file;
1675      language *lang;
1676 {
1677   struct stat stat_buf;
1678   FILE *inf;
1679   fdesc *fdp;
1680   compressor *compr;
1681   char *compressed_name, *uncompressed_name;
1682   char *ext, *real_name;
1683   int retval;
1684
1685   canonicalize_filename (file);
1686   if (streq (file, tagfile) && !streq (tagfile, "-"))
1687     {
1688       error ("skipping inclusion of %s in self.", file);
1689       return;
1690     }
1691   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1692     {
1693       compressed_name = NULL;
1694       real_name = uncompressed_name = savestr (file);
1695     }
1696   else
1697     {
1698       real_name = compressed_name = savestr (file);
1699       uncompressed_name = savenstr (file, ext - file);
1700     }
1701
1702   /* If the canonicalized uncompressed name
1703      has already been dealt with, skip it silently. */
1704   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1705     {
1706       assert (fdp->infname != NULL);
1707       if (streq (uncompressed_name, fdp->infname))
1708         goto cleanup;
1709     }
1710
1711   if (stat (real_name, &stat_buf) != 0)
1712     {
1713       /* Reset real_name and try with a different name. */
1714       real_name = NULL;
1715       if (compressed_name != NULL) /* try with the given suffix */
1716         {
1717           if (stat (uncompressed_name, &stat_buf) == 0)
1718             real_name = uncompressed_name;
1719         }
1720       else                      /* try all possible suffixes */
1721         {
1722           for (compr = compressors; compr->suffix != NULL; compr++)
1723             {
1724               compressed_name = concat (file, ".", compr->suffix);
1725               if (stat (compressed_name, &stat_buf) != 0)
1726                 {
1727                   if (MSDOS)
1728                     {
1729                       char *suf = compressed_name + strlen (file);
1730                       size_t suflen = strlen (compr->suffix) + 1;
1731                       for ( ; suf[1]; suf++, suflen--)
1732                         {
1733                           memmove (suf, suf + 1, suflen);
1734                           if (stat (compressed_name, &stat_buf) == 0)
1735                             {
1736                               real_name = compressed_name;
1737                               break;
1738                             }
1739                         }
1740                       if (real_name != NULL)
1741                         break;
1742                     } /* MSDOS */
1743                   free (compressed_name);
1744                   compressed_name = NULL;
1745                 }
1746               else
1747                 {
1748                   real_name = compressed_name;
1749                   break;
1750                 }
1751             }
1752         }
1753       if (real_name == NULL)
1754         {
1755           perror (file);
1756           goto cleanup;
1757         }
1758     } /* try with a different name */
1759
1760   if (!S_ISREG (stat_buf.st_mode))
1761     {
1762       error ("skipping %s: it is not a regular file.", real_name);
1763       goto cleanup;
1764     }
1765   if (real_name == compressed_name)
1766     {
1767       char *cmd = concat (compr->command, " ", real_name);
1768       inf = (FILE *) popen (cmd, "r");
1769       free (cmd);
1770     }
1771   else
1772     inf = fopen (real_name, "r");
1773   if (inf == NULL)
1774     {
1775       perror (real_name);
1776       goto cleanup;
1777     }
1778
1779   process_file (inf, uncompressed_name, lang);
1780
1781   if (real_name == compressed_name)
1782     retval = pclose (inf);
1783   else
1784     retval = fclose (inf);
1785   if (retval < 0)
1786     pfatal (file);
1787
1788  cleanup:
1789   if (compressed_name) free (compressed_name);
1790   if (uncompressed_name) free (uncompressed_name);
1791   last_node = NULL;
1792   curfdp = NULL;
1793   return;
1794 }
1795
1796 static void
1797 process_file (fh, fn, lang)
1798      FILE *fh;
1799      char *fn;
1800      language *lang;
1801 {
1802   static const fdesc emptyfdesc;
1803   fdesc *fdp;
1804
1805   /* Create a new input file description entry. */
1806   fdp = xnew (1, fdesc);
1807   *fdp = emptyfdesc;
1808   fdp->next = fdhead;
1809   fdp->infname = savestr (fn);
1810   fdp->lang = lang;
1811   fdp->infabsname = absolute_filename (fn, cwd);
1812   fdp->infabsdir = absolute_dirname (fn, cwd);
1813   if (filename_is_absolute (fn))
1814     {
1815       /* An absolute file name.  Canonicalize it. */
1816       fdp->taggedfname = absolute_filename (fn, NULL);
1817     }
1818   else
1819     {
1820       /* A file name relative to cwd.  Make it relative
1821          to the directory of the tags file. */
1822       fdp->taggedfname = relative_filename (fn, tagfiledir);
1823     }
1824   fdp->usecharno = TRUE;        /* use char position when making tags */
1825   fdp->prop = NULL;
1826   fdp->written = FALSE;         /* not written on tags file yet */
1827
1828   fdhead = fdp;
1829   curfdp = fdhead;              /* the current file description */
1830
1831   find_entries (fh);
1832
1833   /* If not Ctags, and if this is not metasource and if it contained no #line
1834      directives, we can write the tags and free all nodes pointing to
1835      curfdp. */
1836   if (!CTAGS
1837       && curfdp->usecharno      /* no #line directives in this file */
1838       && !curfdp->lang->metasource)
1839     {
1840       node *np, *prev;
1841
1842       /* Look for the head of the sublist relative to this file.  See add_node
1843          for the structure of the node tree. */
1844       prev = NULL;
1845       for (np = nodehead; np != NULL; prev = np, np = np->left)
1846         if (np->fdp == curfdp)
1847           break;
1848
1849       /* If we generated tags for this file, write and delete them. */
1850       if (np != NULL)
1851         {
1852           /* This is the head of the last sublist, if any.  The following
1853              instructions depend on this being true. */
1854           assert (np->left == NULL);
1855
1856           assert (fdhead == curfdp);
1857           assert (last_node->fdp == curfdp);
1858           put_entries (np);     /* write tags for file curfdp->taggedfname */
1859           free_tree (np);       /* remove the written nodes */
1860           if (prev == NULL)
1861             nodehead = NULL;    /* no nodes left */
1862           else
1863             prev->left = NULL;  /* delete the pointer to the sublist */
1864         }
1865     }
1866 }
1867
1868 /*
1869  * This routine sets up the boolean pseudo-functions which work
1870  * by setting boolean flags dependent upon the corresponding character.
1871  * Every char which is NOT in that string is not a white char.  Therefore,
1872  * all of the array "_wht" is set to FALSE, and then the elements
1873  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1874  * of a char is TRUE if it is the string "white", else FALSE.
1875  */
1876 static void
1877 init ()
1878 {
1879   register char *sp;
1880   register int i;
1881
1882   for (i = 0; i < CHARS; i++)
1883     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1884   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1885   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1886   notinname('\0') = notinname('\n');
1887   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1888   begtoken('\0') = begtoken('\n');
1889   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1890   intoken('\0') = intoken('\n');
1891   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1892   endtoken('\0') = endtoken('\n');
1893 }
1894
1895 /*
1896  * This routine opens the specified file and calls the function
1897  * which finds the function and type definitions.
1898  */
1899 static void
1900 find_entries (inf)
1901      FILE *inf;
1902 {
1903   char *cp;
1904   language *lang = curfdp->lang;
1905   Lang_function *parser = NULL;
1906
1907   /* If user specified a language, use it. */
1908   if (lang != NULL && lang->function != NULL)
1909     {
1910       parser = lang->function;
1911     }
1912
1913   /* Else try to guess the language given the file name. */
1914   if (parser == NULL)
1915     {
1916       lang = get_language_from_filename (curfdp->infname, TRUE);
1917       if (lang != NULL && lang->function != NULL)
1918         {
1919           curfdp->lang = lang;
1920           parser = lang->function;
1921         }
1922     }
1923
1924   /* Else look for sharp-bang as the first two characters. */
1925   if (parser == NULL
1926       && readline_internal (&lb, inf) > 0
1927       && lb.len >= 2
1928       && lb.buffer[0] == '#'
1929       && lb.buffer[1] == '!')
1930     {
1931       char *lp;
1932
1933       /* Set lp to point at the first char after the last slash in the
1934          line or, if no slashes, at the first nonblank.  Then set cp to
1935          the first successive blank and terminate the string. */
1936       lp = etags_strrchr (lb.buffer+2, '/');
1937       if (lp != NULL)
1938         lp += 1;
1939       else
1940         lp = skip_spaces (lb.buffer + 2);
1941       cp = skip_non_spaces (lp);
1942       *cp = '\0';
1943
1944       if (strlen (lp) > 0)
1945         {
1946           lang = get_language_from_interpreter (lp);
1947           if (lang != NULL && lang->function != NULL)
1948             {
1949               curfdp->lang = lang;
1950               parser = lang->function;
1951             }
1952         }
1953     }
1954
1955   /* We rewind here, even if inf may be a pipe.  We fail if the
1956      length of the first line is longer than the pipe block size,
1957      which is unlikely. */
1958   rewind (inf);
1959
1960   /* Else try to guess the language given the case insensitive file name. */
1961   if (parser == NULL)
1962     {
1963       lang = get_language_from_filename (curfdp->infname, FALSE);
1964       if (lang != NULL && lang->function != NULL)
1965         {
1966           curfdp->lang = lang;
1967           parser = lang->function;
1968         }
1969     }
1970
1971   /* Else try Fortran or C. */
1972   if (parser == NULL)
1973     {
1974       node *old_last_node = last_node;
1975
1976       curfdp->lang = get_language_from_langname ("fortran");
1977       find_entries (inf);
1978
1979       if (old_last_node == last_node)
1980         /* No Fortran entries found.  Try C. */
1981         {
1982           /* We do not tag if rewind fails.
1983              Only the file name will be recorded in the tags file. */
1984           rewind (inf);
1985           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1986           find_entries (inf);
1987         }
1988       return;
1989     }
1990
1991   if (!no_line_directive
1992       && curfdp->lang != NULL && curfdp->lang->metasource)
1993     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1994        file, or anyway we parsed a file that is automatically generated from
1995        this one.  If this is the case, the bingo.c file contained #line
1996        directives that generated tags pointing to this file.  Let's delete
1997        them all before parsing this file, which is the real source. */
1998     {
1999       fdesc **fdpp = &fdhead;
2000       while (*fdpp != NULL)
2001         if (*fdpp != curfdp
2002             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2003           /* We found one of those!  We must delete both the file description
2004              and all tags referring to it. */
2005           {
2006             fdesc *badfdp = *fdpp;
2007
2008             /* Delete the tags referring to badfdp->taggedfname
2009                that were obtained from badfdp->infname. */
2010             invalidate_nodes (badfdp, &nodehead);
2011
2012             *fdpp = badfdp->next; /* remove the bad description from the list */
2013             free_fdesc (badfdp);
2014           }
2015         else
2016           fdpp = &(*fdpp)->next; /* advance the list pointer */
2017     }
2018
2019   assert (parser != NULL);
2020
2021   /* Generic initialisations before reading from file. */
2022   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2023
2024   /* Generic initialisations before parsing file with readline. */
2025   lineno = 0;                  /* reset global line number */
2026   charno = 0;                  /* reset global char number */
2027   linecharno = 0;              /* reset global char number of line start */
2028
2029   parser (inf);
2030
2031   regex_tag_multiline ();
2032 }
2033
2034 \f
2035 /*
2036  * Check whether an implicitly named tag should be created,
2037  * then call `pfnote'.
2038  * NAME is a string that is internally copied by this function.
2039  *
2040  * TAGS format specification
2041  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2042  * The following is explained in some more detail in etc/ETAGS.EBNF.
2043  *
2044  * make_tag creates tags with "implicit tag names" (unnamed tags)
2045  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2046  *  1. NAME does not contain any of the characters in NONAM;
2047  *  2. LINESTART contains name as either a rightmost, or rightmost but
2048  *     one character, substring;
2049  *  3. the character, if any, immediately before NAME in LINESTART must
2050  *     be a character in NONAM;
2051  *  4. the character, if any, immediately after NAME in LINESTART must
2052  *     also be a character in NONAM.
2053  *
2054  * The implementation uses the notinname() macro, which recognises the
2055  * characters stored in the string `nonam'.
2056  * etags.el needs to use the same characters that are in NONAM.
2057  */
2058 static void
2059 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2060      char *name;                /* tag name, or NULL if unnamed */
2061      int namelen;               /* tag length */
2062      bool is_func;              /* tag is a function */
2063      char *linestart;           /* start of the line where tag is */
2064      int linelen;               /* length of the line where tag is */
2065      int lno;                   /* line number */
2066      long cno;                  /* character number */
2067 {
2068   bool named = (name != NULL && namelen > 0);
2069
2070   if (!CTAGS && named)          /* maybe set named to false */
2071     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2072        such that etags.el can guess a name from it. */
2073     {
2074       int i;
2075       register char *cp = name;
2076
2077       for (i = 0; i < namelen; i++)
2078         if (notinname (*cp++))
2079           break;
2080       if (i == namelen)                         /* rule #1 */
2081         {
2082           cp = linestart + linelen - namelen;
2083           if (notinname (linestart[linelen-1]))
2084             cp -= 1;                            /* rule #4 */
2085           if (cp >= linestart                   /* rule #2 */
2086               && (cp == linestart
2087                   || notinname (cp[-1]))        /* rule #3 */
2088               && strneq (name, cp, namelen))    /* rule #2 */
2089             named = FALSE;      /* use implicit tag name */
2090         }
2091     }
2092
2093   if (named)
2094     name = savenstr (name, namelen);
2095   else
2096     name = NULL;
2097   pfnote (name, is_func, linestart, linelen, lno, cno);
2098 }
2099
2100 /* Record a tag. */
2101 static void
2102 pfnote (name, is_func, linestart, linelen, lno, cno)
2103      char *name;                /* tag name, or NULL if unnamed */
2104      bool is_func;              /* tag is a function */
2105      char *linestart;           /* start of the line where tag is */
2106      int linelen;               /* length of the line where tag is */
2107      int lno;                   /* line number */
2108      long cno;                  /* character number */
2109 {
2110   register node *np;
2111
2112   assert (name == NULL || name[0] != '\0');
2113   if (CTAGS && name == NULL)
2114     return;
2115
2116   np = xnew (1, node);
2117
2118   /* If ctags mode, change name "main" to M<thisfilename>. */
2119   if (CTAGS && !cxref_style && streq (name, "main"))
2120     {
2121       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2122       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2123       fp = etags_strrchr (np->name, '.');
2124       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2125         fp[0] = '\0';
2126     }
2127   else
2128     np->name = name;
2129   np->valid = TRUE;
2130   np->been_warned = FALSE;
2131   np->fdp = curfdp;
2132   np->is_func = is_func;
2133   np->lno = lno;
2134   if (np->fdp->usecharno)
2135     /* Our char numbers are 0-base, because of C language tradition?
2136        ctags compatibility?  old versions compatibility?   I don't know.
2137        Anyway, since emacs's are 1-base we expect etags.el to take care
2138        of the difference.  If we wanted to have 1-based numbers, we would
2139        uncomment the +1 below. */
2140     np->cno = cno /* + 1 */ ;
2141   else
2142     np->cno = invalidcharno;
2143   np->left = np->right = NULL;
2144   if (CTAGS && !cxref_style)
2145     {
2146       if (strlen (linestart) < 50)
2147         np->regex = concat (linestart, "$", "");
2148       else
2149         np->regex = savenstr (linestart, 50);
2150     }
2151   else
2152     np->regex = savenstr (linestart, linelen);
2153
2154   add_node (np, &nodehead);
2155 }
2156
2157 /*
2158  * free_tree ()
2159  *      recurse on left children, iterate on right children.
2160  */
2161 static void
2162 free_tree (np)
2163      register node *np;
2164 {
2165   while (np)
2166     {
2167       register node *node_right = np->right;
2168       free_tree (np->left);
2169       if (np->name != NULL)
2170         free (np->name);
2171       free (np->regex);
2172       free (np);
2173       np = node_right;
2174     }
2175 }
2176
2177 /*
2178  * free_fdesc ()
2179  *      delete a file description
2180  */
2181 static void
2182 free_fdesc (fdp)
2183      register fdesc *fdp;
2184 {
2185   if (fdp->infname != NULL) free (fdp->infname);
2186   if (fdp->infabsname != NULL) free (fdp->infabsname);
2187   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2188   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2189   if (fdp->prop != NULL) free (fdp->prop);
2190   free (fdp);
2191 }
2192
2193 /*
2194  * add_node ()
2195  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2196  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2197  *      balancing.
2198  *
2199  *      add_node is the only function allowed to add nodes, so it can
2200  *      maintain state.
2201  */
2202 static void
2203 add_node (np, cur_node_p)
2204      node *np, **cur_node_p;
2205 {
2206   register int dif;
2207   register node *cur_node = *cur_node_p;
2208
2209   if (cur_node == NULL)
2210     {
2211       *cur_node_p = np;
2212       last_node = np;
2213       return;
2214     }
2215
2216   if (!CTAGS)
2217     /* Etags Mode */
2218     {
2219       /* For each file name, tags are in a linked sublist on the right
2220          pointer.  The first tags of different files are a linked list
2221          on the left pointer.  last_node points to the end of the last
2222          used sublist. */
2223       if (last_node != NULL && last_node->fdp == np->fdp)
2224         {
2225           /* Let's use the same sublist as the last added node. */
2226           assert (last_node->right == NULL);
2227           last_node->right = np;
2228           last_node = np;
2229         }
2230       else if (cur_node->fdp == np->fdp)
2231         {
2232           /* Scanning the list we found the head of a sublist which is
2233              good for us.  Let's scan this sublist. */
2234           add_node (np, &cur_node->right);
2235         }
2236       else
2237         /* The head of this sublist is not good for us.  Let's try the
2238            next one. */
2239         add_node (np, &cur_node->left);
2240     } /* if ETAGS mode */
2241
2242   else
2243     {
2244       /* Ctags Mode */
2245       dif = strcmp (np->name, cur_node->name);
2246
2247       /*
2248        * If this tag name matches an existing one, then
2249        * do not add the node, but maybe print a warning.
2250        */
2251       if (no_duplicates && !dif)
2252         {
2253           if (np->fdp == cur_node->fdp)
2254             {
2255               if (!no_warnings)
2256                 {
2257                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2258                            np->fdp->infname, lineno, np->name);
2259                   fprintf (stderr, "Second entry ignored\n");
2260                 }
2261             }
2262           else if (!cur_node->been_warned && !no_warnings)
2263             {
2264               fprintf
2265                 (stderr,
2266                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2267                  np->fdp->infname, cur_node->fdp->infname, np->name);
2268               cur_node->been_warned = TRUE;
2269             }
2270           return;
2271         }
2272
2273       /* Actually add the node */
2274       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2275     } /* if CTAGS mode */
2276 }
2277
2278 /*
2279  * invalidate_nodes ()
2280  *      Scan the node tree and invalidate all nodes pointing to the
2281  *      given file description (CTAGS case) or free them (ETAGS case).
2282  */
2283 static void
2284 invalidate_nodes (badfdp, npp)
2285      fdesc *badfdp;
2286      node **npp;
2287 {
2288   node *np = *npp;
2289
2290   if (np == NULL)
2291     return;
2292
2293   if (CTAGS)
2294     {
2295       if (np->left != NULL)
2296         invalidate_nodes (badfdp, &np->left);
2297       if (np->fdp == badfdp)
2298         np->valid = FALSE;
2299       if (np->right != NULL)
2300         invalidate_nodes (badfdp, &np->right);
2301     }
2302   else
2303     {
2304       assert (np->fdp != NULL);
2305       if (np->fdp == badfdp)
2306         {
2307           *npp = np->left;      /* detach the sublist from the list */
2308           np->left = NULL;      /* isolate it */
2309           free_tree (np);       /* free it */
2310           invalidate_nodes (badfdp, npp);
2311         }
2312       else
2313         invalidate_nodes (badfdp, &np->left);
2314     }
2315 }
2316
2317 \f
2318 static int total_size_of_entries __P((node *));
2319 static int number_len __P((long));
2320
/* Return how many decimal digits are needed to write NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits = 0;

  /* Count one digit, strip it off, and go on while something is left. */
  do
    {
      digits++;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
2331
2332 /*
2333  * Return total number of characters that put_entries will output for
2334  * the nodes in the linked list at the right of the specified node.
2335  * This count is irrelevant with etags.el since emacs 19.34 at least,
2336  * but is still supplied for backward compatibility.
2337  */
2338 static int
2339 total_size_of_entries (np)
2340      register node *np;
2341 {
2342   register int total = 0;
2343
2344   for (; np != NULL; np = np->right)
2345     if (np->valid)
2346       {
2347         total += strlen (np->regex) + 1;                /* pat\177 */
2348         if (np->name != NULL)
2349           total += strlen (np->name) + 1;               /* name\001 */
2350         total += number_len ((long) np->lno) + 1;       /* lno, */
2351         if (np->cno != invalidcharno)                   /* cno */
2352           total += number_len (np->cno);
2353         total += 1;                                     /* newline */
2354       }
2355
2356   return total;
2357 }
2358
2359 static void
2360 put_entries (np)
2361      register node *np;
2362 {
2363   register char *sp;
2364   static fdesc *fdp = NULL;
2365
2366   if (np == NULL)
2367     return;
2368
2369   /* Output subentries that precede this one */
2370   if (CTAGS)
2371     put_entries (np->left);
2372
2373   /* Output this entry */
2374   if (np->valid)
2375     {
2376       if (!CTAGS)
2377         {
2378           /* Etags mode */
2379           if (fdp != np->fdp)
2380             {
2381               fdp = np->fdp;
2382               fprintf (tagf, "\f\n%s,%d\n",
2383                        fdp->taggedfname, total_size_of_entries (np));
2384               fdp->written = TRUE;
2385             }
2386           fputs (np->regex, tagf);
2387           fputc ('\177', tagf);
2388           if (np->name != NULL)
2389             {
2390               fputs (np->name, tagf);
2391               fputc ('\001', tagf);
2392             }
2393           fprintf (tagf, "%d,", np->lno);
2394           if (np->cno != invalidcharno)
2395             fprintf (tagf, "%ld", np->cno);
2396           fputs ("\n", tagf);
2397         }
2398       else
2399         {
2400           /* Ctags mode */
2401           if (np->name == NULL)
2402             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2403
2404           if (cxref_style)
2405             {
2406               if (vgrind_style)
2407                 fprintf (stdout, "%s %s %d\n",
2408                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2409               else
2410                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2411                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2412             }
2413           else
2414             {
2415               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2416
2417               if (np->is_func)
2418                 {               /* function or #define macro with args */
2419                   putc (searchar, tagf);
2420                   putc ('^', tagf);
2421
2422                   for (sp = np->regex; *sp; sp++)
2423                     {
2424                       if (*sp == '\\' || *sp == searchar)
2425                         putc ('\\', tagf);
2426                       putc (*sp, tagf);
2427                     }
2428                   putc (searchar, tagf);
2429                 }
2430               else
2431                 {               /* anything else; text pattern inadequate */
2432                   fprintf (tagf, "%d", np->lno);
2433                 }
2434               putc ('\n', tagf);
2435             }
2436         }
2437     } /* if this node contains a valid tag */
2438
2439   /* Output subentries that follow this one */
2440   put_entries (np->right);
2441   if (!CTAGS)
2442     put_entries (np->left);
2443 }
2444
2445 \f
2446 /* C extensions. */
2447 #define C_EXT   0x00fff         /* C extensions */
2448 #define C_PLAIN 0x00000         /* C */
2449 #define C_PLPL  0x00001         /* C++ */
2450 #define C_STAR  0x00003         /* C* */
2451 #define C_JAVA  0x00005         /* JAVA */
2452 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
2453 #define YACC    0x10000         /* yacc file */
2454
/*
 * The C symbol tables.
 * These are the categories a keyword can have in the `type' field of
 * a struct C_stab_entry.  The keywords named below are those listed
 * for each category in the gperf input that follows.
 */
enum sym_type
{
  st_none,
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, import,
                           package, friend */
  st_C_attribute,       /* __attribute__ */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_class,           /* class */
  st_C_template,        /* template */
  st_C_struct,          /* interface, namespace, domain, union, struct */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define, undef */
  st_C_typedef          /* typedef */
};
2469
2470 static unsigned int hash __P((const char *, unsigned int));
2471 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2472 static enum sym_type C_symtype __P((char *, int, int));
2473
2474 /* Feed stuff between (but not including) %[ and %] lines to:
2475      gperf -m 5
2476 %[
2477 %compare-strncmp
2478 %enum
2479 %struct-type
2480 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2481 %%
2482 if,             0,                      st_C_ignore
2483 for,            0,                      st_C_ignore
2484 while,          0,                      st_C_ignore
2485 switch,         0,                      st_C_ignore
2486 return,         0,                      st_C_ignore
2487 __attribute__,  0,                      st_C_attribute
2488 @interface,     0,                      st_C_objprot
2489 @protocol,      0,                      st_C_objprot
2490 @implementation,0,                      st_C_objimpl
2491 @end,           0,                      st_C_objend
2492 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2493 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2494 friend,         C_PLPL,                 st_C_ignore
2495 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2496 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2497 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2498 class,          0,                      st_C_class
2499 namespace,      C_PLPL,                 st_C_struct
2500 domain,         C_STAR,                 st_C_struct
2501 union,          0,                      st_C_struct
2502 struct,         0,                      st_C_struct
2503 extern,         0,                      st_C_extern
2504 enum,           0,                      st_C_enum
2505 typedef,        0,                      st_C_typedef
2506 define,         0,                      st_C_define
2507 undef,          0,                      st_C_define
2508 operator,       C_PLPL,                 st_C_operator
2509 template,       0,                      st_C_template
2510 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2511 DEFUN,          0,                      st_C_gnumacro
2512 SYSCALL,        0,                      st_C_gnumacro
2513 ENTRY,          0,                      st_C_gnumacro
2514 PSEUDO,         0,                      st_C_gnumacro
2515 # These are defined inside C functions, so currently they are not met.
2516 # EXFUN used in glibc, DEFVAR_* in emacs.
2517 #EXFUN,         0,                      st_C_gnumacro
2518 #DEFVAR_,       0,                      st_C_gnumacro
2519 %]
2520 and replace lines between %< and %> with its output, then:
2521  - remove the #if characterset check
2522  - make in_word_set static and not inline. */
2523 /*%<*/
2524 /* C code produced by gperf version 3.0.1 */
2525 /* Command-line: gperf -m 5  */
2526 /* Computed positions: -k'2-3' */
2527
2528 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2529 /* maximum key range = 33, duplicates = 0 */
2530
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Perfect hash over the keyword list: the word length plus the
   association values of its second and, for words longer than two
   characters, third character.  STR must hold at least two characters;
   in_word_set guarantees that before calling. */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int sum = len;

  /* Every length other than two also contributes the third character. */
  if (sum != 2)
    sum += asso_values[(unsigned char) str[2]];

  /* The second character always contributes. */
  sum += asso_values[(unsigned char) str[1]];

  return sum;
}
2585
2586 static struct C_stab_entry *
2587 in_word_set (str, len)
2588      register const char *str;
2589      register unsigned int len;
2590 {
2591   enum
2592     {
2593       TOTAL_KEYWORDS = 32,
2594       MIN_WORD_LENGTH = 2,
2595       MAX_WORD_LENGTH = 15,
2596       MIN_HASH_VALUE = 2,
2597       MAX_HASH_VALUE = 34
2598     };
2599
2600   static struct C_stab_entry wordlist[] =
2601     {
2602       {""}, {""},
2603       {"if",            0,                      st_C_ignore},
2604       {""},
2605       {"@end",          0,                      st_C_objend},
2606       {"union",         0,                      st_C_struct},
2607       {"define",                0,                      st_C_define},
2608       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2609       {"template",      0,                      st_C_template},
2610       {"operator",      C_PLPL,                 st_C_operator},
2611       {"@interface",    0,                      st_C_objprot},
2612       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2613       {"friend",                C_PLPL,                 st_C_ignore},
2614       {"typedef",       0,                      st_C_typedef},
2615       {"return",                0,                      st_C_ignore},
2616       {"@implementation",0,                     st_C_objimpl},
2617       {"@protocol",     0,                      st_C_objprot},
2618       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2619       {"extern",                0,                      st_C_extern},
2620       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2621       {"struct",                0,                      st_C_struct},
2622       {"domain",                C_STAR,                 st_C_struct},
2623       {"switch",                0,                      st_C_ignore},
2624       {"enum",          0,                      st_C_enum},
2625       {"for",           0,                      st_C_ignore},
2626       {"namespace",     C_PLPL,                 st_C_struct},
2627       {"class",         0,                      st_C_class},
2628       {"while",         0,                      st_C_ignore},
2629       {"undef",         0,                      st_C_define},
2630       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2631       {"__attribute__", 0,                      st_C_attribute},
2632       {"SYSCALL",       0,                      st_C_gnumacro},
2633       {"ENTRY",         0,                      st_C_gnumacro},
2634       {"PSEUDO",                0,                      st_C_gnumacro},
2635       {"DEFUN",         0,                      st_C_gnumacro}
2636     };
2637
2638   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2639     {
2640       register int key = hash (str, len);
2641
2642       if (key <= MAX_HASH_VALUE && key >= 0)
2643         {
2644           register const char *s = wordlist[key].name;
2645
2646           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2647             return &wordlist[key];
2648         }
2649     }
2650   return 0;
2651 }
2652 /*%>*/
2653
2654 static enum sym_type
2655 C_symtype (str, len, c_ext)
2656      char *str;
2657      int len;
2658      int c_ext;
2659 {
2660   register struct C_stab_entry *se = in_word_set (str, len);
2661
2662   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2663     return st_none;
2664   return se->type;
2665 }
2666
2667 \f
/*
 * Set while an __attribute__ ((list)) construct is being skipped.
 */
static bool inattribute;

/*
 * State variable of the small finite automaton that recognizes
 * C functions and variables.
 */
static enum
{
  /* nothing seen */
  fvnone,
  /* Emacs DEFUN keyword seen */
  fdefunkey,
  /* Emacs DEFUN name seen */
  fdefunname,
  /* func: operator keyword seen (cplpl) */
  foperator,
  /* function or variable name seen */
  fvnameseen,
  /* func: just after open parenthesis */
  fstartlist,
  /* func: in parameter list */
  finlist,
  /* func: after parameter list */
  flistseen,
  /* func: before open brace */
  fignore,
  /* var-like: ignore until ';' */
  vignore
} fvdef;

/* Function or variable: the extern keyword has been seen. */
static bool fvextern;
2692
/*
 * State variable of the small finite automaton that recognizes typedefs.
 */
static enum
{
  /* nothing seen */
  tnone,
  /* typedef keyword seen */
  tkeyseen,
  /* defined type seen */
  ttypeseen,
  /* inside typedef body */
  tinbody,
  /* just before typedef tag */
  tend,
  /* junk after typedef tag */
  tignore
} typdef;
2706
/*
 * State variable of the small finite automaton that recognizes
 * struct-like constructs (enum, struct and union).
 */
static enum
{
  /* nothing seen yet, or in struct body if bracelev > 0 */
  snone,
  /* struct-like keyword seen */
  skeyseen,
  /* struct-like tag seen */
  stagseen,
  /* colon seen after struct-like tag */
  scolonseen
} structdef;
2720
/*
 * Name of the current Objective C class; meaningful only while objdef
 * is different from onone.
 */
static char *objtag = "<uninited>";
2725
/*
 * State variable of the little automaton that tracks preprocessor lines.
 */
static enum
{
  /* nothing seen */
  dnone,
  /* '#' seen as first char on line */
  dsharpseen,
  /* '#' and 'define' seen */
  ddefineseen,
  /* ignore rest of line */
  dignorerest
} definedef;
2736
/*
 * State variable of the automaton for Objective C protocols and
 * implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  /* nothing seen */
  onone,
  /* @interface or @protocol seen */
  oprotocol,
  /* @implementation seen */
  oimplementation,
  /* class name seen */
  otagseen,
  /* parenthesis before category seen */
  oparenseen,
  /* category name seen */
  ocatseen,
  /* in @implementation body */
  oinbody,
  /* in @implementation body, after +/- */
  omethodsign,
  /* after method name */
  omethodtag,
  /* after method colon */
  omethodcolon,
  /* after method parameter */
  omethodparm,
  /* wait for @end */
  oignore
} objdef;
2756
2757
/*
 * Description of the token most recently read and of how it should be
 * tagged.  make_C_tag builds a tag from this information.
 */
static struct tok
{
  /* Generic part: these members can carry any string around. */
  char *line;			/* string containing the token */
  int offset;			/* where the token starts in LINE */
  int length;			/* token length */

  /* Tag-specific part: when a tag is being created, the token described
     above is the pattern used for that tag. */
  bool valid;			/* a tag may be created from this token;
				   must be reset to FALSE whenever a state
				   machine is reset prematurely */
  bool named;			/* create a named tag */
  int lineno;			/* source line number of tag */
  long linepos;			/* source char number of tag */
} token;			/* latest token read */
2780
2781 /*
2782  * Variables and functions for dealing with nested structures.
2783  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2784  */
2785 static void pushclass_above __P((int, char *, int));
2786 static void popclass_above __P((int));
2787 static void write_classname __P((linebuffer *, char *qualifier));
2788
2789 static struct {
2790   char **cname;                 /* nested class names */
2791   int *bracelev;                /* nested class brace level */
2792   int nl;                       /* class nesting level (elements used) */
2793   int size;                     /* length of the array */
2794 } cstack;                       /* stack for nested declaration tags */
2795 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2796 #define nestlev         (cstack.nl)
2797 /* After struct keyword or in struct body, not inside a nested function. */
2798 #define instruct        (structdef == snone && nestlev > 0                      \
2799                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2800
2801 static void
2802 pushclass_above (bracelev, str, len)
2803      int bracelev;
2804      char *str;
2805      int len;
2806 {
2807   int nl;
2808
2809   popclass_above (bracelev);
2810   nl = cstack.nl;
2811   if (nl >= cstack.size)
2812     {
2813       int size = cstack.size *= 2;
2814       xrnew (cstack.cname, size, char *);
2815       xrnew (cstack.bracelev, size, int);
2816     }
2817   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2818   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2819   cstack.bracelev[nl] = bracelev;
2820   cstack.nl = nl + 1;
2821 }
2822
2823 static void
2824 popclass_above (bracelev)
2825      int bracelev;
2826 {
2827   int nl;
2828
2829   for (nl = cstack.nl - 1;
2830        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2831        nl--)
2832     {
2833       if (cstack.cname[nl] != NULL)
2834         free (cstack.cname[nl]);
2835       cstack.nl = nl;
2836     }
2837 }
2838
2839 static void
2840 write_classname (cn, qualifier)
2841      linebuffer *cn;
2842      char *qualifier;
2843 {
2844   int i, len;
2845   int qlen = strlen (qualifier);
2846
2847   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2848     {
2849       len = 0;
2850       cn->len = 0;
2851       cn->buffer[0] = '\0';
2852     }
2853   else
2854     {
2855       len = strlen (cstack.cname[0]);
2856       linebuffer_setlen (cn, len);
2857       strcpy (cn->buffer, cstack.cname[0]);
2858     }
2859   for (i = 1; i < cstack.nl; i++)
2860     {
2861       char *s;
2862       int slen;
2863
2864       s = cstack.cname[i];
2865       if (s == NULL)
2866         continue;
2867       slen = strlen (s);
2868       len += slen + qlen;
2869       linebuffer_setlen (cn, len);
2870       strncat (cn->buffer, qualifier, qlen);
2871       strncat (cn->buffer, s, slen);
2872     }
2873 }
2874
2875 \f
2876 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2877 static void make_C_tag __P((bool));
2878
2879 /*
2880  * consider_token ()
2881  *      checks to see if the current token is at the start of a
2882  *      function or variable, or corresponds to a typedef, or
2883  *      is a struct/union/enum tag, or #define, or an enum constant.
2884  *
2885  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2886  *      with args.  C_EXTP points to which language we are looking at.
2887  *
2888  * Globals
2889  *      fvdef                   IN OUT
2890  *      structdef               IN OUT
2891  *      definedef               IN OUT
2892  *      typdef                  IN OUT
2893  *      objdef                  IN OUT
2894  */
2895
2896 static bool
2897 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2898      register char *str;        /* IN: token pointer */
2899      register int len;          /* IN: token length */
2900      register int c;            /* IN: first char after the token */
2901      int *c_extp;               /* IN, OUT: C extensions mask */
2902      int bracelev;              /* IN: brace level */
2903      int parlev;                /* IN: parenthesis level */
2904      bool *is_func_or_var;      /* OUT: function or variable found */
2905 {
2906   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2907      structtype is the type of the preceding struct-like keyword, and
2908      structbracelev is the brace level where it has been seen. */
2909   static enum sym_type structtype;
2910   static int structbracelev;
2911   static enum sym_type toktype;
2912
2913
2914   toktype = C_symtype (str, len, *c_extp);
2915
2916   /*
2917    * Skip __attribute__
2918    */
2919   if (toktype == st_C_attribute)
2920     {
2921       inattribute = TRUE;
2922       return FALSE;
2923      }
2924
2925    /*
2926     * Advance the definedef state machine.
2927     */
2928    switch (definedef)
2929      {
2930      case dnone:
2931        /* We're not on a preprocessor line. */
2932        if (toktype == st_C_gnumacro)
2933          {
2934            fvdef = fdefunkey;
2935            return FALSE;
2936          }
2937        break;
2938      case dsharpseen:
2939        if (toktype == st_C_define)
2940          {
2941            definedef = ddefineseen;
2942          }
2943        else
2944          {
2945            definedef = dignorerest;
2946          }
2947        return FALSE;
2948      case ddefineseen:
2949        /*
2950         * Make a tag for any macro, unless it is a constant
2951         * and constantypedefs is FALSE.
2952         */
2953        definedef = dignorerest;
2954        *is_func_or_var = (c == '(');
2955        if (!*is_func_or_var && !constantypedefs)
2956          return FALSE;
2957        else
2958          return TRUE;
2959      case dignorerest:
2960        return FALSE;
2961      default:
2962        error ("internal error: definedef value.", (char *)NULL);
2963      }
2964
2965    /*
2966     * Now typedefs
2967     */
2968    switch (typdef)
2969      {
2970      case tnone:
2971        if (toktype == st_C_typedef)
2972          {
2973            if (typedefs)
2974              typdef = tkeyseen;
2975            fvextern = FALSE;
2976            fvdef = fvnone;
2977            return FALSE;
2978          }
2979        break;
2980      case tkeyseen:
2981        switch (toktype)
2982          {
2983          case st_none:
2984          case st_C_class:
2985          case st_C_struct:
2986          case st_C_enum:
2987            typdef = ttypeseen;
2988          }
2989        break;
2990      case ttypeseen:
2991        if (structdef == snone && fvdef == fvnone)
2992          {
2993            fvdef = fvnameseen;
2994            return TRUE;
2995          }
2996        break;
2997      case tend:
2998        switch (toktype)
2999          {
3000          case st_C_class:
3001          case st_C_struct:
3002          case st_C_enum:
3003            return FALSE;
3004          }
3005        return TRUE;
3006      }
3007
3008    /*
3009     * This structdef business is NOT invoked when we are ctags and the
3010     * file is plain C.  This is because a struct tag may have the same
3011     * name as another tag, and this loses with ctags.
3012     */
3013    switch (toktype)
3014      {
3015      case st_C_javastruct:
3016        if (structdef == stagseen)
3017          structdef = scolonseen;
3018        return FALSE;
3019      case st_C_template:
3020      case st_C_class:
3021        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
3022            && bracelev == 0
3023            && definedef == dnone && structdef == snone
3024            && typdef == tnone && fvdef == fvnone)
3025          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3026        if (toktype == st_C_template)
3027          break;
3028        /* FALLTHRU */
3029      case st_C_struct:
3030      case st_C_enum:
3031        if (parlev == 0
3032            && fvdef != vignore
3033            && (typdef == tkeyseen
3034                || (typedefs_or_cplusplus && structdef == snone)))
3035          {
3036            structdef = skeyseen;
3037            structtype = toktype;
3038            structbracelev = bracelev;
3039            if (fvdef == fvnameseen)
3040              fvdef = fvnone;
3041          }
3042        return FALSE;
3043      }
3044
3045    if (structdef == skeyseen)
3046      {
3047        structdef = stagseen;
3048        return TRUE;
3049      }
3050
3051    if (typdef != tnone)
3052      definedef = dnone;
3053
3054    /* Detect Objective C constructs. */
3055    switch (objdef)
3056      {
3057      case onone:
3058        switch (toktype)
3059          {
3060          case st_C_objprot:
3061            objdef = oprotocol;
3062            return FALSE;
3063          case st_C_objimpl:
3064            objdef = oimplementation;
3065            return FALSE;
3066          }
3067        break;
3068      case oimplementation:
3069        /* Save the class tag for functions or variables defined inside. */
3070        objtag = savenstr (str, len);
3071        objdef = oinbody;
3072        return FALSE;
3073      case oprotocol:
3074        /* Save the class tag for categories. */
3075        objtag = savenstr (str, len);
3076        objdef = otagseen;
3077        *is_func_or_var = TRUE;
3078        return TRUE;
3079      case oparenseen:
3080        objdef = ocatseen;
3081        *is_func_or_var = TRUE;
3082        return TRUE;
3083      case oinbody:
3084        break;
3085      case omethodsign:
3086        if (parlev == 0)
3087          {
3088            fvdef = fvnone;
3089            objdef = omethodtag;
3090            linebuffer_setlen (&token_name, len);
3091            strncpy (token_name.buffer, str, len);
3092            token_name.buffer[len] = '\0';
3093            return TRUE;
3094          }
3095        return FALSE;
3096      case omethodcolon:
3097        if (parlev == 0)
3098          objdef = omethodparm;
3099        return FALSE;
3100      case omethodparm:
3101        if (parlev == 0)
3102          {
3103            fvdef = fvnone;
3104            objdef = omethodtag;
3105            linebuffer_setlen (&token_name, token_name.len + len);
3106            strncat (token_name.buffer, str, len);
3107            return TRUE;
3108          }
3109        return FALSE;
3110      case oignore:
3111        if (toktype == st_C_objend)
3112          {
3113            /* Memory leakage here: the string pointed by objtag is
3114               never released, because many tests would be needed to
3115               avoid breaking on incorrect input code.  The amount of
3116               memory leaked here is the sum of the lengths of the
3117               class tags.
3118            free (objtag); */
3119            objdef = onone;
3120          }
3121        return FALSE;
3122      }
3123
3124    /* A function, variable or enum constant? */
3125    switch (toktype)
3126      {
3127      case st_C_extern:
3128        fvextern = TRUE;
3129        switch  (fvdef)
3130          {
3131          case finlist:
3132          case flistseen:
3133          case fignore:
3134          case vignore:
3135            break;
3136          default:
3137            fvdef = fvnone;
3138          }
3139        return FALSE;
3140      case st_C_ignore:
3141        fvextern = FALSE;
3142        fvdef = vignore;
3143        return FALSE;
3144      case st_C_operator:
3145        fvdef = foperator;
3146        *is_func_or_var = TRUE;
3147        return TRUE;
3148      case st_none:
3149        if (constantypedefs
3150            && structdef == snone
3151            && structtype == st_C_enum && bracelev > structbracelev)
3152          return TRUE;           /* enum constant */
3153        switch (fvdef)
3154          {
3155          case fdefunkey:
3156            if (bracelev > 0)
3157              break;
3158            fvdef = fdefunname;  /* GNU macro */
3159            *is_func_or_var = TRUE;
3160            return TRUE;
3161          case fvnone:
3162            switch (typdef)
3163              {
3164              case ttypeseen:
3165                return FALSE;
3166              case tnone:
3167                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3168                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3169                  {
3170                    fvdef = vignore;
3171                    return FALSE;
3172                  }
3173                break;
3174              }
3175           /* FALLTHRU */
3176           case fvnameseen:
3177           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3178             {
3179               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3180                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3181               fvdef = foperator;
3182               *is_func_or_var = TRUE;
3183               return TRUE;
3184             }
3185           if (bracelev > 0 && !instruct)
3186             break;
3187           fvdef = fvnameseen;   /* function or variable */
3188           *is_func_or_var = TRUE;
3189           return TRUE;
3190         }
3191       break;
3192     }
3193
3194   return FALSE;
3195 }
3196
3197 \f
3198 /*
3199  * C_entries often keeps pointers to tokens or lines which are older than
3200  * the line currently read.  By keeping two line buffers, and switching
3201  * them at end of line, it is possible to use those pointers.
3202  */
3203 static struct
3204 {
3205   long linepos;
3206   linebuffer lb;
3207 } lbs[2];
3208
3209 #define current_lb_is_new (newndx == curndx)
3210 #define switch_line_buffers() (curndx = 1 - curndx)
3211
3212 #define curlb (lbs[curndx].lb)
3213 #define newlb (lbs[newndx].lb)
3214 #define curlinepos (lbs[curndx].linepos)
3215 #define newlinepos (lbs[newndx].linepos)
3216
3217 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3218 #define cplpl (c_ext & C_PLPL)
3219 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3220
3221 #define CNL_SAVE_DEFINEDEF()                                            \
3222 do {                                                                    \
3223   curlinepos = charno;                                                  \
3224   readline (&curlb, inf);                                               \
3225   lp = curlb.buffer;                                                    \
3226   quotednl = FALSE;                                                     \
3227   newndx = curndx;                                                      \
3228 } while (0)
3229
3230 #define CNL()                                                           \
3231 do {                                                                    \
3232   CNL_SAVE_DEFINEDEF();                                                 \
3233   if (savetoken.valid)                                                  \
3234     {                                                                   \
3235       token = savetoken;                                                \
3236       savetoken.valid = FALSE;                                          \
3237     }                                                                   \
3238   definedef = dnone;                                                    \
3239 } while (0)
3240
3241
3242 static void
3243 make_C_tag (isfun)
3244      bool isfun;
3245 {
3246   /* This function is never called when token.valid is FALSE, but
3247      we must protect against invalid input or internal errors. */
3248   if (!DEBUG && !token.valid)
3249     return;
3250
3251   if (token.valid)
3252     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3253               token.offset+token.length+1, token.lineno, token.linepos);
3254   else                          /* this case is optimised away if !DEBUG */
3255     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3256               token_name.len + 17, isfun, token.line,
3257               token.offset+token.length+1, token.lineno, token.linepos);
3258
3259   token.valid = FALSE;
3260 }
3261
3262
3263 /*
3264  * C_entries ()
3265  *      This routine finds functions, variables, typedefs,
3266  *      #define's, enum constants and struct/union/enum definitions in
3267  *      C syntax and adds them to the list.
3268  */
3269 static void
3270 C_entries (c_ext, inf)
3271      int c_ext;                 /* extension of C */
3272      FILE *inf;                 /* input file */
3273 {
3274   register char c;              /* latest char read; '\0' for end of line */
3275   register char *lp;            /* pointer one beyond the character `c' */
3276   int curndx, newndx;           /* indices for current and new lb */
3277   register int tokoff;          /* offset in line of start of current token */
3278   register int toklen;          /* length of current token */
3279   char *qualifier;              /* string used to qualify names */
3280   int qlen;                     /* length of qualifier */
3281   int bracelev;                 /* current brace level */
3282   int bracketlev;               /* current bracket level */
3283   int parlev;                   /* current parenthesis level */
3284   int attrparlev;               /* __attribute__ parenthesis level */
3285   int templatelev;              /* current template level */
3286   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3287   bool incomm, inquote, inchar, quotednl, midtoken;
3288   bool yacc_rules;              /* in the rules part of a yacc file */
3289   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3290
3291
3292   linebuffer_init (&lbs[0].lb);
3293   linebuffer_init (&lbs[1].lb);
3294   if (cstack.size == 0)
3295     {
3296       cstack.size = (DEBUG) ? 1 : 4;
3297       cstack.nl = 0;
3298       cstack.cname = xnew (cstack.size, char *);
3299       cstack.bracelev = xnew (cstack.size, int);
3300     }
3301
3302   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3303   curndx = newndx = 0;
3304   lp = curlb.buffer;
3305   *lp = 0;
3306
3307   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3308   structdef = snone; definedef = dnone; objdef = onone;
3309   yacc_rules = FALSE;
3310   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3311   token.valid = savetoken.valid = FALSE;
3312   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3313   if (cjava)
3314     { qualifier = "."; qlen = 1; }
3315   else
3316     { qualifier = "::"; qlen = 2; }
3317
3318
3319   while (!feof (inf))
3320     {
3321       c = *lp++;
3322       if (c == '\\')
3323         {
3324           /* If we are at the end of the line, the next character is a
3325              '\0'; do not skip it, because it is what tells us
3326              to read the next line.  */
3327           if (*lp == '\0')
3328             {
3329               quotednl = TRUE;
3330               continue;
3331             }
3332           lp++;
3333           c = ' ';
3334         }
3335       else if (incomm)
3336         {
3337           switch (c)
3338             {
3339             case '*':
3340               if (*lp == '/')
3341                 {
3342                   c = *lp++;
3343                   incomm = FALSE;
3344                 }
3345               break;
3346             case '\0':
3347               /* Newlines inside comments do not end macro definitions in
3348                  traditional cpp. */
3349               CNL_SAVE_DEFINEDEF ();
3350               break;
3351             }
3352           continue;
3353         }
3354       else if (inquote)
3355         {
3356           switch (c)
3357             {
3358             case '"':
3359               inquote = FALSE;
3360               break;
3361             case '\0':
3362               /* Newlines inside strings do not end macro definitions
3363                  in traditional cpp, even though compilers don't
3364                  usually accept them. */
3365               CNL_SAVE_DEFINEDEF ();
3366               break;
3367             }
3368           continue;
3369         }
3370       else if (inchar)
3371         {
3372           switch (c)
3373             {
3374             case '\0':
3375               /* Hmmm, something went wrong. */
3376               CNL ();
3377               /* FALLTHRU */
3378             case '\'':
3379               inchar = FALSE;
3380               break;
3381             }
3382           continue;
3383         }
3384       else if (bracketlev > 0)
3385         {
3386           switch (c)
3387             {
3388             case ']':
3389               if (--bracketlev > 0)
3390                 continue;
3391               break;
3392             case '\0':
3393               CNL_SAVE_DEFINEDEF ();
3394               break;
3395             }
3396           continue;
3397         }
3398       else switch (c)
3399         {
3400         case '"':
3401           inquote = TRUE;
3402           if (inattribute)
3403             break;
3404           switch (fvdef)
3405             {
3406             case fdefunkey:
3407             case fstartlist:
3408             case finlist:
3409             case fignore:
3410             case vignore:
3411               break;
3412             default:
3413               fvextern = FALSE;
3414               fvdef = fvnone;
3415             }
3416           continue;
3417         case '\'':
3418           inchar = TRUE;
3419           if (inattribute)
3420             break;
3421           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3422             {
3423               fvextern = FALSE;
3424               fvdef = fvnone;
3425             }
3426           continue;
3427         case '/':
3428           if (*lp == '*')
3429             {
3430               incomm = TRUE;
3431               lp++;
3432               c = ' ';
3433             }
3434           else if (/* cplpl && */ *lp == '/')
3435             {
3436               c = '\0';
3437             }
3438           break;
3439         case '%':
3440           if ((c_ext & YACC) && *lp == '%')
3441             {
3442               /* Entering or exiting rules section in yacc file. */
3443               lp++;
3444               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3445               typdef = tnone; structdef = snone;
3446               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3447               bracelev = 0;
3448               yacc_rules = !yacc_rules;
3449               continue;
3450             }
3451           else
3452             break;
3453         case '#':
3454           if (definedef == dnone)
3455             {
3456               char *cp;
3457               bool cpptoken = TRUE;
3458
3459               /* Look back on this line.  If all blanks, or nonblanks
3460                  followed by an end of comment, this is a preprocessor
3461                  token. */
3462               for (cp = newlb.buffer; cp < lp-1; cp++)
3463                 if (!iswhite (*cp))
3464                   {
3465                     if (*cp == '*' && *(cp+1) == '/')
3466                       {
3467                         cp++;
3468                         cpptoken = TRUE;
3469                       }
3470                     else
3471                       cpptoken = FALSE;
3472                   }
3473               if (cpptoken)
3474                 definedef = dsharpseen;
3475             } /* if (definedef == dnone) */
3476           continue;
3477         case '[':
3478           bracketlev++;
3479             continue;
3480         } /* switch (c) */
3481
3482
3483       /* Consider token only if some involved conditions are satisfied. */
3484       if (typdef != tignore
3485           && definedef != dignorerest
3486           && fvdef != finlist
3487           && templatelev == 0
3488           && (definedef != dnone
3489               || structdef != scolonseen)
3490           && !inattribute)
3491         {
3492           if (midtoken)
3493             {
3494               if (endtoken (c))
3495                 {
3496                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3497                     /* This handles :: in the middle,
3498                        but not at the beginning of an identifier.
3499                        Also, space-separated :: is not recognised. */
3500                     {
3501                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3502                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3503                       lp += 2;
3504                       toklen += 2;
3505                       c = lp[-1];
3506                       goto still_in_token;
3507                     }
3508                   else
3509                     {
3510                       bool funorvar = FALSE;
3511
3512                       if (yacc_rules
3513                           || consider_token (newlb.buffer + tokoff, toklen, c,
3514                                              &c_ext, bracelev, parlev,
3515                                              &funorvar))
3516                         {
3517                           if (fvdef == foperator)
3518                             {
3519                               char *oldlp = lp;
3520                               lp = skip_spaces (lp-1);
3521                               if (*lp != '\0')
3522                                 lp += 1;
3523                               while (*lp != '\0'
3524                                      && !iswhite (*lp) && *lp != '(')
3525                                 lp += 1;
3526                               c = *lp++;
3527                               toklen += lp - oldlp;
3528                             }
3529                           token.named = FALSE;
3530                           if (!plainc
3531                               && nestlev > 0 && definedef == dnone)
3532                             /* in struct body */
3533                             {
3534                               write_classname (&token_name, qualifier);
3535                               linebuffer_setlen (&token_name,
3536                                                  token_name.len+qlen+toklen);
3537                               strcat (token_name.buffer, qualifier);
3538                               strncat (token_name.buffer,
3539                                        newlb.buffer + tokoff, toklen);
3540                               token.named = TRUE;
3541                             }
3542                           else if (objdef == ocatseen)
3543                             /* Objective C category */
3544                             {
3545                               int len = strlen (objtag) + 2 + toklen;
3546                               linebuffer_setlen (&token_name, len);
3547                               strcpy (token_name.buffer, objtag);
3548                               strcat (token_name.buffer, "(");
3549                               strncat (token_name.buffer,
3550                                        newlb.buffer + tokoff, toklen);
3551                               strcat (token_name.buffer, ")");
3552                               token.named = TRUE;
3553                             }
3554                           else if (objdef == omethodtag
3555                                    || objdef == omethodparm)
3556                             /* Objective C method */
3557                             {
3558                               token.named = TRUE;
3559                             }
3560                           else if (fvdef == fdefunname)
3561                             /* GNU DEFUN and similar macros */
3562                             {
3563                               bool defun = (newlb.buffer[tokoff] == 'F');
3564                               int off = tokoff;
3565                               int len = toklen;
3566
3567                               /* Rewrite the tag so that emacs lisp DEFUNs
3568                                  can be found by their elisp name */
3569                               if (defun)
3570                                 {
3571                                   off += 1;
3572                                   len -= 1;
3573                                 }
3574                               linebuffer_setlen (&token_name, len);
3575                               strncpy (token_name.buffer,
3576                                        newlb.buffer + off, len);
3577                               token_name.buffer[len] = '\0';
3578                               if (defun)
3579                                 while (--len >= 0)
3580                                   if (token_name.buffer[len] == '_')
3581                                     token_name.buffer[len] = '-';
3582                               token.named = defun;
3583                             }
3584                           else
3585                             {
3586                               linebuffer_setlen (&token_name, toklen);
3587                               strncpy (token_name.buffer,
3588                                        newlb.buffer + tokoff, toklen);
3589                               token_name.buffer[toklen] = '\0';
3590                               /* Name macros and members. */
3591                               token.named = (structdef == stagseen
3592                                              || typdef == ttypeseen
3593                                              || typdef == tend
3594                                              || (funorvar
3595                                                  && definedef == dignorerest)
3596                                              || (funorvar
3597                                                  && definedef == dnone
3598                                                  && structdef == snone
3599                                                  && bracelev > 0));
3600                             }
3601                           token.lineno = lineno;
3602                           token.offset = tokoff;
3603                           token.length = toklen;
3604                           token.line = newlb.buffer;
3605                           token.linepos = newlinepos;
3606                           token.valid = TRUE;
3607
3608                           if (definedef == dnone
3609                               && (fvdef == fvnameseen
3610                                   || fvdef == foperator
3611                                   || structdef == stagseen
3612                                   || typdef == tend
3613                                   || typdef == ttypeseen
3614                                   || objdef != onone))
3615                             {
3616                               if (current_lb_is_new)
3617                                 switch_line_buffers ();
3618                             }
3619                           else if (definedef != dnone
3620                                    || fvdef == fdefunname
3621                                    || instruct)
3622                             make_C_tag (funorvar);
3623                         }
3624                       else /* not yacc and consider_token failed */
3625                         {
3626                           if (inattribute && fvdef == fignore)
3627                             {
3628                               /* We have just met __attribute__ after a
3629                                  function parameter list: do not tag the
3630                                  function again. */
3631                               fvdef = fvnone;
3632                             }
3633                         }
3634                       midtoken = FALSE;
3635                     }
3636                 } /* if (endtoken (c)) */
3637               else if (intoken (c))
3638                 still_in_token:
3639                 {
3640                   toklen++;
3641                   continue;
3642                 }
3643             } /* if (midtoken) */
3644           else if (begtoken (c))
3645             {
3646               switch (definedef)
3647                 {
3648                 case dnone:
3649                   switch (fvdef)
3650                     {
3651                     case fstartlist:
3652                       /* This prevents tagging fb in
3653                          void (__attribute__((noreturn)) *fb) (void);
3654                          Fixing this is not easy and not very important. */
3655                       fvdef = finlist;
3656                       continue;
3657                     case flistseen:
3658                       if (plainc || declarations)
3659                         {
3660                           make_C_tag (TRUE); /* a function */
3661                           fvdef = fignore;
3662                         }
3663                       break;
3664                     }
3665                   if (structdef == stagseen && !cjava)
3666                     {
3667                       popclass_above (bracelev);
3668                       structdef = snone;
3669                     }
3670                   break;
3671                 case dsharpseen:
3672                   savetoken = token;
3673                   break;
3674                 }
3675               if (!yacc_rules || lp == newlb.buffer + 1)
3676                 {
3677                   tokoff = lp - 1 - newlb.buffer;
3678                   toklen = 1;
3679                   midtoken = TRUE;
3680                 }
3681               continue;
3682             } /* if (begtoken) */
3683         } /* if must look at token */
3684
3685
3686       /* Detect end of line, colon, comma, semicolon and various braces
3687          after having handled a token.*/
3688       switch (c)
3689         {
3690         case ':':
3691           if (inattribute)
3692             break;
3693           if (yacc_rules && token.offset == 0 && token.valid)
3694             {
3695               make_C_tag (FALSE); /* a yacc function */
3696               break;
3697             }
3698           if (definedef != dnone)
3699             break;
3700           switch (objdef)
3701             {
3702             case  otagseen:
3703               objdef = oignore;
3704               make_C_tag (TRUE); /* an Objective C class */
3705               break;
3706             case omethodtag:
3707             case omethodparm:
3708               objdef = omethodcolon;
3709               linebuffer_setlen (&token_name, token_name.len + 1);
3710               strcat (token_name.buffer, ":");
3711               break;
3712             }
3713           if (structdef == stagseen)
3714             {
3715               structdef = scolonseen;
3716               break;
3717             }
3718           /* Should be useless, but may be work as a safety net. */
3719           if (cplpl && fvdef == flistseen)
3720             {
3721               make_C_tag (TRUE); /* a function */
3722               fvdef = fignore;
3723               break;
3724             }
3725           break;
3726         case ';':
3727           if (definedef != dnone || inattribute)
3728             break;
3729           switch (typdef)
3730             {
3731             case tend:
3732             case ttypeseen:
3733               make_C_tag (FALSE); /* a typedef */
3734               typdef = tnone;
3735               fvdef = fvnone;
3736               break;
3737             case tnone:
3738             case tinbody:
3739             case tignore:
3740               switch (fvdef)
3741                 {
3742                 case fignore:
3743                   if (typdef == tignore || cplpl)
3744                     fvdef = fvnone;
3745                   break;
3746                 case fvnameseen:
3747                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3748                       || (members && instruct))
3749                     make_C_tag (FALSE); /* a variable */
3750                   fvextern = FALSE;
3751                   fvdef = fvnone;
3752                   token.valid = FALSE;
3753                   break;
3754                 case flistseen:
3755                   if ((declarations
3756                        && (cplpl || !instruct)
3757                        && (typdef == tnone || (typdef != tignore && instruct)))
3758                       || (members
3759                           && plainc && instruct))
3760                     make_C_tag (TRUE);  /* a function */
3761                   /* FALLTHRU */
3762                 default:
3763                   fvextern = FALSE;
3764                   fvdef = fvnone;
3765                   if (declarations
3766                        && cplpl && structdef == stagseen)
3767                     make_C_tag (FALSE); /* forward declaration */
3768                   else
3769                     token.valid = FALSE;
3770                 } /* switch (fvdef) */
3771               /* FALLTHRU */
3772             default:
3773               if (!instruct)
3774                 typdef = tnone;
3775             }
3776           if (structdef == stagseen)
3777             structdef = snone;
3778           break;
3779         case ',':
3780           if (definedef != dnone || inattribute)
3781             break;
3782           switch (objdef)
3783             {
3784             case omethodtag:
3785             case omethodparm:
3786               make_C_tag (TRUE); /* an Objective C method */
3787               objdef = oinbody;
3788               break;
3789             }
3790           switch (fvdef)
3791             {
3792             case fdefunkey:
3793             case foperator:
3794             case fstartlist:
3795             case finlist:
3796             case fignore:
3797             case vignore:
3798               break;
3799             case fdefunname:
3800               fvdef = fignore;
3801               break;
3802             case fvnameseen:
3803               if (parlev == 0
3804                   && ((globals
3805                        && bracelev == 0
3806                        && templatelev == 0
3807                        && (!fvextern || declarations))
3808                       || (members && instruct)))
3809                   make_C_tag (FALSE); /* a variable */
3810               break;
3811             case flistseen:
3812               if ((declarations && typdef == tnone && !instruct)
3813                   || (members && typdef != tignore && instruct))
3814                 {
3815                   make_C_tag (TRUE); /* a function */
3816                   fvdef = fvnameseen;
3817                 }
3818               else if (!declarations)
3819                 fvdef = fvnone;
3820               token.valid = FALSE;
3821               break;
3822             default:
3823               fvdef = fvnone;
3824             }
3825           if (structdef == stagseen)
3826             structdef = snone;
3827           break;
3828         case ']':
3829           if (definedef != dnone || inattribute)
3830             break;
3831           if (structdef == stagseen)
3832             structdef = snone;
3833           switch (typdef)
3834             {
3835             case ttypeseen:
3836             case tend:
3837               typdef = tignore;
3838               make_C_tag (FALSE);       /* a typedef */
3839               break;
3840             case tnone:
3841             case tinbody:
3842               switch (fvdef)
3843                 {
3844                 case foperator:
3845                 case finlist:
3846                 case fignore:
3847                 case vignore:
3848                   break;
3849                 case fvnameseen:
3850                   if ((members && bracelev == 1)
3851                       || (globals && bracelev == 0
3852                           && (!fvextern || declarations)))
3853                     make_C_tag (FALSE); /* a variable */
3854                   /* FALLTHRU */
3855                 default:
3856                   fvdef = fvnone;
3857                 }
3858               break;
3859             }
3860           break;
3861         case '(':
3862           if (inattribute)
3863             {
3864               attrparlev++;
3865               break;
3866             }
3867           if (definedef != dnone)
3868             break;
3869           if (objdef == otagseen && parlev == 0)
3870             objdef = oparenseen;
3871           switch (fvdef)
3872             {
3873             case fvnameseen:
3874               if (typdef == ttypeseen
3875                   && *lp != '*'
3876                   && !instruct)
3877                 {
3878                   /* This handles constructs like:
3879                      typedef void OperatorFun (int fun); */
3880                   make_C_tag (FALSE);
3881                   typdef = tignore;
3882                   fvdef = fignore;
3883                   break;
3884                 }
3885               /* FALLTHRU */
3886             case foperator:
3887               fvdef = fstartlist;
3888               break;
3889             case flistseen:
3890               fvdef = finlist;
3891               break;
3892             }
3893           parlev++;
3894           break;
3895         case ')':
3896           if (inattribute)
3897             {
3898               if (--attrparlev == 0)
3899                 inattribute = FALSE;
3900               break;
3901             }
3902           if (definedef != dnone)
3903             break;
3904           if (objdef == ocatseen && parlev == 1)
3905             {
3906               make_C_tag (TRUE); /* an Objective C category */
3907               objdef = oignore;
3908             }
3909           if (--parlev == 0)
3910             {
3911               switch (fvdef)
3912                 {
3913                 case fstartlist:
3914                 case finlist:
3915                   fvdef = flistseen;
3916                   break;
3917                 }
3918               if (!instruct
3919                   && (typdef == tend
3920                       || typdef == ttypeseen))
3921                 {
3922                   typdef = tignore;
3923                   make_C_tag (FALSE); /* a typedef */
3924                 }
3925             }
3926           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3927             parlev = 0;
3928           break;
3929         case '{':
3930           if (definedef != dnone)
3931             break;
3932           if (typdef == ttypeseen)
3933             {
3934               /* Whenever typdef is set to tinbody (currently only
3935                  here), typdefbracelev should be set to bracelev. */
3936               typdef = tinbody;
3937               typdefbracelev = bracelev;
3938             }
3939           switch (fvdef)
3940             {
3941             case flistseen:
3942               make_C_tag (TRUE);    /* a function */
3943               /* FALLTHRU */
3944             case fignore:
3945               fvdef = fvnone;
3946               break;
3947             case fvnone:
3948               switch (objdef)
3949                 {
3950                 case otagseen:
3951                   make_C_tag (TRUE); /* an Objective C class */
3952                   objdef = oignore;
3953                   break;
3954                 case omethodtag:
3955                 case omethodparm:
3956                   make_C_tag (TRUE); /* an Objective C method */
3957                   objdef = oinbody;
3958                   break;
3959                 default:
3960                   /* Neutralize `extern "C" {' grot. */
3961                   if (bracelev == 0 && structdef == snone && nestlev == 0
3962                       && typdef == tnone)
3963                     bracelev = -1;
3964                 }
3965               break;
3966             }
3967           switch (structdef)
3968             {
3969             case skeyseen:         /* unnamed struct */
3970               pushclass_above (bracelev, NULL, 0);
3971               structdef = snone;
3972               break;
3973             case stagseen:         /* named struct or enum */
3974             case scolonseen:       /* a class */
3975               pushclass_above (bracelev,token.line+token.offset, token.length);
3976               structdef = snone;
3977               make_C_tag (FALSE);  /* a struct or enum */
3978               break;
3979             }
3980           bracelev++;
3981           break;
3982         case '*':
3983           if (definedef != dnone)
3984             break;
3985           if (fvdef == fstartlist)
3986             {
3987               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3988               token.valid = FALSE;
3989             }
3990           break;
3991         case '}':
3992           if (definedef != dnone)
3993             break;
3994           if (!ignoreindent && lp == newlb.buffer + 1)
3995             {
3996               if (bracelev != 0)
3997                 token.valid = FALSE;
3998               bracelev = 0;     /* reset brace level if first column */
3999               parlev = 0;       /* also reset paren level, just in case... */
4000             }
4001           else
4002             {
4003               if (--bracelev < 0)
4004                 {
4005                   bracelev = 0;
4006                   token.valid = FALSE; /* something gone amiss, token unreliable */
4007                 }
4008               if (bracelev == 0 && fvdef == vignore)
4009                 fvdef = fvnone;         /* end of function */
4010             }
4011           popclass_above (bracelev);
4012           structdef = snone;
4013           /* Only if typdef == tinbody is typdefbracelev significant. */
4014           if (typdef == tinbody && bracelev <= typdefbracelev)
4015             {
4016               assert (bracelev == typdefbracelev);
4017               typdef = tend;
4018             }
4019           break;
4020         case '=':
4021           if (definedef != dnone)
4022             break;
4023           switch (fvdef)
4024             {
4025             case foperator:
4026             case finlist:
4027             case fignore:
4028             case vignore:
4029               break;
4030             case fvnameseen:
4031               if ((members && bracelev == 1)
4032                   || (globals && bracelev == 0 && (!fvextern || declarations)))
4033                 make_C_tag (FALSE); /* a variable */
4034               /* FALLTHRU */
4035             default:
4036               fvdef = vignore;
4037             }
4038           break;
4039         case '<':
4040           if (cplpl
4041               && (structdef == stagseen || fvdef == fvnameseen))
4042             {
4043               templatelev++;
4044               break;
4045             }
4046           goto resetfvdef;
4047         case '>':
4048           if (templatelev > 0)
4049             {
4050               templatelev--;
4051               break;
4052             }
4053           goto resetfvdef;
4054         case '+':
4055         case '-':
4056           if (objdef == oinbody && bracelev == 0)
4057             {
4058               objdef = omethodsign;
4059               break;
4060             }
4061           /* FALLTHRU */
4062         resetfvdef:
4063         case '#': case '~': case '&': case '%': case '/':
4064         case '|': case '^': case '!': case '.': case '?':
4065           if (definedef != dnone)
4066             break;
4067           /* These surely cannot follow a function tag in C. */
4068           switch (fvdef)
4069             {
4070             case foperator:
4071             case finlist:
4072             case fignore:
4073             case vignore:
4074               break;
4075             default:
4076               fvdef = fvnone;
4077             }
4078           break;
4079         case '\0':
4080           if (objdef == otagseen)
4081             {
4082               make_C_tag (TRUE); /* an Objective C class */
4083               objdef = oignore;
4084             }
4085           /* If a macro spans multiple lines don't reset its state. */
4086           if (quotednl)
4087             CNL_SAVE_DEFINEDEF ();
4088           else
4089             CNL ();
4090           break;
4091         } /* switch (c) */
4092
4093     } /* while not eof */
4094
4095   free (lbs[0].lb.buffer);
4096   free (lbs[1].lb.buffer);
4097 }
4098
4099 /*
4100  * Process either a C++ file or a C file depending on the setting
4101  * of a global flag.
4102  */
4103 static void
4104 default_C_entries (inf)
4105      FILE *inf;
4106 {
4107   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4108 }
4109
/* Tag INF as plain C: C_entries is called with no language bits set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no extension flags at all */

  C_entries (c_ext, inf);
}
4117
4118 /* Always do C++. */
4119 static void
4120 Cplusplus_entries (inf)
4121      FILE *inf;
4122 {
4123   C_entries (C_PLPL, inf);
4124 }
4125
4126 /* Always do Java. */
4127 static void
4128 Cjava_entries (inf)
4129      FILE *inf;
4130 {
4131   C_entries (C_JAVA, inf);
4132 }
4133
4134 /* Always do C*. */
4135 static void
4136 Cstar_entries (inf)
4137      FILE *inf;
4138 {
4139   C_entries (C_STAR, inf);
4140 }
4141
4142 /* Always do Yacc. */
4143 static void
4144 Yacc_entries (inf)
4145      FILE *inf;
4146 {
4147   C_entries (YACC, inf);
4148 }
4149
4150 \f
4151 /* Useful macros. */
/* Loop header: the statement that follows runs once per input line.
   Before each iteration, unless feof (FILE_POINTER) is already true,
   readline fills LINE_BUFFER from FILE_POINTER and CHAR_POINTER is
   set to LINE_BUFFER.buffer.  The trailing TRUE makes the condition
   depend on the feof test alone. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)                                           \
         && (readline (&line_buffer, file_pointer),                     \
             char_pointer = line_buffer.buffer,                         \
             TRUE))
4160
/* Nonzero if CP points at keyword KW and the character right after
   the keyword satisfies notinname.  On a match CP is moved past the
   keyword and then through skip_spaces; otherwise CP is unchanged.
   KW must be a string literal: the `"" kw' concatenation handed to
   assert is a syntax error for anything else. */
#define LOOKING_AT(cp, kw)                                              \
  (assert ("" kw),                                                      \
   (strneq ((cp), kw, sizeof (kw) - 1)                                  \
    && notinname ((cp)[sizeof (kw) - 1])                                \
    && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1)) != NULL))
4166
/* Like LOOKING_AT, but the comparison is done with strncaseeq, the
   character after the keyword is not checked with notinname, and no
   spaces are skipped: on a match CP is only advanced past KW.  KW
   must be a string literal (enforced by the `"" kw' concatenation). */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  (assert ("" kw),                                                      \
   (strncaseeq ((cp), kw, sizeof (kw) - 1)                              \
    && ((cp) += sizeof (kw) - 1) != NULL))
4172
4173 /*
4174  * Read a file, but do no processing.  This is used to do regexp
4175  * matching on files that have no language defined.
4176  */
4177 static void
4178 just_read_file (inf)
4179      FILE *inf;
4180 {
4181   register char *dummy;
4182
4183   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4184     continue;
4185 }
4186
4187 \f
4188 /* Fortran parsing */
4189
4190 static void F_takeprec __P((void));
4191 static void F_getit __P((FILE *));
4192
4193 static void
4194 F_takeprec ()
4195 {
4196   dbp = skip_spaces (dbp);
4197   if (*dbp != '*')
4198     return;
4199   dbp++;
4200   dbp = skip_spaces (dbp);
4201   if (strneq (dbp, "(*)", 3))
4202     {
4203       dbp += 3;
4204       return;
4205     }
4206   if (!ISDIGIT (*dbp))
4207     {
4208       --dbp;                    /* force failure */
4209       return;
4210     }
4211   do
4212     dbp++;
4213   while (ISDIGIT (*dbp));
4214 }
4215
4216 static void
4217 F_getit (inf)
4218      FILE *inf;
4219 {
4220   register char *cp;
4221
4222   dbp = skip_spaces (dbp);
4223   if (*dbp == '\0')
4224     {
4225       readline (&lb, inf);
4226       dbp = lb.buffer;
4227       if (dbp[5] != '&')
4228         return;
4229       dbp += 6;
4230       dbp = skip_spaces (dbp);
4231     }
4232   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4233     return;
4234   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4235     continue;
4236   make_tag (dbp, cp-dbp, TRUE,
4237             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4238 }
4239
4240
4241 static void
4242 Fortran_functions (inf)
4243      FILE *inf;
4244 {
4245   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4246     {
4247       if (*dbp == '%')
4248         dbp++;                  /* Ratfor escape to fortran */
4249       dbp = skip_spaces (dbp);
4250       if (*dbp == '\0')
4251         continue;
4252       switch (lowcase (*dbp))
4253         {
4254         case 'i':
4255           if (nocase_tail ("integer"))
4256             F_takeprec ();
4257           break;
4258         case 'r':
4259           if (nocase_tail ("real"))
4260             F_takeprec ();
4261           break;
4262         case 'l':
4263           if (nocase_tail ("logical"))
4264             F_takeprec ();
4265           break;
4266         case 'c':
4267           if (nocase_tail ("complex") || nocase_tail ("character"))
4268             F_takeprec ();
4269           break;
4270         case 'd':
4271           if (nocase_tail ("double"))
4272             {
4273               dbp = skip_spaces (dbp);
4274               if (*dbp == '\0')
4275                 continue;
4276               if (nocase_tail ("precision"))
4277                 break;
4278               continue;
4279             }
4280           break;
4281         }
4282       dbp = skip_spaces (dbp);
4283       if (*dbp == '\0')
4284         continue;
4285       switch (lowcase (*dbp))
4286         {
4287         case 'f':
4288           if (nocase_tail ("function"))
4289             F_getit (inf);
4290           continue;
4291         case 's':
4292           if (nocase_tail ("subroutine"))
4293             F_getit (inf);
4294           continue;
4295         case 'e':
4296           if (nocase_tail ("entry"))
4297             F_getit (inf);
4298           continue;
4299         case 'b':
4300           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4301             {
4302               dbp = skip_spaces (dbp);
4303               if (*dbp == '\0') /* assume un-named */
4304                 make_tag ("blockdata", 9, TRUE,
4305                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4306               else
4307                 F_getit (inf);  /* look for name */
4308             }
4309           continue;
4310         }
4311     }
4312 }
4313
4314 \f
4315 /*
4316  * Ada parsing
4317  * Original code by
4318  * Philippe Waroquiers (1998)
4319  */
4320
4321 static void Ada_getit __P((FILE *, char *));
4322
4323 /* Once we are positioned after an "interesting" keyword, let's get
4324    the real tag value necessary. */
4325 static void
4326 Ada_getit (inf, name_qualifier)
4327      FILE *inf;
4328      char *name_qualifier;
4329 {
4330   register char *cp;
4331   char *name;
4332   char c;
4333
4334   while (!feof (inf))
4335     {
4336       dbp = skip_spaces (dbp);
4337       if (*dbp == '\0'
4338           || (dbp[0] == '-' && dbp[1] == '-'))
4339         {
4340           readline (&lb, inf);
4341           dbp = lb.buffer;
4342         }
4343       switch (lowcase(*dbp))
4344         {
4345         case 'b':
4346           if (nocase_tail ("body"))
4347             {
4348               /* Skipping body of   procedure body   or   package body or ....
4349                  resetting qualifier to body instead of spec. */
4350               name_qualifier = "/b";
4351               continue;
4352             }
4353           break;
4354         case 't':
4355           /* Skipping type of   task type   or   protected type ... */
4356           if (nocase_tail ("type"))
4357             continue;
4358           break;
4359         }
4360       if (*dbp == '"')
4361         {
4362           dbp += 1;
4363           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4364             continue;
4365         }
4366       else
4367         {
4368           dbp = skip_spaces (dbp);
4369           for (cp = dbp;
4370                (*cp != '\0'
4371                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4372                cp++)
4373             continue;
4374           if (cp == dbp)
4375             return;
4376         }
4377       c = *cp;
4378       *cp = '\0';
4379       name = concat (dbp, name_qualifier, "");
4380       *cp = c;
4381       make_tag (name, strlen (name), TRUE,
4382                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4383       free (name);
4384       if (c == '"')
4385         dbp = cp + 1;
4386       return;
4387     }
4388 }
4389
4390 static void
4391 Ada_funcs (inf)
4392      FILE *inf;
4393 {
4394   bool inquote = FALSE;
4395   bool skip_till_semicolumn = FALSE;
4396
4397   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4398     {
4399       while (*dbp != '\0')
4400         {
4401           /* Skip a string i.e. "abcd". */
4402           if (inquote || (*dbp == '"'))
4403             {
4404               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4405               if (dbp != NULL)
4406                 {
4407                   inquote = FALSE;
4408                   dbp += 1;
4409                   continue;     /* advance char */
4410                 }
4411               else
4412                 {
4413                   inquote = TRUE;
4414                   break;        /* advance line */
4415                 }
4416             }
4417
4418           /* Skip comments. */
4419           if (dbp[0] == '-' && dbp[1] == '-')
4420             break;              /* advance line */
4421
4422           /* Skip character enclosed in single quote i.e. 'a'
4423              and skip single quote starting an attribute i.e. 'Image. */
4424           if (*dbp == '\'')
4425             {
4426               dbp++ ;
4427               if (*dbp != '\0')
4428                 dbp++;
4429               continue;
4430             }
4431
4432           if (skip_till_semicolumn)
4433             {
4434               if (*dbp == ';')
4435                 skip_till_semicolumn = FALSE;
4436               dbp++;
4437               continue;         /* advance char */
4438             }
4439
4440           /* Search for beginning of a token.  */
4441           if (!begtoken (*dbp))
4442             {
4443               dbp++;
4444               continue;         /* advance char */
4445             }
4446
4447           /* We are at the beginning of a token. */
4448           switch (lowcase(*dbp))
4449             {
4450             case 'f':
4451               if (!packages_only && nocase_tail ("function"))
4452                 Ada_getit (inf, "/f");
4453               else
4454                 break;          /* from switch */
4455               continue;         /* advance char */
4456             case 'p':
4457               if (!packages_only && nocase_tail ("procedure"))
4458                 Ada_getit (inf, "/p");
4459               else if (nocase_tail ("package"))
4460                 Ada_getit (inf, "/s");
4461               else if (nocase_tail ("protected")) /* protected type */
4462                 Ada_getit (inf, "/t");
4463               else
4464                 break;          /* from switch */
4465               continue;         /* advance char */
4466
4467             case 'u':
4468               if (typedefs && !packages_only && nocase_tail ("use"))
4469                 {
4470                   /* when tagging types, avoid tagging  use type Pack.Typename;
4471                      for this, we will skip everything till a ; */
4472                   skip_till_semicolumn = TRUE;
4473                   continue;     /* advance char */
4474                 }
4475
4476             case 't':
4477               if (!packages_only && nocase_tail ("task"))
4478                 Ada_getit (inf, "/k");
4479               else if (typedefs && !packages_only && nocase_tail ("type"))
4480                 {
4481                   Ada_getit (inf, "/t");
4482                   while (*dbp != '\0')
4483                     dbp += 1;
4484                 }
4485               else
4486                 break;          /* from switch */
4487               continue;         /* advance char */
4488             }
4489
4490           /* Look for the end of the token. */
4491           while (!endtoken (*dbp))
4492             dbp++;
4493
4494         } /* advance char */
4495     } /* advance line */
4496 }
4497
4498 \f
4499 /*
4500  * Unix and microcontroller assembly tag handling
4501  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4502  * Idea by Bob Weiner, Motorola Inc. (1994)
4503  */
4504 static void
4505 Asm_labels (inf)
4506      FILE *inf;
4507 {
4508   register char *cp;
4509
4510   LOOP_ON_INPUT_LINES (inf, lb, cp)
4511     {
4512       /* If first char is alphabetic or one of [_.$], test for colon
4513          following identifier. */
4514       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4515         {
4516           /* Read past label. */
4517           cp++;
4518           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4519             cp++;
4520           if (*cp == ':' || iswhite (*cp))
4521             /* Found end of label, so copy it and add it to the table. */
4522             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4523                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4524         }
4525     }
4526 }
4527
4528 \f
4529 /*
4530  * Perl support
4531  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4532  * Perl variable names: /^(my|local).../
4533  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4534  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4535  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4536  */
4537 static void
4538 Perl_functions (inf)
4539      FILE *inf;
4540 {
4541   char *package = savestr ("main"); /* current package name */
4542   register char *cp;
4543
4544   LOOP_ON_INPUT_LINES (inf, lb, cp)
4545     {
4546       skip_spaces(cp);
4547
4548       if (LOOKING_AT (cp, "package"))
4549         {
4550           free (package);
4551           get_tag (cp, &package);
4552         }
4553       else if (LOOKING_AT (cp, "sub"))
4554         {
4555           char *pos;
4556           char *sp = cp;
4557
4558           while (!notinname (*cp))
4559             cp++;
4560           if (cp == sp)
4561             continue;           /* nothing found */
4562           if ((pos = etags_strchr (sp, ':')) != NULL
4563               && pos < cp && pos[1] == ':')
4564             /* The name is already qualified. */
4565             make_tag (sp, cp - sp, TRUE,
4566                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4567           else
4568             /* Qualify it. */
4569             {
4570               char savechar, *name;
4571
4572               savechar = *cp;
4573               *cp = '\0';
4574               name = concat (package, "::", sp);
4575               *cp = savechar;
4576               make_tag (name, strlen(name), TRUE,
4577                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4578               free (name);
4579             }
4580         }
4581        else if (globals)        /* only if we are tagging global vars */
4582         {
4583           /* Skip a qualifier, if any. */
4584           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4585           /* After "my" or "local", but before any following paren or space. */
4586           char *varstart = cp;
4587
4588           if (qual              /* should this be removed?  If yes, how? */
4589               && (*cp == '$' || *cp == '@' || *cp == '%'))
4590             {
4591               varstart += 1;
4592               do
4593                 cp++;
4594               while (ISALNUM (*cp) || *cp == '_');
4595             }
4596           else if (qual)
4597             {
4598               /* Should be examining a variable list at this point;
4599                  could insist on seeing an open parenthesis. */
4600               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4601                 cp++;
4602             }
4603           else
4604             continue;
4605
4606           make_tag (varstart, cp - varstart, FALSE,
4607                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4608         }
4609     }
4610   free (package);
4611 }
4612
4613
4614 /*
4615  * Python support
4616  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4617  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4618  * More ideas by seb bacon <seb@jamkit.com> (2002)
4619  */
4620 static void
4621 Python_functions (inf)
4622      FILE *inf;
4623 {
4624   register char *cp;
4625
4626   LOOP_ON_INPUT_LINES (inf, lb, cp)
4627     {
4628       cp = skip_spaces (cp);
4629       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4630         {
4631           char *name = cp;
4632           while (!notinname (*cp) && *cp != ':')
4633             cp++;
4634           make_tag (name, cp - name, TRUE,
4635                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4636         }
4637     }
4638 }
4639
4640 \f
4641 /*
4642  * PHP support
4643  * Look for:
4644  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4645  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4646  *  - /^[ \t]*define\(\"[^\"]+/
4647  * Only with --members:
4648  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4649  * Idea by Diez B. Roggisch (2001)
4650  */
4651 static void
4652 PHP_functions (inf)
4653      FILE *inf;
4654 {
4655   register char *cp, *name;
4656   bool search_identifier = FALSE;
4657
4658   LOOP_ON_INPUT_LINES (inf, lb, cp)
4659     {
4660       cp = skip_spaces (cp);
4661       name = cp;
4662       if (search_identifier
4663           && *cp != '\0')
4664         {
4665           while (!notinname (*cp))
4666             cp++;
4667           make_tag (name, cp - name, TRUE,
4668                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4669           search_identifier = FALSE;
4670         }
4671       else if (LOOKING_AT (cp, "function"))
4672         {
4673           if(*cp == '&')
4674             cp = skip_spaces (cp+1);
4675           if(*cp != '\0')
4676             {
4677               name = cp;
4678               while (!notinname (*cp))
4679                 cp++;
4680               make_tag (name, cp - name, TRUE,
4681                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4682             }
4683           else
4684             search_identifier = TRUE;
4685         }
4686       else if (LOOKING_AT (cp, "class"))
4687         {
4688           if (*cp != '\0')
4689             {
4690               name = cp;
4691               while (*cp != '\0' && !iswhite (*cp))
4692                 cp++;
4693               make_tag (name, cp - name, FALSE,
4694                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4695             }
4696           else
4697             search_identifier = TRUE;
4698         }
4699       else if (strneq (cp, "define", 6)
4700                && (cp = skip_spaces (cp+6))
4701                && *cp++ == '('
4702                && (*cp == '"' || *cp == '\''))
4703         {
4704           char quote = *cp++;
4705           name = cp;
4706           while (*cp != quote && *cp != '\0')
4707             cp++;
4708           make_tag (name, cp - name, FALSE,
4709                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4710         }
4711       else if (members
4712                && LOOKING_AT (cp, "var")
4713                && *cp == '$')
4714         {
4715           name = cp;
4716           while (!notinname(*cp))
4717             cp++;
4718           make_tag (name, cp - name, FALSE,
4719                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4720         }
4721     }
4722 }
4723
4724 \f
4725 /*
4726  * Cobol tag functions
4727  * We could look for anything that could be a paragraph name.
4728  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4729  * Idea by Corny de Souza (1993)
4730  */
4731 static void
4732 Cobol_paragraphs (inf)
4733      FILE *inf;
4734 {
4735   register char *bp, *ep;
4736
4737   LOOP_ON_INPUT_LINES (inf, lb, bp)
4738     {
4739       if (lb.len < 9)
4740         continue;
4741       bp += 8;
4742
4743       /* If eoln, compiler option or comment ignore whole line. */
4744       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4745         continue;
4746
4747       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4748         continue;
4749       if (*ep++ == '.')
4750         make_tag (bp, ep - bp, TRUE,
4751                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4752     }
4753 }
4754
4755 \f
4756 /*
4757  * Makefile support
4758  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4759  */
4760 static void
4761 Makefile_targets (inf)
4762      FILE *inf;
4763 {
4764   register char *bp;
4765
4766   LOOP_ON_INPUT_LINES (inf, lb, bp)
4767     {
4768       if (*bp == '\t' || *bp == '#')
4769         continue;
4770       while (*bp != '\0' && *bp != '=' && *bp != ':')
4771         bp++;
4772       if (*bp == ':' || (globals && *bp == '='))
4773         {
4774           /* We should detect if there is more than one tag, but we do not.
4775              We just skip initial and final spaces. */
4776           char * namestart = skip_spaces (lb.buffer);
4777           while (--bp > namestart)
4778             if (!notinname (*bp))
4779               break;
4780           make_tag (namestart, bp - namestart + 1, TRUE,
4781                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4782         }
4783     }
4784 }
4785
4786 \f
4787 /*
4788  * Pascal parsing
4789  * Original code by Mosur K. Mohan (1989)
4790  *
4791  *  Locates tags for procedures & functions.  Doesn't do any type- or
4792  *  var-definitions.  It does look for the keyword "extern" or
4793  *  "forward" immediately following the procedure statement; if found,
4794  *  the tag is skipped.
4795  */
4796 static void
4797 Pascal_functions (inf)
4798      FILE *inf;
4799 {
4800   linebuffer tline;             /* mostly copied from C_entries */
4801   long save_lcno;
4802   int save_lineno, namelen, taglen;
4803   char c, *name;
4804
4805   bool                          /* each of these flags is TRUE iff: */
4806     incomment,                  /* point is inside a comment */
4807     inquote,                    /* point is inside '..' string */
4808     get_tagname,                /* point is after PROCEDURE/FUNCTION
4809                                    keyword, so next item = potential tag */
4810     found_tag,                  /* point is after a potential tag */
4811     inparms,                    /* point is within parameter-list */
4812     verify_tag;                 /* point has passed the parm-list, so the
4813                                    next token will determine whether this
4814                                    is a FORWARD/EXTERN to be ignored, or
4815                                    whether it is a real tag */
4816
4817   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4818   name = NULL;                  /* keep compiler quiet */
4819   dbp = lb.buffer;
4820   *dbp = '\0';
4821   linebuffer_init (&tline);
4822
4823   incomment = inquote = FALSE;
4824   found_tag = FALSE;            /* have a proc name; check if extern */
4825   get_tagname = FALSE;          /* found "procedure" keyword         */
4826   inparms = FALSE;              /* found '(' after "proc"            */
4827   verify_tag = FALSE;           /* check if "extern" is ahead        */
4828
4829
4830   while (!feof (inf))           /* long main loop to get next char */
4831     {
4832       c = *dbp++;
4833       if (c == '\0')            /* if end of line */
4834         {
4835           readline (&lb, inf);
4836           dbp = lb.buffer;
4837           if (*dbp == '\0')
4838             continue;
4839           if (!((found_tag && verify_tag)
4840                 || get_tagname))
4841             c = *dbp++;         /* only if don't need *dbp pointing
4842                                    to the beginning of the name of
4843                                    the procedure or function */
4844         }
4845       if (incomment)
4846         {
4847           if (c == '}')         /* within { } comments */
4848             incomment = FALSE;
4849           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4850             {
4851               dbp++;
4852               incomment = FALSE;
4853             }
4854           continue;
4855         }
4856       else if (inquote)
4857         {
4858           if (c == '\'')
4859             inquote = FALSE;
4860           continue;
4861         }
4862       else
4863         switch (c)
4864           {
4865           case '\'':
4866             inquote = TRUE;     /* found first quote */
4867             continue;
4868           case '{':             /* found open { comment */
4869             incomment = TRUE;
4870             continue;
4871           case '(':
4872             if (*dbp == '*')    /* found open (* comment */
4873               {
4874                 incomment = TRUE;
4875                 dbp++;
4876               }
4877             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4878               inparms = TRUE;
4879             continue;
4880           case ')':             /* end of parms list */
4881             if (inparms)
4882               inparms = FALSE;
4883             continue;
4884           case ';':
4885             if (found_tag && !inparms) /* end of proc or fn stmt */
4886               {
4887                 verify_tag = TRUE;
4888                 break;
4889               }
4890             continue;
4891           }
4892       if (found_tag && verify_tag && (*dbp != ' '))
4893         {
4894           /* Check if this is an "extern" declaration. */
4895           if (*dbp == '\0')
4896             continue;
4897           if (lowcase (*dbp == 'e'))
4898             {
4899               if (nocase_tail ("extern")) /* superfluous, really! */
4900                 {
4901                   found_tag = FALSE;
4902                   verify_tag = FALSE;
4903                 }
4904             }
4905           else if (lowcase (*dbp) == 'f')
4906             {
4907               if (nocase_tail ("forward")) /* check for forward reference */
4908                 {
4909                   found_tag = FALSE;
4910                   verify_tag = FALSE;
4911                 }
4912             }
4913           if (found_tag && verify_tag) /* not external proc, so make tag */
4914             {
4915               found_tag = FALSE;
4916               verify_tag = FALSE;
4917               make_tag (name, namelen, TRUE,
4918                         tline.buffer, taglen, save_lineno, save_lcno);
4919               continue;
4920             }
4921         }
4922       if (get_tagname)          /* grab name of proc or fn */
4923         {
4924           char *cp;
4925
4926           if (*dbp == '\0')
4927             continue;
4928
4929           /* Find block name. */
4930           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4931             continue;
4932
4933           /* Save all values for later tagging. */
4934           linebuffer_setlen (&tline, lb.len);
4935           strcpy (tline.buffer, lb.buffer);
4936           save_lineno = lineno;
4937           save_lcno = linecharno;
4938           name = tline.buffer + (dbp - lb.buffer);
4939           namelen = cp - dbp;
4940           taglen = cp - lb.buffer + 1;
4941
4942           dbp = cp;             /* set dbp to e-o-token */
4943           get_tagname = FALSE;
4944           found_tag = TRUE;
4945           continue;
4946
4947           /* And proceed to check for "extern". */
4948         }
4949       else if (!incomment && !inquote && !found_tag)
4950         {
4951           /* Check for proc/fn keywords. */
4952           switch (lowcase (c))
4953             {
4954             case 'p':
4955               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4956                 get_tagname = TRUE;
4957               continue;
4958             case 'f':
4959               if (nocase_tail ("unction"))
4960                 get_tagname = TRUE;
4961               continue;
4962             }
4963         }
4964     } /* while not eof */
4965
4966   free (tline.buffer);
4967 }
4968
4969 \f
4970 /*
4971  * Lisp tag functions
4972  *  look for (def or (DEF, quote or QUOTE
4973  */
4974
4975 static void L_getit __P((void));
4976
4977 static void
4978 L_getit ()
4979 {
4980   if (*dbp == '\'')             /* Skip prefix quote */
4981     dbp++;
4982   else if (*dbp == '(')
4983   {
4984     dbp++;
4985     /* Try to skip "(quote " */
4986     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4987       /* Ok, then skip "(" before name in (defstruct (foo)) */
4988       dbp = skip_spaces (dbp);
4989   }
4990   get_tag (dbp, NULL);
4991 }
4992
4993 static void
4994 Lisp_functions (inf)
4995      FILE *inf;
4996 {
4997   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4998     {
4999       if (dbp[0] != '(')
5000         continue;
5001
5002       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5003         {
5004           dbp = skip_non_spaces (dbp);
5005           dbp = skip_spaces (dbp);
5006           L_getit ();
5007         }
5008       else
5009         {
5010           /* Check for (foo::defmumble name-defined ... */
5011           do
5012             dbp++;
5013           while (!notinname (*dbp) && *dbp != ':');
5014           if (*dbp == ':')
5015             {
5016               do
5017                 dbp++;
5018               while (*dbp == ':');
5019
5020               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5021                 {
5022                   dbp = skip_non_spaces (dbp);
5023                   dbp = skip_spaces (dbp);
5024                   L_getit ();
5025                 }
5026             }
5027         }
5028     }
5029 }
5030
5031 \f
5032 /*
5033  * Lua script language parsing
5034  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5035  *
5036  *  "function" and "local function" are tags if they start at column 1.
5037  */
5038 static void
5039 Lua_functions (inf)
5040      FILE *inf;
5041 {
5042   register char *bp;
5043
5044   LOOP_ON_INPUT_LINES (inf, lb, bp)
5045     {
5046       if (bp[0] != 'f' && bp[0] != 'l')
5047         continue;
5048
5049       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5050
5051       if (LOOKING_AT (bp, "function"))
5052         get_tag (bp, NULL);
5053     }
5054 }
5055
5056 \f
5057 /*
5058  * Postscript tags
5059  * Just look for lines where the first character is '/'
5060  * Also look at "defineps" for PSWrap
5061  * Ideas by:
5062  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5063  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5064  */
5065 static void
5066 PS_functions (inf)
5067      FILE *inf;
5068 {
5069   register char *bp, *ep;
5070
5071   LOOP_ON_INPUT_LINES (inf, lb, bp)
5072     {
5073       if (bp[0] == '/')
5074         {
5075           for (ep = bp+1;
5076                *ep != '\0' && *ep != ' ' && *ep != '{';
5077                ep++)
5078             continue;
5079           make_tag (bp, ep - bp, TRUE,
5080                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5081         }
5082       else if (LOOKING_AT (bp, "defineps"))
5083         get_tag (bp, NULL);
5084     }
5085 }
5086
5087 \f
5088 /*
5089  * Forth tags
5090  * Ignore anything after \ followed by space or in ( )
5091  * Look for words defined by :
5092  * Look for constant, code, create, defer, value, and variable
5093  * OBP extensions:  Look for buffer:, field,
5094  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5095  */
5096 static void
5097 Forth_words (inf)
5098      FILE *inf;
5099 {
5100   register char *bp;
5101
5102   LOOP_ON_INPUT_LINES (inf, lb, bp)
5103     while ((bp = skip_spaces (bp))[0] != '\0')
5104       if (bp[0] == '\\' && iswhite(bp[1]))
5105         break;                  /* read next line */
5106       else if (bp[0] == '(' && iswhite(bp[1]))
5107         do                      /* skip to ) or eol */
5108           bp++;
5109         while (*bp != ')' && *bp != '\0');
5110       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5111                || LOOKING_AT_NOCASE (bp, "constant")
5112                || LOOKING_AT_NOCASE (bp, "code")
5113                || LOOKING_AT_NOCASE (bp, "create")
5114                || LOOKING_AT_NOCASE (bp, "defer")
5115                || LOOKING_AT_NOCASE (bp, "value")
5116                || LOOKING_AT_NOCASE (bp, "variable")
5117                || LOOKING_AT_NOCASE (bp, "buffer:")
5118                || LOOKING_AT_NOCASE (bp, "field"))
5119         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5120       else
5121         bp = skip_non_spaces (bp);
5122 }
5123
5124 \f
5125 /*
5126  * Scheme tag functions
5127  * look for (def... xyzzy
5128  *          (def... (xyzzy
5129  *          (def ... ((...(xyzzy ....
5130  *          (set! xyzzy
5131  * Original code by Ken Haase (1985?)
5132  */
5133 static void
5134 Scheme_functions (inf)
5135      FILE *inf;
5136 {
5137   register char *bp;
5138
5139   LOOP_ON_INPUT_LINES (inf, lb, bp)
5140     {
5141       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5142         {
5143           bp = skip_non_spaces (bp+4);
5144           /* Skip over open parens and white space */
5145           while (notinname (*bp))
5146             bp++;
5147           get_tag (bp, NULL);
5148         }
5149       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5150         get_tag (bp, NULL);
5151     }
5152 }
5153
5154 \f
5155 /* Find tags in TeX and LaTeX input files.  */
5156
5157 /* TEX_toktab is a table of TeX control sequences that define tags.
5158  * Each entry records one such control sequence.
5159  *
5160  * Original code from who knows whom.
5161  * Ideas by:
5162  *   Stefan Monnier (2002)
5163  */
5164
5165 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5166
5167 /* Default set of control sequences to put into TEX_toktab.
5168    The value of environment var TEXTAGS is prepended to this.  */
5169 static char *TEX_defenv = "\
5170 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5171 :part:appendix:entry:index:def\
5172 :newcommand:renewcommand:newenvironment:renewenvironment";
5173
5174 static void TEX_mode __P((FILE *));
5175 static void TEX_decode_env __P((char *, char *));
5176
5177 static char TEX_esc = '\\';
5178 static char TEX_opgrp = '{';
5179 static char TEX_clgrp = '}';
5180
5181 /*
5182  * TeX/LaTeX scanning loop.
5183  */
5184 static void
5185 TeX_commands (inf)
5186      FILE *inf;
5187 {
5188   char *cp;
5189   linebuffer *key;
5190
5191   /* Select either \ or ! as escape character.  */
5192   TEX_mode (inf);
5193
5194   /* Initialize token table once from environment. */
5195   if (TEX_toktab == NULL)
5196     TEX_decode_env ("TEXTAGS", TEX_defenv);
5197
5198   LOOP_ON_INPUT_LINES (inf, lb, cp)
5199     {
5200       /* Look at each TEX keyword in line. */
5201       for (;;)
5202         {
5203           /* Look for a TEX escape. */
5204           while (*cp++ != TEX_esc)
5205             if (cp[-1] == '\0' || cp[-1] == '%')
5206               goto tex_next_line;
5207
5208           for (key = TEX_toktab; key->buffer != NULL; key++)
5209             if (strneq (cp, key->buffer, key->len))
5210               {
5211                 register char *p;
5212                 int namelen, linelen;
5213                 bool opgrp = FALSE;
5214
5215                 cp = skip_spaces (cp + key->len);
5216                 if (*cp == TEX_opgrp)
5217                   {
5218                     opgrp = TRUE;
5219                     cp++;
5220                   }
5221                 for (p = cp;
5222                      (!iswhite (*p) && *p != '#' &&
5223                       *p != TEX_opgrp && *p != TEX_clgrp);
5224                      p++)
5225                   continue;
5226                 namelen = p - cp;
5227                 linelen = lb.len;
5228                 if (!opgrp || *p == TEX_clgrp)
5229                   {
5230                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5231                       p++;
5232                     linelen = p - lb.buffer + 1;
5233                   }
5234                 make_tag (cp, namelen, TRUE,
5235                           lb.buffer, linelen, lineno, linecharno);
5236                 goto tex_next_line; /* We only tag a line once */
5237               }
5238         }
5239     tex_next_line:
5240       ;
5241     }
5242 }
5243
5244 #define TEX_LESC '\\'
5245 #define TEX_SESC '!'
5246
5247 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5248    chars accordingly. */
5249 static void
5250 TEX_mode (inf)
5251      FILE *inf;
5252 {
5253   int c;
5254
5255   while ((c = getc (inf)) != EOF)
5256     {
5257       /* Skip to next line if we hit the TeX comment char. */
5258       if (c == '%')
5259         while (c != '\n' && c != EOF)
5260           c = getc (inf);
5261       else if (c == TEX_LESC || c == TEX_SESC )
5262         break;
5263     }
5264
5265   if (c == TEX_LESC)
5266     {
5267       TEX_esc = TEX_LESC;
5268       TEX_opgrp = '{';
5269       TEX_clgrp = '}';
5270     }
5271   else
5272     {
5273       TEX_esc = TEX_SESC;
5274       TEX_opgrp = '<';
5275       TEX_clgrp = '>';
5276     }
5277   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5278      No attempt is made to correct the situation. */
5279   rewind (inf);
5280 }
5281
5282 /* Read environment and prepend it to the default string.
5283    Build token table. */
5284 static void
5285 TEX_decode_env (evarname, defenv)
5286      char *evarname;
5287      char *defenv;
5288 {
5289   register char *env, *p;
5290   int i, len;
5291
5292   /* Append default string to environment. */
5293   env = getenv (evarname);
5294   if (!env)
5295     env = defenv;
5296   else
5297     {
5298       char *oldenv = env;
5299       env = concat (oldenv, defenv, "");
5300     }
5301
5302   /* Allocate a token table */
5303   for (len = 1, p = env; p;)
5304     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5305       len++;
5306   TEX_toktab = xnew (len, linebuffer);
5307
5308   /* Unpack environment string into token table. Be careful about */
5309   /* zero-length strings (leading ':', "::" and trailing ':') */
5310   for (i = 0; *env != '\0';)
5311     {
5312       p = etags_strchr (env, ':');
5313       if (!p)                   /* End of environment string. */
5314         p = env + strlen (env);
5315       if (p - env > 0)
5316         {                       /* Only non-zero strings. */
5317           TEX_toktab[i].buffer = savenstr (env, p - env);
5318           TEX_toktab[i].len = p - env;
5319           i++;
5320         }
5321       if (*p)
5322         env = p + 1;
5323       else
5324         {
5325           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5326           TEX_toktab[i].len = 0;
5327           break;
5328         }
5329     }
5330 }
5331
5332 \f
5333 /* Texinfo support.  Dave Love, Mar. 2000.  */
5334 static void
5335 Texinfo_nodes (inf)
5336      FILE * inf;
5337 {
5338   char *cp, *start;
5339   LOOP_ON_INPUT_LINES (inf, lb, cp)
5340     if (LOOKING_AT (cp, "@node"))
5341       {
5342         start = cp;
5343         while (*cp != '\0' && *cp != ',')
5344           cp++;
5345         make_tag (start, cp - start, TRUE,
5346                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5347       }
5348 }
5349
5350 \f
5351 /*
5352  * HTML support.
5353  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5354  * Contents of <a name=xxx> are tags with name xxx.
5355  *
5356  * Francesco Potortì, 2002.
5357  */
5358 static void
5359 HTML_labels (inf)
5360      FILE * inf;
5361 {
5362   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5363   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5364   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5365   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5366   char *end;
5367
5368
5369   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5370
5371   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5372     for (;;)                    /* loop on the same line */
5373       {
5374         if (skiptag)            /* skip HTML tag */
5375           {
5376             while (*dbp != '\0' && *dbp != '>')
5377               dbp++;
5378             if (*dbp == '>')
5379               {
5380                 dbp += 1;
5381                 skiptag = FALSE;
5382                 continue;       /* look on the same line */
5383               }
5384             break;              /* go to next line */
5385           }
5386
5387         else if (intag) /* look for "name=" or "id=" */
5388           {
5389             while (*dbp != '\0' && *dbp != '>'
5390                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5391               dbp++;
5392             if (*dbp == '\0')
5393               break;            /* go to next line */
5394             if (*dbp == '>')
5395               {
5396                 dbp += 1;
5397                 intag = FALSE;
5398                 continue;       /* look on the same line */
5399               }
5400             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5401                 || LOOKING_AT_NOCASE (dbp, "id="))
5402               {
5403                 bool quoted = (dbp[0] == '"');
5404
5405                 if (quoted)
5406                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5407                     continue;
5408                 else
5409                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5410                     continue;
5411                 linebuffer_setlen (&token_name, end - dbp);
5412                 strncpy (token_name.buffer, dbp, end - dbp);
5413                 token_name.buffer[end - dbp] = '\0';
5414
5415                 dbp = end;
5416                 intag = FALSE;  /* we found what we looked for */
5417                 skiptag = TRUE; /* skip to the end of the tag */
5418                 getnext = TRUE; /* then grab the text */
5419                 continue;       /* look on the same line */
5420               }
5421             dbp += 1;
5422           }
5423
5424         else if (getnext)       /* grab next tokens and tag them */
5425           {
5426             dbp = skip_spaces (dbp);
5427             if (*dbp == '\0')
5428               break;            /* go to next line */
5429             if (*dbp == '<')
5430               {
5431                 intag = TRUE;
5432                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5433                 continue;       /* look on the same line */
5434               }
5435
5436             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5437               continue;
5438             make_tag (token_name.buffer, token_name.len, TRUE,
5439                       dbp, end - dbp, lineno, linecharno);
5440             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5441             getnext = FALSE;
5442             break;              /* go to next line */
5443           }
5444
5445         else                    /* look for an interesting HTML tag */
5446           {
5447             while (*dbp != '\0' && *dbp != '<')
5448               dbp++;
5449             if (*dbp == '\0')
5450               break;            /* go to next line */
5451             intag = TRUE;
5452             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5453               {
5454                 inanchor = TRUE;
5455                 continue;       /* look on the same line */
5456               }
5457             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5458                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5459                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5460                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5461               {
5462                 intag = FALSE;
5463                 getnext = TRUE;
5464                 continue;       /* look on the same line */
5465               }
5466             dbp += 1;
5467           }
5468       }
5469 }
5470
5471 \f
5472 /*
5473  * Prolog support
5474  *
5475  * Assumes that the predicate or rule starts at column 0.
5476  * Only the first clause of a predicate or rule is added.
5477  * Original code by Sunichirou Sugou (1989)
5478  * Rewritten by Anders Lindgren (1996)
5479  */
5480 static int prolog_pr __P((char *, char *));
5481 static void prolog_skip_comment __P((linebuffer *, FILE *));
5482 static int prolog_atom __P((char *, int));
5483
5484 static void
5485 Prolog_functions (inf)
5486      FILE *inf;
5487 {
5488   char *cp, *last;
5489   int len;
5490   int allocated;
5491
5492   allocated = 0;
5493   len = 0;
5494   last = NULL;
5495
5496   LOOP_ON_INPUT_LINES (inf, lb, cp)
5497     {
5498       if (cp[0] == '\0')        /* Empty line */
5499         continue;
5500       else if (iswhite (cp[0])) /* Not a predicate */
5501         continue;
5502       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5503         prolog_skip_comment (&lb, inf);
5504       else if ((len = prolog_pr (cp, last)) > 0)
5505         {
5506           /* Predicate or rule.  Store the function name so that we
5507              only generate a tag for the first clause.  */
5508           if (last == NULL)
5509             last = xnew(len + 1, char);
5510           else if (len + 1 > allocated)
5511             xrnew (last, len + 1, char);
5512           allocated = len + 1;
5513           strncpy (last, cp, len);
5514           last[len] = '\0';
5515         }
5516     }
5517   if (last != NULL)
5518     free (last);
5519 }
5520
5521
5522 static void
5523 prolog_skip_comment (plb, inf)
5524      linebuffer *plb;
5525      FILE *inf;
5526 {
5527   char *cp;
5528
5529   do
5530     {
5531       for (cp = plb->buffer; *cp != '\0'; cp++)
5532         if (cp[0] == '*' && cp[1] == '/')
5533           return;
5534       readline (plb, inf);
5535     }
5536   while (!feof(inf));
5537 }
5538
5539 /*
5540  * A predicate or rule definition is added if it matches:
5541  *     <beginning of line><Prolog Atom><whitespace>(
5542  * or  <beginning of line><Prolog Atom><whitespace>:-
5543  *
5544  * It is added to the tags database if it doesn't match the
5545  * name of the previous clause header.
5546  *
5547  * Return the size of the name of the predicate or rule, or 0 if no
5548  * header was found.
5549  */
5550 static int
5551 prolog_pr (s, last)
5552      char *s;
5553      char *last;                /* Name of last clause. */
5554 {
5555   int pos;
5556   int len;
5557
5558   pos = prolog_atom (s, 0);
5559   if (pos < 1)
5560     return 0;
5561
5562   len = pos;
5563   pos = skip_spaces (s + pos) - s;
5564
5565   if ((s[pos] == '.'
5566        || (s[pos] == '(' && (pos += 1))
5567        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5568       && (last == NULL          /* save only the first clause */
5569           || len != (int)strlen (last)
5570           || !strneq (s, last, len)))
5571         {
5572           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5573           return len;
5574         }
5575   else
5576     return 0;
5577 }
5578
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there is no atom.
 *
 * Two forms are recognized:
 * - A lower case letter or an underscore, followed by any number of
 *   alphanumeric characters and underscores.
 * - A string in single quotes.  Inside it, two single quotes in a row
 *   stand for one, and a backslash quotes the character after it.
 *   A quoted atom that is not closed on the line is rejected.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (s[pos] != '\'')
    {
      /* The atom is unquoted, or there is no atom at all. */
      if (!ISLOWER(s[pos]) && s[pos] != '_')
        return -1;
      for (pos++; ISALNUM(s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos - start;
    }

  pos++;                        /* skip the opening quote */
  for (;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (s[pos + 1] == '\0')
          return -1;
        pos += 2;
        break;

      case '\'':
        pos++;
        if (s[pos] != '\'')
          return pos - start;   /* that was the closing quote */
        pos++;                  /* a doubled quote */
        break;

      default:
        pos++;
        break;
      }
}
5637
5638 \f
5639 /*
5640  * Support for Erlang
5641  *
5642  * Generates tags for functions, defines, and records.
5643  * Assumes that Erlang functions start at column 0.
5644  * Original code by Anders Lindgren (1996)
5645  */
5646 static int erlang_func __P((char *, char *));
5647 static void erlang_attribute __P((char *));
5648 static int erlang_atom __P((char *));
5649
5650 static void
5651 Erlang_functions (inf)
5652      FILE *inf;
5653 {
5654   char *cp, *last;
5655   int len;
5656   int allocated;
5657
5658   allocated = 0;
5659   len = 0;
5660   last = NULL;
5661
5662   LOOP_ON_INPUT_LINES (inf, lb, cp)
5663     {
5664       if (cp[0] == '\0')        /* Empty line */
5665         continue;
5666       else if (iswhite (cp[0])) /* Not function nor attribute */
5667         continue;
5668       else if (cp[0] == '%')    /* comment */
5669         continue;
5670       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5671         continue;
5672       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5673         {
5674           erlang_attribute (cp);
5675           if (last != NULL)
5676             {
5677               free (last);
5678               last = NULL;
5679             }
5680         }
5681       else if ((len = erlang_func (cp, last)) > 0)
5682         {
5683           /*
5684            * Function.  Store the function name so that we only
5685            * generates a tag for the first clause.
5686            */
5687           if (last == NULL)
5688             last = xnew (len + 1, char);
5689           else if (len + 1 > allocated)
5690             xrnew (last, len + 1, char);
5691           allocated = len + 1;
5692           strncpy (last, cp, len);
5693           last[len] = '\0';
5694         }
5695     }
5696   if (last != NULL)
5697     free (last);
5698 }
5699
5700
5701 /*
5702  * A function definition is added if it matches:
5703  *     <beginning of line><Erlang Atom><whitespace>(
5704  *
5705  * It is added to the tags database if it doesn't match the
5706  * name of the previous clause header.
5707  *
5708  * Return the size of the name of the function, or 0 if no function
5709  * was found.
5710  */
5711 static int
5712 erlang_func (s, last)
5713      char *s;
5714      char *last;                /* Name of last clause. */
5715 {
5716   int pos;
5717   int len;
5718
5719   pos = erlang_atom (s);
5720   if (pos < 1)
5721     return 0;
5722
5723   len = pos;
5724   pos = skip_spaces (s + pos) - s;
5725
5726   /* Save only the first clause. */
5727   if (s[pos++] == '('
5728       && (last == NULL
5729           || len != (int)strlen (last)
5730           || !strneq (s, last, len)))
5731         {
5732           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5733           return len;
5734         }
5735
5736   return 0;
5737 }
5738
5739
5740 /*
5741  * Handle attributes.  Currently, tags are generated for defines
5742  * and records.
5743  *
5744  * They are on the form:
5745  * -define(foo, bar).
5746  * -define(Foo(M, N), M+N).
5747  * -record(graph, {vtab = notable, cyclic = true}).
5748  */
5749 static void
5750 erlang_attribute (s)
5751      char *s;
5752 {
5753   char *cp = s;
5754
5755   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5756       && *cp++ == '(')
5757     {
5758       int len = erlang_atom (skip_spaces (cp));
5759       if (len > 0)
5760         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5761     }
5762   return;
5763 }
5764
5765
/*
 * Consume the Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 when S does not start
 * with an atom.
 *
 * An unquoted atom is a letter or an underscore followed by any number
 * of alphanumeric characters and underscores.  A quoted atom is
 * enclosed in single quotes, with a backslash quoting the character
 * after it; a quoted atom that is not closed on the line is rejected.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (s[0] == '\'')
    {
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;            /* look at the quoted character */
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
    }

  return pos;
}
5794
5795 \f
5796 static char *scan_separators __P((char *));
5797 static void add_regex __P((char *, language *));
5798 static char *substitute __P((char *, char *, struct re_registers *));
5799
5800 /*
5801  * Take a string like "/blah/" and turn it into "blah", verifying
5802  * that the first and last characters are the same, and handling
5803  * quoted separator characters.  Actually, stops on the occurrence of
5804  * an unquoted separator.  Also process \t, \n, etc. and turn into
5805  * appropriate characters. Works in place.  Null terminates name string.
5806  * Returns pointer to terminating separator, or NULL for
5807  * unterminated regexps.
5808  */
5809 static char *
5810 scan_separators (name)
5811      char *name;
5812 {
5813   char sep = name[0];
5814   char *copyto = name;
5815   bool quoted = FALSE;
5816
5817   for (++name; *name != '\0'; ++name)
5818     {
5819       if (quoted)
5820         {
5821           switch (*name)
5822             {
5823             case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
5824             case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
5825             case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
5826             case 'e': *copyto++ = 033; break;    /* ESC (delete)         */
5827             case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
5828             case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
5829             case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
5830             case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
5831             case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
5832             default:
5833               if (*name == sep)
5834                 *copyto++ = sep;
5835               else
5836                 {
5837                   /* Something else is quoted, so preserve the quote. */
5838                   *copyto++ = '\\';
5839                   *copyto++ = *name;
5840                 }
5841               break;
5842             }
5843           quoted = FALSE;
5844         }
5845       else if (*name == '\\')
5846         quoted = TRUE;
5847       else if (*name == sep)
5848         break;
5849       else
5850         *copyto++ = *name;
5851     }
5852   if (*name != sep)
5853     name = NULL;                /* signal unterminated regexp */
5854
5855   /* Terminate copied string. */
5856   *copyto = '\0';
5857   return name;
5858 }
5859
5860 /* Look at the argument of --regex or --no-regex and do the right
5861    thing.  Same for each line of a regexp file. */
5862 static void
5863 analyse_regex (regex_arg)
5864      char *regex_arg;
5865 {
5866   if (regex_arg == NULL)
5867     {
5868       free_regexps ();          /* --no-regex: remove existing regexps */
5869       return;
5870     }
5871
5872   /* A real --regexp option or a line in a regexp file. */
5873   switch (regex_arg[0])
5874     {
5875       /* Comments in regexp file or null arg to --regex. */
5876     case '\0':
5877     case ' ':
5878     case '\t':
5879       break;
5880
5881       /* Read a regex file.  This is recursive and may result in a
5882          loop, which will stop when the file descriptors are exhausted. */
5883     case '@':
5884       {
5885         FILE *regexfp;
5886         linebuffer regexbuf;
5887         char *regexfile = regex_arg + 1;
5888
5889         /* regexfile is a file containing regexps, one per line. */
5890         regexfp = fopen (regexfile, "r");
5891         if (regexfp == NULL)
5892           {
5893             pfatal (regexfile);
5894             return;
5895           }
5896         linebuffer_init (&regexbuf);
5897         while (readline_internal (&regexbuf, regexfp) > 0)
5898           analyse_regex (regexbuf.buffer);
5899         free (regexbuf.buffer);
5900         fclose (regexfp);
5901       }
5902       break;
5903
5904       /* Regexp to be used for a specific language only. */
5905     case '{':
5906       {
5907         language *lang;
5908         char *lang_name = regex_arg + 1;
5909         char *cp;
5910
5911         for (cp = lang_name; *cp != '}'; cp++)
5912           if (*cp == '\0')
5913             {
5914               error ("unterminated language name in regex: %s", regex_arg);
5915               return;
5916             }
5917         *cp++ = '\0';
5918         lang = get_language_from_langname (lang_name);
5919         if (lang == NULL)
5920           return;
5921         add_regex (cp, lang);
5922       }
5923       break;
5924
5925       /* Regexp to be used for any language. */
5926     default:
5927       add_regex (regex_arg, NULL);
5928       break;
5929     }
5930 }
5931
5932 /* Separate the regexp pattern, compile it,
5933    and care for optional name and modifiers. */
5934 static void
5935 add_regex (regexp_pattern, lang)
5936      char *regexp_pattern;
5937      language *lang;
5938 {
5939   static struct re_pattern_buffer zeropattern;
5940   char sep, *pat, *name, *modifiers;
5941   const char *err;
5942   struct re_pattern_buffer *patbuf;
5943   regexp *rp;
5944   bool
5945     force_explicit_name = TRUE, /* do not use implicit tag names */
5946     ignore_case = FALSE,        /* case is significant */
5947     multi_line = FALSE,         /* matches are done one line at a time */
5948     single_line = FALSE;        /* dot does not match newline */
5949
5950
5951   if (strlen(regexp_pattern) < 3)
5952     {
5953       error ("null regexp", (char *)NULL);
5954       return;
5955     }
5956   sep = regexp_pattern[0];
5957   name = scan_separators (regexp_pattern);
5958   if (name == NULL)
5959     {
5960       error ("%s: unterminated regexp", regexp_pattern);
5961       return;
5962     }
5963   if (name[1] == sep)
5964     {
5965       error ("null name for regexp \"%s\"", regexp_pattern);
5966       return;
5967     }
5968   modifiers = scan_separators (name);
5969   if (modifiers == NULL)        /* no terminating separator --> no name */
5970     {
5971       modifiers = name;
5972       name = "";
5973     }
5974   else
5975     modifiers += 1;             /* skip separator */
5976
5977   /* Parse regex modifiers. */
5978   for (; modifiers[0] != '\0'; modifiers++)
5979     switch (modifiers[0])
5980       {
5981       case 'N':
5982         if (modifiers == name)
5983           error ("forcing explicit tag name but no name, ignoring", NULL);
5984         force_explicit_name = TRUE;
5985         break;
5986       case 'i':
5987         ignore_case = TRUE;
5988         break;
5989       case 's':
5990         single_line = TRUE;
5991         /* FALLTHRU */
5992       case 'm':
5993         multi_line = TRUE;
5994         need_filebuf = TRUE;
5995         break;
5996       default:
5997         {
5998           char wrongmod [2];
5999           wrongmod[0] = modifiers[0];
6000           wrongmod[1] = '\0';
6001           error ("invalid regexp modifier `%s', ignoring", wrongmod);
6002         }
6003         break;
6004       }
6005
6006   patbuf = xnew (1, struct re_pattern_buffer);
6007   *patbuf = zeropattern;
6008   if (ignore_case)
6009     {
6010       static char lc_trans[CHARS];
6011       int i;
6012       for (i = 0; i < CHARS; i++)
6013         lc_trans[i] = lowcase (i);
6014       patbuf->translate = lc_trans;     /* translation table to fold case  */
6015     }
6016
6017   if (multi_line)
6018     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6019   else
6020     pat = regexp_pattern;
6021
6022   if (single_line)
6023     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6024   else
6025     re_set_syntax (RE_SYNTAX_EMACS);
6026
6027   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6028   if (multi_line)
6029     free (pat);
6030   if (err != NULL)
6031     {
6032       error ("%s while compiling pattern", err);
6033       return;
6034     }
6035
6036   rp = p_head;
6037   p_head = xnew (1, regexp);
6038   p_head->pattern = savestr (regexp_pattern);
6039   p_head->p_next = rp;
6040   p_head->lang = lang;
6041   p_head->pat = patbuf;
6042   p_head->name = savestr (name);
6043   p_head->error_signaled = FALSE;
6044   p_head->force_explicit_name = force_explicit_name;
6045   p_head->ignore_case = ignore_case;
6046   p_head->multi_line = multi_line;
6047 }
6048
6049 /*
6050  * Do the substitutions indicated by the regular expression and
6051  * arguments.
6052  */
6053 static char *
6054 substitute (in, out, regs)
6055      char *in, *out;
6056      struct re_registers *regs;
6057 {
6058   char *result, *t;
6059   int size, dig, diglen;
6060
6061   result = NULL;
6062   size = strlen (out);
6063
6064   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6065   if (out[size - 1] == '\\')
6066     fatal ("pattern error in \"%s\"", out);
6067   for (t = etags_strchr (out, '\\');
6068        t != NULL;
6069        t = etags_strchr (t + 2, '\\'))
6070     if (ISDIGIT (t[1]))
6071       {
6072         dig = t[1] - '0';
6073         diglen = regs->end[dig] - regs->start[dig];
6074         size += diglen - 2;
6075       }
6076     else
6077       size -= 1;
6078
6079   /* Allocate space and do the substitutions. */
6080   assert (size >= 0);
6081   result = xnew (size + 1, char);
6082
6083   for (t = result; *out != '\0'; out++)
6084     if (*out == '\\' && ISDIGIT (*++out))
6085       {
6086         dig = *out - '0';
6087         diglen = regs->end[dig] - regs->start[dig];
6088         strncpy (t, in + regs->start[dig], diglen);
6089         t += diglen;
6090       }
6091     else
6092       *t++ = *out;
6093   *t = '\0';
6094
6095   assert (t <= result + size);
6096   assert (t - result == (int)strlen (result));
6097
6098   return result;
6099 }
6100
6101 /* Deallocate all regexps. */
6102 static void
6103 free_regexps ()
6104 {
6105   regexp *rp;
6106   while (p_head != NULL)
6107     {
6108       rp = p_head->p_next;
6109       free (p_head->pattern);
6110       free (p_head->name);
6111       free (p_head);
6112       p_head = rp;
6113     }
6114   return;
6115 }
6116
6117 /*
6118  * Reads the whole file as a single string from `filebuf' and looks for
6119  * multi-line regular expressions, creating tags on matches.
6120  * readline already dealt with normal regexps.
6121  *
6122  * Idea by Ben Wing <ben@666.com> (2002).
6123  */
6124 static void
6125 regex_tag_multiline ()
6126 {
6127   char *buffer = filebuf.buffer;
6128   regexp *rp;
6129   char *name;
6130
6131   for (rp = p_head; rp != NULL; rp = rp->p_next)
6132     {
6133       int match = 0;
6134
6135       if (!rp->multi_line)
6136         continue;               /* skip normal regexps */
6137
6138       /* Generic initialisations before parsing file from memory. */
6139       lineno = 1;               /* reset global line number */
6140       charno = 0;               /* reset global char number */
6141       linecharno = 0;           /* reset global char number of line start */
6142
6143       /* Only use generic regexps or those for the current language. */
6144       if (rp->lang != NULL && rp->lang != curfdp->lang)
6145         continue;
6146
6147       while (match >= 0 && match < filebuf.len)
6148         {
6149           match = re_search (rp->pat, buffer, filebuf.len, charno,
6150                              filebuf.len - match, &rp->regs);
6151           switch (match)
6152             {
6153             case -2:
6154               /* Some error. */
6155               if (!rp->error_signaled)
6156                 {
6157                   error ("regexp stack overflow while matching \"%s\"",
6158                          rp->pattern);
6159                   rp->error_signaled = TRUE;
6160                 }
6161               break;
6162             case -1:
6163               /* No match. */
6164               break;
6165             default:
6166               if (match == rp->regs.end[0])
6167                 {
6168                   if (!rp->error_signaled)
6169                     {
6170                       error ("regexp matches the empty string: \"%s\"",
6171                              rp->pattern);
6172                       rp->error_signaled = TRUE;
6173                     }
6174                   match = -3;   /* exit from while loop */
6175                   break;
6176                 }
6177
6178               /* Match occurred.  Construct a tag. */
6179               while (charno < rp->regs.end[0])
6180                 if (buffer[charno++] == '\n')
6181                   lineno++, linecharno = charno;
6182               name = rp->name;
6183               if (name[0] == '\0')
6184                 name = NULL;
6185               else /* make a named tag */
6186                 name = substitute (buffer, rp->name, &rp->regs);
6187               if (rp->force_explicit_name)
6188                 /* Force explicit tag name, if a name is there. */
6189                 pfnote (name, TRUE, buffer + linecharno,
6190                         charno - linecharno + 1, lineno, linecharno);
6191               else
6192                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6193                           charno - linecharno + 1, lineno, linecharno);
6194               break;
6195             }
6196         }
6197     }
6198 }
6199
6200 \f
6201 static bool
6202 nocase_tail (cp)
6203      char *cp;
6204 {
6205   register int len = 0;
6206
6207   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6208     cp++, len++;
6209   if (*cp == '\0' && !intoken (dbp[len]))
6210     {
6211       dbp += len;
6212       return TRUE;
6213     }
6214   return FALSE;
6215 }
6216
6217 static void
6218 get_tag (bp, namepp)
6219      register char *bp;
6220      char **namepp;
6221 {
6222   register char *cp = bp;
6223
6224   if (*bp != '\0')
6225     {
6226       /* Go till you get to white space or a syntactic break */
6227       for (cp = bp + 1; !notinname (*cp); cp++)
6228         continue;
6229       make_tag (bp, cp - bp, TRUE,
6230                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6231     }
6232
6233   if (namepp != NULL)
6234     *namepp = savenstr (bp, cp - bp);
6235 }
6236
6237 /*
6238  * Read a line of text from `stream' into `lbp', excluding the
6239  * newline or CR-NL, if any.  Return the number of characters read from
6240  * `stream', which is the length of the line including the newline.
6241  *
6242  * On DOS or Windows we do not count the CR character, if any before the
6243  * NL, in the returned length; this mirrors the behavior of Emacs on those
6244  * platforms (for text files, it translates CR-NL to NL as it reads in the
6245  * file).
6246  *
6247  * If multi-line regular expressions are requested, each line read is
6248  * appended to `filebuf'.
6249  */
6250 static long
6251 readline_internal (lbp, stream)
6252      linebuffer *lbp;
6253      register FILE *stream;
6254 {
6255   char *buffer = lbp->buffer;
6256   register char *p = lbp->buffer;
6257   register char *pend;
6258   int chars_deleted;
6259
6260   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6261
6262   for (;;)
6263     {
6264       register int c = getc (stream);
6265       if (p == pend)
6266         {
6267           /* We're at the end of linebuffer: expand it. */
6268           lbp->size *= 2;
6269           xrnew (buffer, lbp->size, char);
6270           p += buffer - lbp->buffer;
6271           pend = buffer + lbp->size;
6272           lbp->buffer = buffer;
6273         }
6274       if (c == EOF)
6275         {
6276           *p = '\0';
6277           chars_deleted = 0;
6278           break;
6279         }
6280       if (c == '\n')
6281         {
6282           if (p > buffer && p[-1] == '\r')
6283             {
6284               p -= 1;
6285 #ifdef DOS_NT
6286              /* Assume CRLF->LF translation will be performed by Emacs
6287                 when loading this file, so CRs won't appear in the buffer.
6288                 It would be cleaner to compensate within Emacs;
6289                 however, Emacs does not know how many CRs were deleted
6290                 before any given point in the file.  */
6291               chars_deleted = 1;
6292 #else
6293               chars_deleted = 2;
6294 #endif
6295             }
6296           else
6297             {
6298               chars_deleted = 1;
6299             }
6300           *p = '\0';
6301           break;
6302         }
6303       *p++ = c;
6304     }
6305   lbp->len = p - buffer;
6306
6307   if (need_filebuf              /* we need filebuf for multi-line regexps */
6308       && chars_deleted > 0)     /* not at EOF */
6309     {
6310       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6311         {
6312           /* Expand filebuf. */
6313           filebuf.size *= 2;
6314           xrnew (filebuf.buffer, filebuf.size, char);
6315         }
6316       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6317       filebuf.len += lbp->len;
6318       filebuf.buffer[filebuf.len++] = '\n';
6319       filebuf.buffer[filebuf.len] = '\0';
6320     }
6321
6322   return lbp->len + chars_deleted;
6323 }
6324
6325 /*
6326  * Like readline_internal, above, but in addition try to match the
6327  * input line against relevant regular expressions and manage #line
6328  * directives.
6329  */
6330 static void
6331 readline (lbp, stream)
6332      linebuffer *lbp;
6333      FILE *stream;
6334 {
6335   long result;
6336
6337   linecharno = charno;          /* update global char number of line start */
6338   result = readline_internal (lbp, stream); /* read line */
6339   lineno += 1;                  /* increment global line number */
6340   charno += result;             /* increment global char number */
6341
6342   /* Honour #line directives. */
6343   if (!no_line_directive)
6344     {
6345       static bool discard_until_line_directive;
6346
6347       /* Check whether this is a #line directive. */
6348       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6349         {
6350           unsigned int lno;
6351           int start = 0;
6352
6353           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6354               && start > 0)     /* double quote character found */
6355             {
6356               char *endp = lbp->buffer + start;
6357
6358               while ((endp = etags_strchr (endp, '"')) != NULL
6359                      && endp[-1] == '\\')
6360                 endp++;
6361               if (endp != NULL)
6362                 /* Ok, this is a real #line directive.  Let's deal with it. */
6363                 {
6364                   char *taggedabsname;  /* absolute name of original file */
6365                   char *taggedfname;    /* name of original file as given */
6366                   char *name;           /* temp var */
6367
6368                   discard_until_line_directive = FALSE; /* found it */
6369                   name = lbp->buffer + start;
6370                   *endp = '\0';
6371                   canonicalize_filename (name); /* for DOS */
6372                   taggedabsname = absolute_filename (name, tagfiledir);
6373                   if (filename_is_absolute (name)
6374                       || filename_is_absolute (curfdp->infname))
6375                     taggedfname = savestr (taggedabsname);
6376                   else
6377                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6378
6379                   if (streq (curfdp->taggedfname, taggedfname))
6380                     /* The #line directive is only a line number change.  We
6381                        deal with this afterwards. */
6382                     free (taggedfname);
6383                   else
6384                     /* The tags following this #line directive should be
6385                        attributed to taggedfname.  In order to do this, set
6386                        curfdp accordingly. */
6387                     {
6388                       fdesc *fdp; /* file description pointer */
6389
6390                       /* Go look for a file description already set up for the
6391                          file indicated in the #line directive.  If there is
6392                          one, use it from now until the next #line
6393                          directive. */
6394                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6395                         if (streq (fdp->infname, curfdp->infname)
6396                             && streq (fdp->taggedfname, taggedfname))
6397                           /* If we remove the second test above (after the &&)
6398                              then all entries pertaining to the same file are
6399                              coalesced in the tags file.  If we use it, then
6400                              entries pertaining to the same file but generated
6401                              from different files (via #line directives) will
6402                              go into separate sections in the tags file.  These
6403                              alternatives look equivalent.  The first one
6404                              destroys some apparently useless information. */
6405                           {
6406                             curfdp = fdp;
6407                             free (taggedfname);
6408                             break;
6409                           }
6410                       /* Else, if we already tagged the real file, skip all
6411                          input lines until the next #line directive. */
6412                       if (fdp == NULL) /* not found */
6413                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6414                           if (streq (fdp->infabsname, taggedabsname))
6415                             {
6416                               discard_until_line_directive = TRUE;
6417                               free (taggedfname);
6418                               break;
6419                             }
6420                       /* Else create a new file description and use that from
6421                          now on, until the next #line directive. */
6422                       if (fdp == NULL) /* not found */
6423                         {
6424                           fdp = fdhead;
6425                           fdhead = xnew (1, fdesc);
6426                           *fdhead = *curfdp; /* copy curr. file description */
6427                           fdhead->next = fdp;
6428                           fdhead->infname = savestr (curfdp->infname);
6429                           fdhead->infabsname = savestr (curfdp->infabsname);
6430                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6431                           fdhead->taggedfname = taggedfname;
6432                           fdhead->usecharno = FALSE;
6433                           fdhead->prop = NULL;
6434                           fdhead->written = FALSE;
6435                           curfdp = fdhead;
6436                         }
6437                     }
6438                   free (taggedabsname);
6439                   lineno = lno - 1;
6440                   readline (lbp, stream);
6441                   return;
6442                 } /* if a real #line directive */
6443             } /* if #line is followed by a a number */
6444         } /* if line begins with "#line " */
6445
6446       /* If we are here, no #line directive was found. */
6447       if (discard_until_line_directive)
6448         {
6449           if (result > 0)
6450             {
6451               /* Do a tail recursion on ourselves, thus discarding the contents
6452                  of the line buffer. */
6453               readline (lbp, stream);
6454               return;
6455             }
6456           /* End of file. */
6457           discard_until_line_directive = FALSE;
6458           return;
6459         }
6460     } /* if #line directives should be considered */
6461
6462   {
6463     int match;
6464     regexp *rp;
6465     char *name;
6466
6467     /* Match against relevant regexps. */
6468     if (lbp->len > 0)
6469       for (rp = p_head; rp != NULL; rp = rp->p_next)
6470         {
6471           /* Only use generic regexps or those for the current language.
6472              Also do not use multiline regexps, which is the job of
6473              regex_tag_multiline. */
6474           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6475               || rp->multi_line)
6476             continue;
6477
6478           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6479           switch (match)
6480             {
6481             case -2:
6482               /* Some error. */
6483               if (!rp->error_signaled)
6484                 {
6485                   error ("regexp stack overflow while matching \"%s\"",
6486                          rp->pattern);
6487                   rp->error_signaled = TRUE;
6488                 }
6489               break;
6490             case -1:
6491               /* No match. */
6492               break;
6493             case 0:
6494               /* Empty string matched. */
6495               if (!rp->error_signaled)
6496                 {
6497                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6498                   rp->error_signaled = TRUE;
6499                 }
6500               break;
6501             default:
6502               /* Match occurred.  Construct a tag. */
6503               name = rp->name;
6504               if (name[0] == '\0')
6505                 name = NULL;
6506               else /* make a named tag */
6507                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6508               if (rp->force_explicit_name)
6509                 /* Force explicit tag name, if a name is there. */
6510                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6511               else
6512                 make_tag (name, strlen (name), TRUE,
6513                           lbp->buffer, match, lineno, linecharno);
6514               break;
6515             }
6516         }
6517   }
6518 }
6519
6520 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is made by
 * savenstr, so it lives in freshly allocated storage of
 * strlen(CP)+1 bytes.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6531
6532 /*
6533  * Return a pointer to a space of size LEN+1 allocated with xnew where
6534  * the string CP has been copied for at most the first LEN characters.
6535  */
6536 static char *
6537 savenstr (cp, len)
6538      char *cp;
6539      int len;
6540 {
6541   register char *dp;
6542
6543   dp = xnew (len + 1, char);
6544   strncpy (dp, cp, len);
6545   dp[len] = '\0';
6546   return dp;
6547 }
6548
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL is considered
 * part of the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strrchr, included for portability.  As in
 * strrchr, C is converted to char before comparing, so that bytes
 * above 0x7f are found even where plain char is signed.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  register char ch = (char) c;

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
  } while (*sp++);
  return (char *)r;
}
6570
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL is considered
 * part of the string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strchr, included for portability.  As in
 * strchr, C is converted to char before comparing, so that bytes
 * above 0x7f are found even where plain char is signed.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  register char ch = (char) c;

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6589
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Return zero if they are equal, otherwise the difference between the
 * first pair of characters that differ (compared in lower case when
 * both are alphabetic).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int diff;

      if (ISALPHA (*s1) && ISALPHA (*s2))
        diff = lowcase (*s1) - lowcase (*s2);
      else
        diff = *s1 - *s2;

      /* Stop at the first difference or at the end of S1. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6610
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: at most the first N
 * characters of each string are examined, and the result is zero if
 * they are all equal (or if N is not positive).  Otherwise return the
 * difference between the first pair of characters that differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still allowed to be compared.  It is
     decremented only after a pair has matched, so that reaching the
     end of S1 exactly at the limit is still recognized as a match. */
  while (n > 0 && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6636
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  The scan continues for as long as iswhite accepts
   the current character. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6646
/* Return a pointer to the first character at or after CP that is
   either accepted by iswhite or is the terminating NUL. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
6656
/* Report an error through `error' (S1 is the printf control string,
   S2 its argument), then terminate the program with EXIT_FAILURE.
   This function does not return.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6665
/* Print the system error message for the current `errno', prefixed by
   S1, using perror; then terminate the program with EXIT_FAILURE.
   This function does not return.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6673
6674 static void
6675 suggest_asking_for_help ()
6676 {
6677   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6678            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6679   exit (EXIT_FAILURE);
6680 }
6681
6682 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6683 static void
6684 error (s1, s2)
6685      const char *s1, *s2;
6686 {
6687   fprintf (stderr, "%s: ", progname);
6688   fprintf (stderr, s1, s2);
6689   fprintf (stderr, "\n");
6690 }
6691
6692 /* Return a newly-allocated string whose contents
6693    concatenate those of s1, s2, s3.  */
6694 static char *
6695 concat (s1, s2, s3)
6696      char *s1, *s2, *s3;
6697 {
6698   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6699   char *result = xnew (len1 + len2 + len3 + 1, char);
6700
6701   strcpy (result, s1);
6702   strcpy (result + len1, s2);
6703   strcpy (result + len1 + len2, s3);
6704   result[len1 + len2 + len3] = '\0';
6705
6706   return result;
6707 }
6708
6709 \f
6710 /* Does the same work as the system V getcwd, but does not need to
6711    guess the buffer size in advance. */
6712 static char *
6713 etags_getcwd ()
6714 {
6715 #ifdef HAVE_GETCWD
6716   int bufsize = 200;
6717   char *path = xnew (bufsize, char);
6718
6719   while (getcwd (path, bufsize) == NULL)
6720     {
6721       if (errno != ERANGE)
6722         pfatal ("getcwd");
6723       bufsize *= 2;
6724       free (path);
6725       path = xnew (bufsize, char);
6726     }
6727
6728   canonicalize_filename (path);
6729   return path;
6730
6731 #else /* not HAVE_GETCWD */
6732 #if MSDOS
6733
6734   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6735
6736   getwd (path);
6737
6738   for (p = path; *p != '\0'; p++)
6739     if (*p == '\\')
6740       *p = '/';
6741     else
6742       *p = lowcase (*p);
6743
6744   return strdup (path);
6745 #else /* not MSDOS */
6746   linebuffer path;
6747   FILE *pipe;
6748
6749   linebuffer_init (&path);
6750   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6751   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6752     pfatal ("pwd");
6753   pclose (pipe);
6754
6755   return path.buffer;
6756 #endif /* not MSDOS */
6757 #endif /* not HAVE_GETCWD */
6758 }
6759
6760 /* Return a newly allocated string containing the file name of FILE
6761    relative to the absolute directory DIR (which should end with a slash). */
6762 static char *
6763 relative_filename (file, dir)
6764      char *file, *dir;
6765 {
6766   char *fp, *dp, *afn, *res;
6767   int i;
6768
6769   /* Find the common root of file and dir (with a trailing slash). */
6770   afn = absolute_filename (file, cwd);
6771   fp = afn;
6772   dp = dir;
6773   while (*fp++ == *dp++)
6774     continue;
6775   fp--, dp--;                   /* back to the first differing char */
6776 #ifdef DOS_NT
6777   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6778     return afn;
6779 #endif
6780   do                            /* look at the equal chars until '/' */
6781     fp--, dp--;
6782   while (*fp != '/');
6783
6784   /* Build a sequence of "../" strings for the resulting relative file name. */
6785   i = 0;
6786   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6787     i += 1;
6788   res = xnew (3*i + strlen (fp + 1) + 1, char);
6789   res[0] = '\0';
6790   while (i-- > 0)
6791     strcat (res, "../");
6792
6793   /* Add the file name relative to the common root of file and dir. */
6794   strcat (res, fp + 1);
6795   free (afn);
6796
6797   return res;
6798 }
6799
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as is, otherwise it is appended to DIR.
   Then every "/dirname/.." and "/." component is removed from the
   result.  If nothing is left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *from, *to;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the
     string is moved down over the deleted part with an explicit
     forward copy: source and destination overlap, which strcpy does
     not allow. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards for the start of the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              for (to = cp, from = slashp + 3; (*to++ = *from++) != '\0'; )
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              for (to = slashp, from = slashp + 2; (*to++ = *from++) != '\0'; )
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6863
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).

   FILE is canonicalized in place.  If it contains no slash, the result
   is a copy of DIR.  Otherwise FILE is cut just after its last slash
   while the directory part is made absolute, and is restored
   afterwards.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *after, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily end FILE right after its directory part. */
  after = last_slash + 1;
  saved = *after;
  *after = '\0';
  res = absolute_filename (file, dir);
  *after = saved;

  return res;
}
6885
/* Whether the argument string is an absolute file name, that is, one
   beginning with a slash or, on DOS_NT, with a drive letter followed
   by ":/".  The argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return TRUE;
#endif
  return fn[0] == '/';
}
6898
/* Bring the file name FN into canonical form.  Works in place.
   On DOS_NT a lower case drive letter is turned into upper case and
   backslashes are translated into slashes; elsewhere nothing is done. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  p = fn;
  while (*p != '\0')
    {
      if (*p == '\\')
        *p = '/';
      p++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6917
6918 \f
6919 /* Initialize a linebuffer for use */
6920 static void
6921 linebuffer_init (lbp)
6922      linebuffer *lbp;
6923 {
6924   lbp->size = (DEBUG) ? 3 : 200;
6925   lbp->buffer = xnew (lbp->size, char);
6926   lbp->buffer[0] = '\0';
6927   lbp->len = 0;
6928 }
6929
6930 /* Set the minimum size of a string contained in a linebuffer. */
6931 static void
6932 linebuffer_setlen (lbp, toksize)
6933      linebuffer *lbp;
6934      int toksize;
6935 {
6936   while (lbp->size <= toksize)
6937     {
6938       lbp->size *= 2;
6939       xrnew (lbp->buffer, lbp->size, char);
6940     }
6941   lbp->len = toksize;
6942 }
6943
6944 /* Like malloc but get fatal error if memory is exhausted. */
6945 static PTR
6946 xmalloc (size)
6947      unsigned int size;
6948 {
6949   PTR result = (PTR) malloc (size);
6950   if (result == NULL)
6951     fatal ("virtual memory exhausted", (char *)NULL);
6952   return result;
6953 }
6954
6955 static PTR
6956 xrealloc (ptr, size)
6957      char *ptr;
6958      unsigned int size;
6959 {
6960   PTR result = (PTR) realloc (ptr, size);
6961   if (result == NULL)
6962     fatal ("virtual memory exhausted", (char *)NULL);
6963   return result;
6964 }
6965
6966 /*
6967  * Local Variables:
6968  * indent-tabs-mode: t
6969  * tab-width: 8
6970  * fill-column: 79
6971  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6972  * c-file-style: "gnu"
6973  * End:
6974  */
6975
6976 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6977    (do not change this comment) */
6978
6979 /* etags.c ends here */