lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005, 2006 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.25";
  45
  46 #define TRUE    1
  47 #define FALSE   0
  48
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # ifndef PTR                    /* for XEmacs */
  63 #   define PTR void *
  64 # endif
  65 # ifndef __P                    /* for XEmacs */
  66 #   define __P(args) args
  67 # endif
  68 #else  /* no config.h */
  69 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  70 #   define __P(args) args       /* use prototypes */
  71 #   define PTR void *           /* for generic pointers */
  72 # else /* not standard C */
  73 #   define __P(args) ()         /* no prototypes */
  74 #   define const                /* remove const for old compilers' sake */
  75 #   define PTR long *           /* don't use void* */
  76 # endif
  77 #endif /* !HAVE_CONFIG_H */
  78
  79 #ifndef _GNU_SOURCE
  80 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  81 #endif
  82
  83 /* WIN32_NATIVE is for XEmacs.
  84    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  85 #ifdef WIN32_NATIVE
  86 # undef MSDOS
  87 # undef  WINDOWSNT
  88 # define WINDOWSNT
  89 #endif /* WIN32_NATIVE */
  90
  91 #ifdef MSDOS
  92 # undef MSDOS
  93 # define MSDOS TRUE
  94 # include <fcntl.h>
  95 # include <sys/param.h>
  96 # include <io.h>
  97 # ifndef HAVE_CONFIG_H
  98 #   define DOS_NT
  99 #   include <sys/config.h>
 100 # endif
 101 #else
 102 # define MSDOS FALSE
 103 #endif /* MSDOS */
 104
 105 #ifdef WINDOWSNT
 106 # include <stdlib.h>
 107 # include <fcntl.h>
 108 # include <string.h>
 109 # include <direct.h>
 110 # include <io.h>
 111 # define MAXPATHLEN _MAX_PATH
 112 # undef HAVE_NTGUI
 113 # undef  DOS_NT
 114 # define DOS_NT
 115 # ifndef HAVE_GETCWD
 116 #   define HAVE_GETCWD
 117 # endif /* undef HAVE_GETCWD */
 118 #else /* not WINDOWSNT */
 119 # ifdef STDC_HEADERS
 120 #  include <stdlib.h>
 121 #  include <string.h>
 122 # else /* no standard C headers */
 123     extern char *getenv ();
 124 #  ifdef VMS
 125 #   define EXIT_SUCCESS 1
 126 #   define EXIT_FAILURE 0
 127 #  else /* no VMS */
 128 #   define EXIT_SUCCESS 0
 129 #   define EXIT_FAILURE 1
 130 #  endif
 131 # endif
 132 #endif /* !WINDOWSNT */
 133
 134 #ifdef HAVE_UNISTD_H
 135 # include <unistd.h>
 136 #else
 137 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 138     extern char *getcwd (char *buf, size_t size);
 139 # endif
 140 #endif /* HAVE_UNISTD_H */
 141
 142 #include <stdio.h>
 143 #include <ctype.h>
 144 #include <errno.h>
 145 #ifndef errno
 146   extern int errno;
 147 #endif
 148 #include <sys/types.h>
 149 #include <sys/stat.h>
 150
 151 #include <assert.h>
 152 #ifdef NDEBUG
 153 # undef  assert                 /* some systems have a buggy assert.h */
 154 # define assert(x) ((void) 0)
 155 #endif
 156
 157 #if !defined (S_ISREG) && defined (S_IFREG)
 158 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 159 #endif
 160
 161 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 162 # define NO_LONG_OPTIONS TRUE   /* NOTE(review): no #undef first, unlike DEBUG/MSDOS/CTAGS */
 163 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr) /* lopts and lind are dropped */
 164   extern char *optarg;          /* declared here: <getopt.h> is only included in the #else branch */
 165   extern int optind, opterr;
 166 #else
 167 # define NO_LONG_OPTIONS FALSE
 168 # include <getopt.h>
 169 #endif /* NO_LONG_OPTIONS */
 170
 171 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 172 # ifdef __CYGWIN__              /* compiling on Cygwin */
 173                              !!! NOTICE !!!
 174  the regex.h distributed with Cygwin is not compatible with etags, alas!
 175 If you want regular expression support, you should delete this notice and
 176               arrange to use the GNU regex.h and regex.c.
 177 # endif
 178 #endif
 179 #include <regex.h>
 180
 181 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 182  Leave it undefined to make the program "etags", which makes emacs-style
 183  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 184 #ifdef CTAGS
 185 # undef  CTAGS
 186 # define CTAGS TRUE
 187 #else
 188 # define CTAGS FALSE
 189 #endif
 190 /* String equality predicates.  Both operands must be non-NULL (asserted unless NDEBUG). */
 191 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 192 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 193 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 194 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 195
 196 #define CHARS 256               /* number of 8-bit char values (2^8) */
 197 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* x masked into 0..CHARS-1 */
 198 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 199 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 200 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 201 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 202 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 203
 204 #define ISALNUM(c)      isalnum (CHAR(c)) /* <ctype.h> tests applied to the masked value */
 205 #define ISALPHA(c)      isalpha (CHAR(c))
 206 #define ISDIGIT(c)      isdigit (CHAR(c))
 207 #define ISLOWER(c)      islower (CHAR(c))
 208
 209 #define lowcase(c)      tolower (CHAR(c)) /* case conversion applied to the masked value */
 210 #define upcase(c)       toupper (CHAR(c))
 211
 212
 213 /*
 214  *      xnew, xrnew -- allocate, reallocate storage for N objects of type Type
 215  *
 216  * SYNOPSIS:    Type *xnew (int n, Type);
 217  *              Type *xrnew (OldPointer, int n, Type);  -- also assigns to OldPointer
 218  */
 219 #if DEBUG
 220 # include "chkmalloc.h"         /* DEBUG builds allocate via trace_malloc/trace_realloc */
 221 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 222                                                   (n) * sizeof (Type)))
 223 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 224                                         (char *) (op), (n) * sizeof (Type)))
 225 #else
 226 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 227 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 228                                         (char *) (op), (n) * sizeof (Type)))
 229 #endif
 230
 231 #define bool int
 232
 233 typedef void Lang_function __P((FILE *));
 234
 235 typedef struct
 236 {
 237   char *suffix;                 /* file name suffix for this compressor */
 238   char *command;                /* takes one arg and decompresses to stdout */
 239 } compressor;
 240
 241 typedef struct
 242 {
 243   char *name;                   /* language name */
 244   char *help;                   /* detailed help for the language */
 245   Lang_function *function;      /* parse function */
 246   char **suffixes;              /* name suffixes of this language's files */
 247   char **filenames;             /* names of this language's files */
 248   char **interpreters;          /* interpreters for this language */
 249   bool metasource;              /* source used to generate other sources */
 250 } language;
 251
 252 typedef struct fdesc
 253 {
 254   struct fdesc *next;           /* for the linked list */
 255   char *infname;                /* uncompressed input file name */
 256   char *infabsname;             /* absolute uncompressed input file name */
 257   char *infabsdir;              /* absolute dir of input file */
 258   char *taggedfname;            /* file name to write in tagfile */
 259   language *lang;               /* language of file */
 260   char *prop;                   /* file properties to write in tagfile */
 261   bool usecharno;               /* etags tags shall contain char number */
 262   bool written;                 /* entry written in the tags file */
 263 } fdesc;
 264
 265 typedef struct node_st
 266 {                               /* sorting structure: one node of the binary tree of tags */
 267   struct node_st *left, *right; /* left and right children */
 268   fdesc *fdp;                   /* description of the file the tag belongs to */
 269   char *name;                   /* tag name */
 270   char *regex;                  /* search regexp */
 271   bool valid;                   /* write this tag on the tag file */
 272   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 273   bool been_warned;             /* warning already given for duplicated tag */
 274   int lno;                      /* line number tag is on */
 275   long cno;                     /* character number line starts on */
 276 } node;
 277
 278 /*
 279  * A `linebuffer' is a structure which holds a line of text.
 280  * `readline_internal' reads a line from a stream into a linebuffer
 281  * and works regardless of the length of the line.
 282  * SIZE is the size of BUFFER, LEN is the length of the string in
 283  * BUFFER after readline reads it.
 284  */
 285 typedef struct
 286 {
 287   long size;                    /* size of BUFFER */
 288   int len;                      /* length of the string held in BUFFER */
 289   char *buffer;                 /* the text of the line */
 290 } linebuffer;
 291
 292 /* Used to support mixing of --lang and file names. */
 293 typedef struct
 294 {
 295   enum {
 296     at_language,                /* a language specification */
 297     at_regexp,                  /* a regular expression */
 298     at_filename,                /* a file name */
 299     at_stdin,                   /* read from stdin here */
 300     at_end                      /* stop parsing the list */
 301   } arg_type;                   /* argument type */
 302   language *lang;               /* language associated with the argument */
 303   char *what;                   /* the argument itself */
 304 } argument;
 305
 306 /* Structure defining a regular expression; instances are chained through p_next. */
 307 typedef struct regexp
 308 {
 309   struct regexp *p_next;        /* pointer to next in list */
 310   language *lang;               /* if set, use only for this language */
 311   char *pattern;                /* the regexp pattern */
 312   char *name;                   /* tag name */
 313   struct re_pattern_buffer *pat; /* the compiled pattern */
 314   struct re_registers regs;     /* re registers */
 315   bool error_signaled;          /* already signaled for this regexp */
 316   bool force_explicit_name;     /* do not allow implicit tag name */
 317   bool ignore_case;             /* ignore case when matching */
 318   bool multi_line;              /* do a multi-line match on the whole file */
 319 } regexp;
 320
 321
 322 /* Many compilers barf on this:
 323         Lang_function Ada_funcs;
 324    so let's write it this way */
 325 static void Ada_funcs __P((FILE *));
 326 static void Asm_labels __P((FILE *));
 327 static void C_entries __P((int c_ext, FILE *));
 328 static void default_C_entries __P((FILE *));
 329 static void plain_C_entries __P((FILE *));
 330 static void Cjava_entries __P((FILE *));
 331 static void Cobol_paragraphs __P((FILE *));
 332 static void Cplusplus_entries __P((FILE *));
 333 static void Cstar_entries __P((FILE *));
 334 static void Erlang_functions __P((FILE *));
 335 static void Forth_words __P((FILE *));
 336 static void Fortran_functions __P((FILE *));
 337 static void HTML_labels __P((FILE *));
 338 static void Lisp_functions __P((FILE *));
 339 static void Lua_functions __P((FILE *));
 340 static void Makefile_targets __P((FILE *));
 341 static void Pascal_functions __P((FILE *));
 342 static void Perl_functions __P((FILE *));
 343 static void PHP_functions __P((FILE *));
 344 static void PS_functions __P((FILE *));
 345 static void Prolog_functions __P((FILE *));
 346 static void Python_functions __P((FILE *));
 347 static void Scheme_functions __P((FILE *));
 348 static void TeX_commands __P((FILE *));
 349 static void Texinfo_nodes __P((FILE *));
 350 static void Yacc_entries __P((FILE *));
 351 static void just_read_file __P((FILE *));
 352
 353 static void print_language_names __P((void));
 354 static void print_version __P((void));
 355 static void print_help __P((argument *));
 356 int main __P((int, char **));
 357
 358 static compressor *get_compressor_from_suffix __P((char *, char **));
 359 static language *get_language_from_langname __P((const char *));
 360 static language *get_language_from_interpreter __P((char *));
 361 static language *get_language_from_filename __P((char *, bool));
 362 static void readline __P((linebuffer *, FILE *));
 363 static long readline_internal __P((linebuffer *, FILE *));
 364 static bool nocase_tail __P((char *));
 365 static void get_tag __P((char *, char **));
 366
 367 static void analyse_regex __P((char *));
 368 static void free_regexps __P((void));
 369 static void regex_tag_multiline __P((void));
 370 static void error __P((const char *, const char *));
 371 static void suggest_asking_for_help __P((void));
 372 void fatal __P((char *, char *));
 373 static void pfatal __P((char *));
 374 static void add_node __P((node *, node **));
 375
 376 static void init __P((void));
 377 static void process_file_name __P((char *, language *));
 378 static void process_file __P((FILE *, char *, language *));
 379 static void find_entries __P((FILE *));
 380 static void free_tree __P((node *));
 381 static void free_fdesc __P((fdesc *));
 382 static void pfnote __P((char *, bool, char *, int, int, long));
 383 static void make_tag __P((char *, int, bool, char *, int, int, long));
 384 static void invalidate_nodes __P((fdesc *, node **));
 385 static void put_entries __P((node *));
 386
 387 static char *concat __P((char *, char *, char *));
 388 static char *skip_spaces __P((char *));
 389 static char *skip_non_spaces __P((char *));
 390 static char *savenstr __P((char *, int));
 391 static char *savestr __P((char *));
 392 static char *etags_strchr __P((const char *, int));
 393 static char *etags_strrchr __P((const char *, int));
 394 static int etags_strcasecmp __P((const char *, const char *));
 395 static int etags_strncasecmp __P((const char *, const char *, int));
 396 static char *etags_getcwd __P((void));
 397 static char *relative_filename __P((char *, char *));
 398 static char *absolute_filename __P((char *, char *));
 399 static char *absolute_dirname __P((char *, char *));
 400 static bool filename_is_absolute __P((char *f));
 401 static void canonicalize_filename __P((char *));
 402 static void linebuffer_init __P((linebuffer *));
 403 static void linebuffer_setlen __P((linebuffer *, int));
 404 static PTR xmalloc __P((unsigned int));
 405 static PTR xrealloc __P((char *, unsigned int));
 406
 407 \f
 408 static char searchar = '/';     /* use /.../ searches */
 409
 410 static char *tagfile;           /* output file */
 411 static char *progname;          /* name this program was invoked with */
 412 static char *cwd;               /* current working directory */
 413 static char *tagfiledir;        /* directory of tagfile */
 414 static FILE *tagf;              /* ioptr for tags file */
 415
 416 static fdesc *fdhead;           /* head of file description list */
 417 static fdesc *curfdp;           /* current file description */
 418 static int lineno;              /* line number of current line */
 419 static long charno;             /* current character number */
 420 static long linecharno;         /* charno of start of current line */
 421 static char *dbp;               /* pointer to start of current tag */
 422
 423 static const int invalidcharno = -1;
 424
 425 static node *nodehead;          /* the head of the binary tree of tags */
 426 static node *last_node;         /* the last node created */
 427
 428 static linebuffer lb;           /* the current line */
 429 static linebuffer filebuf;      /* a buffer containing the whole file */
 430 static linebuffer token_name;   /* a buffer containing a tag name */
 431
 432 /* boolean "functions" (see init)       */
 433 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 434 static char
 435   /* white chars */
 436   *white = " \f\t\n\r\v",
 437   /* not in a name */
 438   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 439   /* token ending chars */
 440   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 441   /* token starting chars */
 442   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 443   /* valid in-token chars */
 444   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 445
 446 static bool append_to_tagfile;  /* -a: append to tags */
 447 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 448 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 449 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 450                                 /* 0 struct/enum/union decls, and C++ */
 451                                 /* member functions. */
 452 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 453                                 /* constants and variables. */
 454                                 /* -D: opposite of -d.  Default under ctags. */
 455 static bool globals;            /* create tags for global variables */
 456 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 457 static bool members;            /* create tags for C member variables */
 458 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 459 static bool update;             /* -u: update tags */
 460 static bool vgrind_style;       /* -v: create vgrind style index output */
 461 static bool no_warnings;        /* -w: suppress warnings */
 462 static bool cxref_style;        /* -x: create cxref style output */
 463 static bool cplusplus;          /* .[hc] means C++, not C */
 464 static bool ignoreindent;       /* -I: ignore indentation in C */
 465 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 466
 467 /* STDIN is defined in LynxOS system headers */
 468 #ifdef STDIN
 469 # undef STDIN
 470 #endif
 471
 472 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 473 static bool parsing_stdin;      /* --parse-stdin used */
 474
 475 static regexp *p_head;          /* list of all regexps */
 476 static bool need_filebuf;       /* some regexes are multi-line */
 477
 478 static struct option longopts[] =       /* long-option table; the last entry is { NULL } */
 479 {
 480   { "append",             no_argument,       NULL,               'a'   },
 481   { "packages-only",      no_argument,       &packages_only,     TRUE  }, /* flag entry: bool is #defined to int, so &var fits the flag field */
 482   { "c++",                no_argument,       NULL,               'C'   },
 483   { "declarations",       no_argument,       &declarations,      TRUE  },
 484   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 485   { "help",               no_argument,       NULL,               'h'   },
 486   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same long name as the entry above -- confirm this one is reachable */
 487   { "ignore-indentation", no_argument,       NULL,               'I'   },
 488   { "language",           required_argument, NULL,               'l'   },
 489   { "members",            no_argument,       &members,           TRUE  },
 490   { "no-members",         no_argument,       &members,           FALSE },
 491   { "output",             required_argument, NULL,               'o'   },
 492   { "regex",              required_argument, NULL,               'r'   },
 493   { "no-regex",           no_argument,       NULL,               'R'   },
 494   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 495   { "parse-stdin",        required_argument, NULL,               STDIN }, /* STDIN is the 0x1001 code #defined earlier, not a character */
 496   { "version",            no_argument,       NULL,               'V'   },
 497
 498 #if CTAGS /* Ctags options */
 499   { "backward-search",    no_argument,       NULL,               'B'   },
 500   { "cxref",              no_argument,       NULL,               'x'   },
 501   { "defines",            no_argument,       NULL,               'd'   },
 502   { "globals",            no_argument,       &globals,           TRUE  },
 503   { "typedefs",           no_argument,       NULL,               't'   },
 504   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 505   { "update",             no_argument,       NULL,               'u'   },
 506   { "vgrind",             no_argument,       NULL,               'v'   },
 507   { "no-warn",            no_argument,       NULL,               'w'   },
 508
 509 #else /* Etags options */
 510   { "no-defines",         no_argument,       NULL,               'D'   },
 511   { "no-globals",         no_argument,       &globals,           FALSE },
 512   { "include",            required_argument, NULL,               'i'   },
 513 #endif
 514   { NULL }
 515 };
 516
 517 static compressor compressors[] =       /* file name suffix -> command that decompresses to stdout */
 518 {
 519   { "z", "gzip -d -c"},
 520   { "Z", "gzip -d -c"},
 521   { "gz", "gzip -d -c"},
 522   { "GZ", "gzip -d -c"},
 523   { "bz2", "bzip2 -d -c" },
 524   { NULL }                              /* last entry: NULL suffix */
 525 };
 526
 527 /*
 528  * Language stuff.
 529  */
 530
 531 /* Ada code */
 532 static char *Ada_suffixes [] =
 533   { "ads", "adb", "ada", NULL };
 534 static char Ada_help [] =
 535 "In Ada code, functions, procedures, packages, tasks and types are\n\
 536 tags.  Use the `--packages-only' option to create tags for\n\
 537 packages only.\n\
 538 Ada tag names have suffixes indicating the type of entity:\n\
 539         Entity type:    Qualifier:\n\
 540         ------------    ----------\n\
 541         function        /f\n\
 542         procedure       /p\n\
 543         package spec    /s\n\
 544         package body    /b\n\
 545         type            /t\n\
 546         task            /k\n\
 547 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 548 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 549 will just search for any tag `bidule'.";
 550
 551 /* Assembly code */
 552 static char *Asm_suffixes [] =
 553   { "a",        /* Unix assembler */
 554     "asm", /* Microcontroller assembly */
 555     "def", /* BSO/Tasking definition includes  */
 556     "inc", /* Microcontroller include files */
 557     "ins", /* Microcontroller include files */
 558     "s", "sa", /* Unix assembler */
 559     "S",   /* cpp-processed Unix assembler */
 560     "src", /* BSO/Tasking C compiler output */
 561     NULL
 562   };
 563 static char Asm_help [] =
 564 "In assembler code, labels appearing at the beginning of a line,\n\
 565 followed by a colon, are tags.";
 566
 567
 568 /* Note that .c and .h can be considered C++, if the --c++ flag was
 569    given, or if the `class' or `template' keywords are met inside the file.
 570    That is why default_C_entries is called for these. */
 571 static char *default_C_suffixes [] =
 572   { "c", "h", NULL };
 573 static char default_C_help [] =
 574 "In C code, any C function or typedef is a tag, and so are\n\
 575 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 576 definitions and `enum' constants are tags unless you specify\n\
 577 `--no-defines'.  Global variables are tags unless you specify\n\
 578 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 579 can make the tags table file much smaller.\n\
 580 You can tag function declarations and external variables by\n\
 581 using `--declarations', and struct members by using `--members'.";
 582
 583 static char *Cplusplus_suffixes [] =
 584   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 585     "M",                        /* Objective C++ */
 586     "pdb",                      /* Postscript with C syntax */
 587     NULL };
 588 static char Cplusplus_help [] =
 589 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 590 --help --lang=c --lang=c++ for full help.)\n\
 591 In addition to C tags, member functions are also recognized, and\n\
 592 optionally member variables if you use the `--members' option.\n\
 593 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 594 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 595 `operator+'.";
 596
 597 static char *Cjava_suffixes [] =
 598   { "java", NULL };
 599 static char Cjava_help [] =
 600 "In Java code, all the tags constructs of C and C++ code are\n\
 601 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 602
 603
 604 static char *Cobol_suffixes [] =
 605   { "COB", "cob", NULL };
 606 static char Cobol_help [] =
 607 "In Cobol code, tags are paragraph names; that is, any word\n\
 608 starting in column 8 and followed by a period.";
 609
 610 static char *Cstar_suffixes [] =
 611   { "cs", "hs", NULL };
 612
 613 static char *Erlang_suffixes [] =
 614   { "erl", "hrl", NULL };
 615 static char Erlang_help [] =
 616 "In Erlang code, the tags are the functions, records and macros\n\
 617 defined in the file.";
 618
 619 static char *Forth_suffixes [] =       /* Forth file suffixes; file-local like the other suffix tables */
 620   { "fth", "tok", NULL };
 621 static char Forth_help [] =
 622 "In Forth code, tags are words defined by `:',\n\
 623 constant, code, create, defer, value, variable, buffer:, field.";
 624
 625 static char *Fortran_suffixes [] =
 626   { "F", "f", "f90", "for", NULL };
 627 static char Fortran_help [] =
 628 "In Fortran code, functions, subroutines and block data are tags.";
 629
 630 static char *HTML_suffixes [] =
 631   { "htm", "html", "shtml", NULL };
 632 static char HTML_help [] =
 633 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 634 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 635 occurrences of `id='.";
 636
 637 static char *Lisp_suffixes [] =
 638   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 639 static char Lisp_help [] =
 640 "In Lisp code, any function defined with `defun', any variable\n\
 641 defined with `defvar' or `defconst', and in general the first\n\
 642 argument of any expression that starts with `(def' in column zero\n\
 643 is a tag.";
 644
 645 static char *Lua_suffixes [] =
 646   { "lua", "LUA", NULL };
 647 static char Lua_help [] =
 648 "In Lua scripts, all functions are tags.";
 649
 650 static char *Makefile_filenames [] =
 651   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 652 static char Makefile_help [] =
 653 "In makefiles, targets are tags; additionally, variables are tags\n\
 654 unless you specify `--no-globals'.";
 655
 656 static char *Objc_suffixes [] =
 657   { "lm",                       /* Objective lex file */
 658     "m",                        /* Objective C file */
 659      NULL };
 660 static char Objc_help [] =
 661 "In Objective C code, tags include Objective C definitions for classes,\n\
 662 class categories, methods and protocols.  Tags for variables and\n\
 663 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 664 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 665
 666 static char *Pascal_suffixes [] =
 667   { "p", "pas", NULL };
 668 static char Pascal_help [] =
 669 "In Pascal code, the tags are the functions and procedures defined\n\
 670 in the file.";
 671 /* " // this is for working around an Emacs highlighting bug... */
 672
 673 static char *Perl_suffixes [] =
 674   { "pl", "pm", NULL };
 675 static char *Perl_interpreters [] =
 676   { "perl", "@PERL@", NULL };
 677 static char Perl_help [] =
 678 "In Perl code, the tags are the packages, subroutines and variables\n\
 679 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 680 `--globals' if you want to tag global variables.  Tags for\n\
 681 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 682 defined in the default package is `main::SUB'.";
 683
 684 static char *PHP_suffixes [] =
 685   { "php", "php3", "php4", NULL };
 686 static char PHP_help [] =
 687 "In PHP code, tags are functions, classes and defines.  When using\n\
 688 the `--members' option, vars are tags too.";
 689
 690 static char *plain_C_suffixes [] =
 691   { "pc",                       /* Pro*C file */
 692      NULL };
 693
 694 static char *PS_suffixes [] =
 695   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 696 static char PS_help [] =
 697 "In PostScript code, the tags are the functions.";
 698
 699 static char *Prolog_suffixes [] =
 700   { "prolog", NULL };
 701 static char Prolog_help [] =
 702 "In Prolog code, tags are predicates and rules at the beginning of\n\
 703 line.";
 704
 705 static char *Python_suffixes [] =
 706   { "py", NULL };
 707 static char Python_help [] =
 708 "In Python code, `def' or `class' at the beginning of a line\n\
 709 generate a tag.";
 710
 711 /* Can't do the `SCM' or `scm' prefix with a version number. */
 712 static char *Scheme_suffixes [] =
 713   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 714 static char Scheme_help [] =
 715 "In Scheme code, tags include anything defined with `def' or with a\n\
 716 construct whose name starts with `def'.  They also include\n\
 717 variables set with `set!' at top level in the file.";
 718
 719 static char *TeX_suffixes [] =
 720   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 721 static char TeX_help [] =
 722 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 723 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 724 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 725 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 726 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 727 \n\
 728 Other commands can be specified by setting the environment variable\n\
 729 `TEXTAGS' to a colon-separated list like, for example,\n\
 730      TEXTAGS=\"mycommand:myothercommand\".";
 731
 732
/* NULL-terminated suffix list and `--help' text used by the "texinfo"
   entry of the language table.  */
static char *Texinfo_suffixes [] =
  { "texi", "texinfo", "txi", NULL };
static char Texinfo_help [] =
"for texinfo files, lines starting with @node are tagged.";
 737
/* NULL-terminated suffix list and `--help' text used by the "yacc"
   entry of the language table.  */
static char *Yacc_suffixes [] =
  { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
static char Yacc_help [] =
"In Bison or Yacc input files, each rule defines as a tag the\n\
nonterminal it constructs.  The portions of the file that contain\n\
C code are parsed as C code (use --help --lang=c --lang=yacc\n\
for full help).";
 745
/* `--help' text for the pseudo-language "auto" in the language table.  */
static char auto_help [] =
"`auto' is not a real language, it indicates to use\n\
a default language for files base on file name suffix and file contents.";

/* `--help' text for the pseudo-language "none" in the language table.  */
static char none_help [] =
"`none' is not a real language, it indicates to only do\n\
regexp processing on files.";

/* Placeholder `--help' text for table entries that have no detailed
   description of their own ("c*" and "proc" below).  */
static char no_lang_help [] =
"No detailed help available for this language.";
 756
 757
/*
 * Table of languages.
 *
 * The table is scanned linearly and ends at the entry whose name is
 * NULL.  Each row starts with the language name (what `--language'
 * is matched against) and its `--help' text.  The remaining
 * initializers are positional; their meaning comes from the
 * `language' type declared elsewhere in this file.  Judging from the
 * rows below they are, in order: the tagging function, the suffix
 * list, a list of whole file names (see "makefile"), a list of
 * interpreter names (see "perl"), and one flag set only for "yacc"
 * -- TODO confirm against the type declaration.  Trailing fields
 * that are left out are zero-initialized.
 *
 * It is ok for a given function to be listed under more than one
 * name (plain_C_entries serves both "objc" and "proc").
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 797
 798 \f
 799 static void
 800 print_language_names ()
 801 {
 802   language *lang;
 803   char **name, **ext;
 804
 805   puts ("\nThese are the currently supported languages, along with the\n\
 806 default file names and dot suffixes:");
 807   for (lang = lang_names; lang->name != NULL; lang++)
 808     {
 809       printf ("  %-*s", 10, lang->name);
 810       if (lang->filenames != NULL)
 811         for (name = lang->filenames; *name != NULL; name++)
 812           printf (" %s", *name);
 813       if (lang->suffixes != NULL)
 814         for (ext = lang->suffixes; *ext != NULL; ext++)
 815           printf (" .%s", *ext);
 816       puts ("");
 817     }
 818   puts ("where `auto' means use default language for files based on file\n\
 819 name suffix, and `none' means only do regexp processing on files.\n\
 820 If no language is specified and no matching suffix is found,\n\
 821 the first line of the file is read for a sharp-bang (#!) sequence\n\
 822 followed by the name of an interpreter.  If no such sequence is found,\n\
 823 Fortran is tried first; if no tags are found, C is tried next.\n\
 824 When parsing any C file, a \"class\" or \"template\" keyword\n\
 825 switches to C++.");
 826   puts ("Compressed files are supported using gzip and bzip2.\n\
 827 \n\
 828 For detailed help on a given language use, for example,\n\
 829 etags --help --lang=ada.");
 830 }
 831
 832 #ifndef EMACS_NAME
 833 # define EMACS_NAME "standalone"
 834 #endif
 835 #ifndef VERSION
 836 # define VERSION "version"
 837 #endif
 838 static void
 839 print_version ()
 840 {
 841   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 842   puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
 843   puts ("This program is distributed under the same terms as Emacs");
 844
 845   exit (EXIT_SUCCESS);
 846 }
 847
 848 static void
 849 print_help (argbuffer)
 850      argument *argbuffer;
 851 {
 852   bool help_for_lang = FALSE;
 853
 854   for (; argbuffer->arg_type != at_end; argbuffer++)
 855     if (argbuffer->arg_type == at_language)
 856       {
 857         if (help_for_lang)
 858           puts ("");
 859         puts (argbuffer->lang->help);
 860         help_for_lang = TRUE;
 861       }
 862
 863   if (help_for_lang)
 864     exit (EXIT_SUCCESS);
 865
 866   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 867 \n\
 868 These are the options accepted by %s.\n", progname, progname);
 869   if (NO_LONG_OPTIONS)
 870     puts ("WARNING: long option names do not work with this executable,\n\
 871 as it is not linked with GNU getopt.");
 872   else
 873     puts ("You may use unambiguous abbreviations for the long option names.");
 874   puts ("  A - as file name means read names from stdin (one per line).\n\
 875 Absolute names are stored in the output file as they are.\n\
 876 Relative ones are stored relative to the output file's directory.\n");
 877
 878   puts ("-a, --append\n\
 879         Append tag entries to existing tags file.");
 880
 881   puts ("--packages-only\n\
 882         For Ada files, only generate tags for packages.");
 883
 884   if (CTAGS)
 885     puts ("-B, --backward-search\n\
 886         Write the search commands for the tag entries using '?', the\n\
 887         backward-search command instead of '/', the forward-search command.");
 888
 889   /* This option is mostly obsolete, because etags can now automatically
 890      detect C++.  Retained for backward compatibility and for debugging and
 891      experimentation.  In principle, we could want to tag as C++ even
 892      before any "class" or "template" keyword.
 893   puts ("-C, --c++\n\
 894         Treat files whose name suffix defaults to C language as C++ files.");
 895   */
 896
 897   puts ("--declarations\n\
 898         In C and derived languages, create tags for function declarations,");
 899   if (CTAGS)
 900     puts ("\tand create tags for extern variables if --globals is used.");
 901   else
 902     puts
 903       ("\tand create tags for extern variables unless --no-globals is used.");
 904
 905   if (CTAGS)
 906     puts ("-d, --defines\n\
 907         Create tag entries for C #define constants and enum constants, too.");
 908   else
 909     puts ("-D, --no-defines\n\
 910         Don't create tag entries for C #define constants and enum constants.\n\
 911         This makes the tags file smaller.");
 912
 913   if (!CTAGS)
 914     puts ("-i FILE, --include=FILE\n\
 915         Include a note in tag file indicating that, when searching for\n\
 916         a tag, one should also consult the tags file FILE after\n\
 917         checking the current file.");
 918
 919   puts ("-l LANG, --language=LANG\n\
 920         Force the following files to be considered as written in the\n\
 921         named language up to the next --language=LANG option.");
 922
 923   if (CTAGS)
 924     puts ("--globals\n\
 925         Create tag entries for global variables in some languages.");
 926   else
 927     puts ("--no-globals\n\
 928         Do not create tag entries for global variables in some\n\
 929         languages.  This makes the tags file smaller.");
 930   puts ("--members\n\
 931         Create tag entries for members of structures in some languages.");
 932
 933   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 934         Make a tag for each line matching a regular expression pattern\n\
 935         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 936         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 937         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 938         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 939   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 940         For example Tcl named tags can be created with:\n\
 941           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 942         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 943         `m' means to allow multi-line matches, `s' implies `m' and\n\
 944         causes dot to match any character, including newline.");
 945   puts ("-R, --no-regex\n\
 946         Don't create tags from regexps for the following files.");
 947   puts ("-I, --ignore-indentation\n\
 948         In C and C++ do not assume that a closing brace in the first\n\
 949         column is the final brace of a function or structure definition.");
 950   puts ("-o FILE, --output=FILE\n\
 951         Write the tags to FILE.");
 952   puts ("--parse-stdin=NAME\n\
 953         Read from standard input and record tags as belonging to file NAME.");
 954
 955   if (CTAGS)
 956     {
 957       puts ("-t, --typedefs\n\
 958         Generate tag entries for C and Ada typedefs.");
 959       puts ("-T, --typedefs-and-c++\n\
 960         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 961         and C++ member functions.");
 962     }
 963
 964   if (CTAGS)
 965     puts ("-u, --update\n\
 966         Update the tag entries for the given files, leaving tag\n\
 967         entries for other files in place.  Currently, this is\n\
 968         implemented by deleting the existing entries for the given\n\
 969         files and then rewriting the new entries at the end of the\n\
 970         tags file.  It is often faster to simply rebuild the entire\n\
 971         tag file than to use this.");
 972
 973   if (CTAGS)
 974     {
 975       puts ("-v, --vgrind\n\
 976         Print on the standard output an index of items intended for\n\
 977         human consumption, similar to the output of vgrind.  The index\n\
 978         is sorted, and gives the page number of each item.");
 979       puts ("-w, --no-warn\n\
 980         Suppress warning messages about entries defined in multiple\n\
 981         files.");
 982       puts ("-x, --cxref\n\
 983         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 984         The output uses line numbers instead of page numbers, but\n\
 985         beyond that the differences are cosmetic; try both to see\n\
 986         which you like.");
 987     }
 988
 989   puts ("-V, --version\n\
 990         Print the version of the program.\n\
 991 -h, --help\n\
 992         Print this help message.\n\
 993         Followed by one or more `--language' options prints detailed\n\
 994         help about tag generation for the specified languages.");
 995
 996   print_language_names ();
 997
 998   puts ("");
 999   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1000
1001   exit (EXIT_SUCCESS);
1002 }
1003
1004 \f
1005 #ifdef VMS                      /* VMS specific functions */
1006
/* String terminator, used by the VMS helpers below.  */
#define EOS     '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255
/* Buffer that receives an expanded file specification in fn_exp.
   CURLEN is used there as the length of the text in BODY; BODY has
   one byte more than MAX_FILE_SPEC_LEN so that it can always be
   terminated with EOS.  (Presumably this layout mirrors the varying
   string descriptor that fn_exp points at it -- TODO confirm.)  */
typedef struct  {
  short   curlen;
  char    body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1016
1017 /*
1018  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1019  returning in each successive call the next file name matching the input
1020  spec. The function expects that each in_spec passed
1021  to it will be processed to completion; in particular, up to and
1022  including the call following that in which the last matching name
1023  is returned, the function ignores the value of in_spec, and will
1024  only start processing a new spec with the following call.
1025  If an error occurs, on return out_spec contains the value
1026  of in_spec when the error occurred.
1027
1028  With each successive file name returned in out_spec, the
1029  function's return value is one. When there are no more matching
1030  names the function returns zero. If on the first call no file
1031  matches in_spec, or there is any other error, -1 is returned.
1032 */
1033
1034 #include        <rmsdef.h>
1035 #include        <descrip.h>
1036 #define         OUTSIZE MAX_FILE_SPEC_LEN
1037 static short
1038 fn_exp (out, in)
1039      vspec *out;
1040      char *in;
1041 {
1042   static long context = 0;
1043   static struct dsc$descriptor_s o;
1044   static struct dsc$descriptor_s i;
1045   static bool pass1 = TRUE;
1046   long status;
1047   short retval;
1048
1049   if (pass1)
1050     {
1051       pass1 = FALSE;
1052       o.dsc$a_pointer = (char *) out;
1053       o.dsc$w_length = (short)OUTSIZE;
1054       i.dsc$a_pointer = in;
1055       i.dsc$w_length = (short)strlen(in);
1056       i.dsc$b_dtype = DSC$K_DTYPE_T;
1057       i.dsc$b_class = DSC$K_CLASS_S;
1058       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1059       o.dsc$b_class = DSC$K_CLASS_VS;
1060     }
1061   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1062     {
1063       out->body[out->curlen] = EOS;
1064       return 1;
1065     }
1066   else if (status == RMS$_NMF)
1067     retval = 0;
1068   else
1069     {
1070       strcpy(out->body, in);
1071       retval = -1;
1072     }
1073   lib$find_file_end(&context);
1074   pass1 = TRUE;
1075   return retval;
1076 }
1077
1078 /*
1079   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1080   name of each file specified by the provided arg expanding wildcards.
1081 */
1082 static char *
1083 gfnames (arg, p_error)
1084      char *arg;
1085      bool *p_error;
1086 {
1087   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1088
1089   switch (fn_exp (&filename, arg))
1090     {
1091     case 1:
1092       *p_error = FALSE;
1093       return filename.body;
1094     case 0:
1095       *p_error = FALSE;
1096       return NULL;
1097     default:
1098       *p_error = TRUE;
1099       return filename.body;
1100     }
1101 }
1102
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for system(): report that the function is not available
 * and fail.  The callers in main compare the result with EXIT_SUCCESS
 * or pass it to exit(), so a definite failure value must be returned
 * instead of falling off the end of the function; -1 differs from
 * EXIT_SUCCESS whatever value the platform gives that macro.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1110
1111 #define VERSION_DELIM   ';'
1112 char *massage_name (s)
1113      char *s;
1114 {
1115   char *start = s;
1116
1117   for ( ; *s; s++)
1118     if (*s == VERSION_DELIM)
1119       {
1120         *s = EOS;
1121         break;
1122       }
1123     else
1124       *s = lowcase (*s);
1125   return start;
1126 }
1127 #endif /* VMS */
1128
1129 \f
1130 int
1131 main (argc, argv)
1132      int argc;
1133      char *argv[];
1134 {
1135   int i;
1136   unsigned int nincluded_files;
1137   char **included_files;
1138   argument *argbuffer;
1139   int current_arg, file_count;
1140   linebuffer filename_lb;
1141   bool help_asked = FALSE;
1142 #ifdef VMS
1143   bool got_err;
1144 #endif
1145  char *optstring;
1146  int opt;
1147
1148
1149 #ifdef DOS_NT
1150   _fmode = O_BINARY;   /* all of files are treated as binary files */
1151 #endif /* DOS_NT */
1152
1153   progname = argv[0];
1154   nincluded_files = 0;
1155   included_files = xnew (argc, char *);
1156   current_arg = 0;
1157   file_count = 0;
1158
1159   /* Allocate enough no matter what happens.  Overkill, but each one
1160      is small. */
1161   argbuffer = xnew (argc, argument);
1162
1163   /*
1164    * If etags, always find typedefs and structure tags.  Why not?
1165    * Also default to find macro constants, enum constants and
1166    * global variables.
1167    */
1168   if (!CTAGS)
1169     {
1170       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1171       globals = TRUE;
1172     }
1173
1174   /* When the optstring begins with a '-' getopt_long does not rearrange the
1175      non-options arguments to be at the end, but leaves them alone. */
1176   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1177                       "ac:Cf:Il:o:r:RSVhH",
1178                       (CTAGS) ? "BxdtTuvw" : "Di:");
1179
1180   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1181     switch (opt)
1182       {
1183       case 0:
1184         /* If getopt returns 0, then it has already processed a
1185            long-named option.  We should do nothing.  */
1186         break;
1187
1188       case 1:
1189         /* This means that a file name has been seen.  Record it. */
1190         argbuffer[current_arg].arg_type = at_filename;
1191         argbuffer[current_arg].what     = optarg;
1192         ++current_arg;
1193         ++file_count;
1194         break;
1195
1196       case STDIN:
1197         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1198         argbuffer[current_arg].arg_type = at_stdin;
1199         argbuffer[current_arg].what     = optarg;
1200         ++current_arg;
1201         ++file_count;
1202         if (parsing_stdin)
1203           fatal ("cannot parse standard input more than once", (char *)NULL);
1204         parsing_stdin = TRUE;
1205         break;
1206
1207         /* Common options. */
1208       case 'a': append_to_tagfile = TRUE;       break;
1209       case 'C': cplusplus = TRUE;               break;
1210       case 'f':         /* for compatibility with old makefiles */
1211       case 'o':
1212         if (tagfile)
1213           {
1214             error ("-o option may only be given once.", (char *)NULL);
1215             suggest_asking_for_help ();
1216             /* NOTREACHED */
1217           }
1218         tagfile = optarg;
1219         break;
1220       case 'I':
1221       case 'S':         /* for backward compatibility */
1222         ignoreindent = TRUE;
1223         break;
1224       case 'l':
1225         {
1226           language *lang = get_language_from_langname (optarg);
1227           if (lang != NULL)
1228             {
1229               argbuffer[current_arg].lang = lang;
1230               argbuffer[current_arg].arg_type = at_language;
1231               ++current_arg;
1232             }
1233         }
1234         break;
1235       case 'c':
1236         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1237         optarg = concat (optarg, "i", ""); /* memory leak here */
1238         /* FALLTHRU */
1239       case 'r':
1240         argbuffer[current_arg].arg_type = at_regexp;
1241         argbuffer[current_arg].what = optarg;
1242         ++current_arg;
1243         break;
1244       case 'R':
1245         argbuffer[current_arg].arg_type = at_regexp;
1246         argbuffer[current_arg].what = NULL;
1247         ++current_arg;
1248         break;
1249       case 'V':
1250         print_version ();
1251         break;
1252       case 'h':
1253       case 'H':
1254         help_asked = TRUE;
1255         break;
1256
1257         /* Etags options */
1258       case 'D': constantypedefs = FALSE;                        break;
1259       case 'i': included_files[nincluded_files++] = optarg;     break;
1260
1261         /* Ctags options. */
1262       case 'B': searchar = '?';                                 break;
1263       case 'd': constantypedefs = TRUE;                         break;
1264       case 't': typedefs = TRUE;                                break;
1265       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1266       case 'u': update = TRUE;                                  break;
1267       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1268       case 'x': cxref_style = TRUE;                             break;
1269       case 'w': no_warnings = TRUE;                             break;
1270       default:
1271         suggest_asking_for_help ();
1272         /* NOTREACHED */
1273       }
1274
1275   /* No more options.  Store the rest of arguments. */
1276   for (; optind < argc; optind++)
1277     {
1278       argbuffer[current_arg].arg_type = at_filename;
1279       argbuffer[current_arg].what = argv[optind];
1280       ++current_arg;
1281       ++file_count;
1282     }
1283
1284   argbuffer[current_arg].arg_type = at_end;
1285
1286   if (help_asked)
1287     print_help (argbuffer);
1288     /* NOTREACHED */
1289
1290   if (nincluded_files == 0 && file_count == 0)
1291     {
1292       error ("no input files specified.", (char *)NULL);
1293       suggest_asking_for_help ();
1294       /* NOTREACHED */
1295     }
1296
1297   if (tagfile == NULL)
1298     tagfile = CTAGS ? "tags" : "TAGS";
1299   cwd = etags_getcwd ();        /* the current working directory */
1300   if (cwd[strlen (cwd) - 1] != '/')
1301     {
1302       char *oldcwd = cwd;
1303       cwd = concat (oldcwd, "/", "");
1304       free (oldcwd);
1305     }
1306   /* Relative file names are made relative to the current directory. */
1307   if (streq (tagfile, "-")
1308       || strneq (tagfile, "/dev/", 5))
1309     tagfiledir = cwd;
1310   else
1311     tagfiledir = absolute_dirname (tagfile, cwd);
1312
1313   init ();                      /* set up boolean "functions" */
1314
1315   linebuffer_init (&lb);
1316   linebuffer_init (&filename_lb);
1317   linebuffer_init (&filebuf);
1318   linebuffer_init (&token_name);
1319
1320   if (!CTAGS)
1321     {
1322       if (streq (tagfile, "-"))
1323         {
1324           tagf = stdout;
1325 #ifdef DOS_NT
1326           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1327              doesn't take effect until after `stdout' is already open). */
1328           if (!isatty (fileno (stdout)))
1329             setmode (fileno (stdout), O_BINARY);
1330 #endif /* DOS_NT */
1331         }
1332       else
1333         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1334       if (tagf == NULL)
1335         pfatal (tagfile);
1336     }
1337
1338   /*
1339    * Loop through files finding functions.
1340    */
1341   for (i = 0; i < current_arg; i++)
1342     {
1343       static language *lang;    /* non-NULL if language is forced */
1344       char *this_file;
1345
1346       switch (argbuffer[i].arg_type)
1347         {
1348         case at_language:
1349           lang = argbuffer[i].lang;
1350           break;
1351         case at_regexp:
1352           analyse_regex (argbuffer[i].what);
1353           break;
1354         case at_filename:
1355 #ifdef VMS
1356           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1357             {
1358               if (got_err)
1359                 {
1360                   error ("can't find file %s\n", this_file);
1361                   argc--, argv++;
1362                 }
1363               else
1364                 {
1365                   this_file = massage_name (this_file);
1366                 }
1367 #else
1368               this_file = argbuffer[i].what;
1369 #endif
1370               /* Input file named "-" means read file names from stdin
1371                  (one per line) and use them. */
1372               if (streq (this_file, "-"))
1373                 {
1374                   if (parsing_stdin)
1375                     fatal ("cannot parse standard input AND read file names from it",
1376                            (char *)NULL);
1377                   while (readline_internal (&filename_lb, stdin) > 0)
1378                     process_file_name (filename_lb.buffer, lang);
1379                 }
1380               else
1381                 process_file_name (this_file, lang);
1382 #ifdef VMS
1383             }
1384 #endif
1385           break;
1386         case at_stdin:
1387           this_file = argbuffer[i].what;
1388           process_file (stdin, this_file, lang);
1389           break;
1390         }
1391     }
1392
1393   free_regexps ();
1394   free (lb.buffer);
1395   free (filebuf.buffer);
1396   free (token_name.buffer);
1397
1398   if (!CTAGS || cxref_style)
1399     {
1400       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1401       put_entries (nodehead);
1402       free_tree (nodehead);
1403       nodehead = NULL;
1404       if (!CTAGS)
1405         {
1406           fdesc *fdp;
1407
1408           /* Output file entries that have no tags. */
1409           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1410             if (!fdp->written)
1411               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1412
1413           while (nincluded_files-- > 0)
1414             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1415
1416           if (fclose (tagf) == EOF)
1417             pfatal (tagfile);
1418         }
1419
1420       exit (EXIT_SUCCESS);
1421     }
1422
1423   if (update)
1424     {
1425       char cmd[BUFSIZ];
1426       for (i = 0; i < current_arg; ++i)
1427         {
1428           switch (argbuffer[i].arg_type)
1429             {
1430             case at_filename:
1431             case at_stdin:
1432               break;
1433             default:
1434               continue;         /* the for loop */
1435             }
1436           sprintf (cmd,
1437                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1438                    tagfile, argbuffer[i].what, tagfile);
1439           if (system (cmd) != EXIT_SUCCESS)
1440             fatal ("failed to execute shell command", (char *)NULL);
1441         }
1442       append_to_tagfile = TRUE;
1443     }
1444
1445   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1446   if (tagf == NULL)
1447     pfatal (tagfile);
1448   put_entries (nodehead);       /* write all the tags (CTAGS) */
1449   free_tree (nodehead);
1450   nodehead = NULL;
1451   if (fclose (tagf) == EOF)
1452     pfatal (tagfile);
1453
1454   if (CTAGS)
1455     if (append_to_tagfile || update)
1456       {
1457         char cmd[2*BUFSIZ+10];
1458         sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1459         exit (system (cmd));
1460       }
1461   return EXIT_SUCCESS;
1462 }
1463
1464
1465 /*
1466  * Return a compressor given the file name.  If EXTPTR is non-zero,
1467  * return a pointer into FILE where the compressor-specific
1468  * extension begins.  If no compressor is found, NULL is returned
1469  * and EXTPTR is not significant.
1470  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1471  */
1472 static compressor *
1473 get_compressor_from_suffix (file, extptr)
1474      char *file;
1475      char **extptr;
1476 {
1477   compressor *compr;
1478   char *slash, *suffix;
1479
1480   /* This relies on FN to be after canonicalize_filename,
1481      so we don't need to consider backslashes on DOS_NT.  */
1482   slash = etags_strrchr (file, '/');
1483   suffix = etags_strrchr (file, '.');
1484   if (suffix == NULL || suffix < slash)
1485     return NULL;
1486   if (extptr != NULL)
1487     *extptr = suffix;
1488   suffix += 1;
1489   /* Let those poor souls who live with DOS 8+3 file name limits get
1490      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1491      Only the first do loop is run if not MSDOS */
1492   do
1493     {
1494       for (compr = compressors; compr->suffix != NULL; compr++)
1495         if (streq (compr->suffix, suffix))
1496           return compr;
1497       if (!MSDOS)
1498         break;                  /* do it only once: not really a loop */
1499       if (extptr != NULL)
1500         *extptr = ++suffix;
1501     } while (*suffix != '\0');
1502   return NULL;
1503 }
1504
1505
1506
1507 /*
1508  * Return a language given the name.
1509  */
1510 static language *
1511 get_language_from_langname (name)
1512      const char *name;
1513 {
1514   language *lang;
1515
1516   if (name == NULL)
1517     error ("empty language name", (char *)NULL);
1518   else
1519     {
1520       for (lang = lang_names; lang->name != NULL; lang++)
1521         if (streq (name, lang->name))
1522           return lang;
1523       error ("unknown language \"%s\"", name);
1524     }
1525
1526   return NULL;
1527 }
1528
1529
1530 /*
1531  * Return a language given the interpreter name.
1532  */
1533 static language *
1534 get_language_from_interpreter (interpreter)
1535      char *interpreter;
1536 {
1537   language *lang;
1538   char **iname;
1539
1540   if (interpreter == NULL)
1541     return NULL;
1542   for (lang = lang_names; lang->name != NULL; lang++)
1543     if (lang->interpreters != NULL)
1544       for (iname = lang->interpreters; *iname != NULL; iname++)
1545         if (streq (*iname, interpreter))
1546             return lang;
1547
1548   return NULL;
1549 }
1550
1551
1552
1553 /*
1554  * Return a language given the file name.
1555  */
1556 static language *
1557 get_language_from_filename (file, case_sensitive)
1558      char *file;
1559      bool case_sensitive;
1560 {
1561   language *lang;
1562   char **name, **ext, *suffix;
1563
1564   /* Try whole file name first. */
1565   for (lang = lang_names; lang->name != NULL; lang++)
1566     if (lang->filenames != NULL)
1567       for (name = lang->filenames; *name != NULL; name++)
1568         if ((case_sensitive)
1569             ? streq (*name, file)
1570             : strcaseeq (*name, file))
1571           return lang;
1572
1573   /* If not found, try suffix after last dot. */
1574   suffix = etags_strrchr (file, '.');
1575   if (suffix == NULL)
1576     return NULL;
1577   suffix += 1;
1578   for (lang = lang_names; lang->name != NULL; lang++)
1579     if (lang->suffixes != NULL)
1580       for (ext = lang->suffixes; *ext != NULL; ext++)
1581         if ((case_sensitive)
1582             ? streq (*ext, suffix)
1583             : strcaseeq (*ext, suffix))
1584           return lang;
1585   return NULL;
1586 }
1587
1588 \f
1589 /*
1590  * This routine is called on each file argument.
1591  */
1592 static void
1593 process_file_name (file, lang)
1594      char *file;
1595      language *lang;
1596 {
1597   struct stat stat_buf;
1598   FILE *inf;
1599   fdesc *fdp;
1600   compressor *compr;
1601   char *compressed_name, *uncompressed_name;
1602   char *ext, *real_name;
1603   int retval;
1604
1605   canonicalize_filename (file);
1606   if (streq (file, tagfile) && !streq (tagfile, "-"))
1607     {
1608       error ("skipping inclusion of %s in self.", file);
1609       return;
1610     }
1611   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1612     {
1613       compressed_name = NULL;
1614       real_name = uncompressed_name = savestr (file);
1615     }
1616   else
1617     {
1618       real_name = compressed_name = savestr (file);
1619       uncompressed_name = savenstr (file, ext - file);
1620     }
1621
1622   /* If the canonicalized uncompressed name
1623      has already been dealt with, skip it silently. */
1624   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1625     {
1626       assert (fdp->infname != NULL);
1627       if (streq (uncompressed_name, fdp->infname))
1628         goto cleanup;
1629     }
1630
1631   if (stat (real_name, &stat_buf) != 0)
1632     {
1633       /* Reset real_name and try with a different name. */
1634       real_name = NULL;
1635       if (compressed_name != NULL) /* try with the given suffix */
1636         {
1637           if (stat (uncompressed_name, &stat_buf) == 0)
1638             real_name = uncompressed_name;
1639         }
1640       else                      /* try all possible suffixes */
1641         {
1642           for (compr = compressors; compr->suffix != NULL; compr++)
1643             {
1644               compressed_name = concat (file, ".", compr->suffix);
1645               if (stat (compressed_name, &stat_buf) != 0)
1646                 {
1647                   if (MSDOS)
1648                     {
1649                       char *suf = compressed_name + strlen (file);
1650                       size_t suflen = strlen (compr->suffix) + 1;
1651                       for ( ; suf[1]; suf++, suflen--)
1652                         {
1653                           memmove (suf, suf + 1, suflen);
1654                           if (stat (compressed_name, &stat_buf) == 0)
1655                             {
1656                               real_name = compressed_name;
1657                               break;
1658                             }
1659                         }
1660                       if (real_name != NULL)
1661                         break;
1662                     } /* MSDOS */
1663                   free (compressed_name);
1664                   compressed_name = NULL;
1665                 }
1666               else
1667                 {
1668                   real_name = compressed_name;
1669                   break;
1670                 }
1671             }
1672         }
1673       if (real_name == NULL)
1674         {
1675           perror (file);
1676           goto cleanup;
1677         }
1678     } /* try with a different name */
1679
1680   if (!S_ISREG (stat_buf.st_mode))
1681     {
1682       error ("skipping %s: it is not a regular file.", real_name);
1683       goto cleanup;
1684     }
1685   if (real_name == compressed_name)
1686     {
1687       char *cmd = concat (compr->command, " ", real_name);
1688       inf = (FILE *) popen (cmd, "r");
1689       free (cmd);
1690     }
1691   else
1692     inf = fopen (real_name, "r");
1693   if (inf == NULL)
1694     {
1695       perror (real_name);
1696       goto cleanup;
1697     }
1698
1699   process_file (inf, uncompressed_name, lang);
1700
1701   if (real_name == compressed_name)
1702     retval = pclose (inf);
1703   else
1704     retval = fclose (inf);
1705   if (retval < 0)
1706     pfatal (file);
1707
1708  cleanup:
1709   if (compressed_name) free (compressed_name);
1710   if (uncompressed_name) free (uncompressed_name);
1711   last_node = NULL;
1712   curfdp = NULL;
1713   return;
1714 }
1715
1716 static void
1717 process_file (fh, fn, lang)
1718      FILE *fh;
1719      char *fn;
1720      language *lang;
1721 {
1722   static const fdesc emptyfdesc;
1723   fdesc *fdp;
1724
1725   /* Create a new input file description entry. */
1726   fdp = xnew (1, fdesc);
1727   *fdp = emptyfdesc;
1728   fdp->next = fdhead;
1729   fdp->infname = savestr (fn);
1730   fdp->lang = lang;
1731   fdp->infabsname = absolute_filename (fn, cwd);
1732   fdp->infabsdir = absolute_dirname (fn, cwd);
1733   if (filename_is_absolute (fn))
1734     {
1735       /* An absolute file name.  Canonicalize it. */
1736       fdp->taggedfname = absolute_filename (fn, NULL);
1737     }
1738   else
1739     {
1740       /* A file name relative to cwd.  Make it relative
1741          to the directory of the tags file. */
1742       fdp->taggedfname = relative_filename (fn, tagfiledir);
1743     }
1744   fdp->usecharno = TRUE;        /* use char position when making tags */
1745   fdp->prop = NULL;
1746   fdp->written = FALSE;         /* not written on tags file yet */
1747
1748   fdhead = fdp;
1749   curfdp = fdhead;              /* the current file description */
1750
1751   find_entries (fh);
1752
1753   /* If not Ctags, and if this is not metasource and if it contained no #line
1754      directives, we can write the tags and free all nodes pointing to
1755      curfdp. */
1756   if (!CTAGS
1757       && curfdp->usecharno      /* no #line directives in this file */
1758       && !curfdp->lang->metasource)
1759     {
1760       node *np, *prev;
1761
1762       /* Look for the head of the sublist relative to this file.  See add_node
1763          for the structure of the node tree. */
1764       prev = NULL;
1765       for (np = nodehead; np != NULL; prev = np, np = np->left)
1766         if (np->fdp == curfdp)
1767           break;
1768
1769       /* If we generated tags for this file, write and delete them. */
1770       if (np != NULL)
1771         {
1772           /* This is the head of the last sublist, if any.  The following
1773              instructions depend on this being true. */
1774           assert (np->left == NULL);
1775
1776           assert (fdhead == curfdp);
1777           assert (last_node->fdp == curfdp);
1778           put_entries (np);     /* write tags for file curfdp->taggedfname */
1779           free_tree (np);       /* remove the written nodes */
1780           if (prev == NULL)
1781             nodehead = NULL;    /* no nodes left */
1782           else
1783             prev->left = NULL;  /* delete the pointer to the sublist */
1784         }
1785     }
1786 }
1787
1788 /*
1789  * This routine sets up the boolean pseudo-functions which work
1790  * by setting boolean flags dependent upon the corresponding character.
1791  * Every char which is NOT in that string is not a white char.  Therefore,
1792  * all of the array "_wht" is set to FALSE, and then the elements
1793  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1794  * of a char is TRUE if it is the string "white", else FALSE.
1795  */
1796 static void
1797 init ()
1798 {
1799   register char *sp;
1800   register int i;
1801
1802   for (i = 0; i < CHARS; i++)
1803     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1804   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1805   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1806   notinname('\0') = notinname('\n');
1807   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1808   begtoken('\0') = begtoken('\n');
1809   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1810   intoken('\0') = intoken('\n');
1811   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1812   endtoken('\0') = endtoken('\n');
1813 }
1814
1815 /*
1816  * This routine opens the specified file and calls the function
1817  * which finds the function and type definitions.
1818  */
1819 static void
1820 find_entries (inf)
1821      FILE *inf;
1822 {
1823   char *cp;
1824   language *lang = curfdp->lang;
1825   Lang_function *parser = NULL;
1826
1827   /* If user specified a language, use it. */
1828   if (lang != NULL && lang->function != NULL)
1829     {
1830       parser = lang->function;
1831     }
1832
1833   /* Else try to guess the language given the file name. */
1834   if (parser == NULL)
1835     {
1836       lang = get_language_from_filename (curfdp->infname, TRUE);
1837       if (lang != NULL && lang->function != NULL)
1838         {
1839           curfdp->lang = lang;
1840           parser = lang->function;
1841         }
1842     }
1843
1844   /* Else look for sharp-bang as the first two characters. */
1845   if (parser == NULL
1846       && readline_internal (&lb, inf) > 0
1847       && lb.len >= 2
1848       && lb.buffer[0] == '#'
1849       && lb.buffer[1] == '!')
1850     {
1851       char *lp;
1852
1853       /* Set lp to point at the first char after the last slash in the
1854          line or, if no slashes, at the first nonblank.  Then set cp to
1855          the first successive blank and terminate the string. */
1856       lp = etags_strrchr (lb.buffer+2, '/');
1857       if (lp != NULL)
1858         lp += 1;
1859       else
1860         lp = skip_spaces (lb.buffer + 2);
1861       cp = skip_non_spaces (lp);
1862       *cp = '\0';
1863
1864       if (strlen (lp) > 0)
1865         {
1866           lang = get_language_from_interpreter (lp);
1867           if (lang != NULL && lang->function != NULL)
1868             {
1869               curfdp->lang = lang;
1870               parser = lang->function;
1871             }
1872         }
1873     }
1874
1875   /* We rewind here, even if inf may be a pipe.  We fail if the
1876      length of the first line is longer than the pipe block size,
1877      which is unlikely. */
1878   rewind (inf);
1879
1880   /* Else try to guess the language given the case insensitive file name. */
1881   if (parser == NULL)
1882     {
1883       lang = get_language_from_filename (curfdp->infname, FALSE);
1884       if (lang != NULL && lang->function != NULL)
1885         {
1886           curfdp->lang = lang;
1887           parser = lang->function;
1888         }
1889     }
1890
1891   /* Else try Fortran or C. */
1892   if (parser == NULL)
1893     {
1894       node *old_last_node = last_node;
1895
1896       curfdp->lang = get_language_from_langname ("fortran");
1897       find_entries (inf);
1898
1899       if (old_last_node == last_node)
1900         /* No Fortran entries found.  Try C. */
1901         {
1902           /* We do not tag if rewind fails.
1903              Only the file name will be recorded in the tags file. */
1904           rewind (inf);
1905           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1906           find_entries (inf);
1907         }
1908       return;
1909     }
1910
1911   if (!no_line_directive
1912       && curfdp->lang != NULL && curfdp->lang->metasource)
1913     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1914        file, or anyway we parsed a file that is automatically generated from
1915        this one.  If this is the case, the bingo.c file contained #line
1916        directives that generated tags pointing to this file.  Let's delete
1917        them all before parsing this file, which is the real source. */
1918     {
1919       fdesc **fdpp = &fdhead;
1920       while (*fdpp != NULL)
1921         if (*fdpp != curfdp
1922             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1923           /* We found one of those!  We must delete both the file description
1924              and all tags referring to it. */
1925           {
1926             fdesc *badfdp = *fdpp;
1927
1928             /* Delete the tags referring to badfdp->taggedfname
1929                that were obtained from badfdp->infname. */
1930             invalidate_nodes (badfdp, &nodehead);
1931
1932             *fdpp = badfdp->next; /* remove the bad description from the list */
1933             free_fdesc (badfdp);
1934           }
1935         else
1936           fdpp = &(*fdpp)->next; /* advance the list pointer */
1937     }
1938
1939   assert (parser != NULL);
1940
1941   /* Generic initialisations before reading from file. */
1942   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1943
1944   /* Generic initialisations before parsing file with readline. */
1945   lineno = 0;                  /* reset global line number */
1946   charno = 0;                  /* reset global char number */
1947   linecharno = 0;              /* reset global char number of line start */
1948
1949   parser (inf);
1950
1951   regex_tag_multiline ();
1952 }
1953
1954 \f
1955 /*
1956  * Check whether an implicitly named tag should be created,
1957  * then call `pfnote'.
1958  * NAME is a string that is internally copied by this function.
1959  *
1960  * TAGS format specification
1961  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1962  * The following is explained in some more detail in etc/ETAGS.EBNF.
1963  *
1964  * make_tag creates tags with "implicit tag names" (unnamed tags)
1965  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1966  *  1. NAME does not contain any of the characters in NONAM;
1967  *  2. LINESTART contains name as either a rightmost, or rightmost but
1968  *     one character, substring;
1969  *  3. the character, if any, immediately before NAME in LINESTART must
1970  *     be a character in NONAM;
1971  *  4. the character, if any, immediately after NAME in LINESTART must
1972  *     also be a character in NONAM.
1973  *
1974  * The implementation uses the notinname() macro, which recognises the
1975  * characters stored in the string `nonam'.
1976  * etags.el needs to use the same characters that are in NONAM.
1977  */
1978 static void
1979 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1980      char *name;                /* tag name, or NULL if unnamed */
1981      int namelen;               /* tag length */
1982      bool is_func;              /* tag is a function */
1983      char *linestart;           /* start of the line where tag is */
1984      int linelen;               /* length of the line where tag is */
1985      int lno;                   /* line number */
1986      long cno;                  /* character number */
1987 {
1988   bool named = (name != NULL && namelen > 0);
1989
1990   if (!CTAGS && named)          /* maybe set named to false */
1991     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1992        such that etags.el can guess a name from it. */
1993     {
1994       int i;
1995       register char *cp = name;
1996
1997       for (i = 0; i < namelen; i++)
1998         if (notinname (*cp++))
1999           break;
2000       if (i == namelen)                         /* rule #1 */
2001         {
2002           cp = linestart + linelen - namelen;
2003           if (notinname (linestart[linelen-1]))
2004             cp -= 1;                            /* rule #4 */
2005           if (cp >= linestart                   /* rule #2 */
2006               && (cp == linestart
2007                   || notinname (cp[-1]))        /* rule #3 */
2008               && strneq (name, cp, namelen))    /* rule #2 */
2009             named = FALSE;      /* use implicit tag name */
2010         }
2011     }
2012
2013   if (named)
2014     name = savenstr (name, namelen);
2015   else
2016     name = NULL;
2017   pfnote (name, is_func, linestart, linelen, lno, cno);
2018 }
2019
2020 /* Record a tag. */
2021 static void
2022 pfnote (name, is_func, linestart, linelen, lno, cno)
2023      char *name;                /* tag name, or NULL if unnamed */
2024      bool is_func;              /* tag is a function */
2025      char *linestart;           /* start of the line where tag is */
2026      int linelen;               /* length of the line where tag is */
2027      int lno;                   /* line number */
2028      long cno;                  /* character number */
2029 {
2030   register node *np;
2031
2032   assert (name == NULL || name[0] != '\0');
2033   if (CTAGS && name == NULL)
2034     return;
2035
2036   np = xnew (1, node);
2037
2038   /* If ctags mode, change name "main" to M<thisfilename>. */
2039   if (CTAGS && !cxref_style && streq (name, "main"))
2040     {
2041       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2042       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2043       fp = etags_strrchr (np->name, '.');
2044       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2045         fp[0] = '\0';
2046     }
2047   else
2048     np->name = name;
2049   np->valid = TRUE;
2050   np->been_warned = FALSE;
2051   np->fdp = curfdp;
2052   np->is_func = is_func;
2053   np->lno = lno;
2054   if (np->fdp->usecharno)
2055     /* Our char numbers are 0-base, because of C language tradition?
2056        ctags compatibility?  old versions compatibility?   I don't know.
2057        Anyway, since emacs's are 1-base we expect etags.el to take care
2058        of the difference.  If we wanted to have 1-based numbers, we would
2059        uncomment the +1 below. */
2060     np->cno = cno /* + 1 */ ;
2061   else
2062     np->cno = invalidcharno;
2063   np->left = np->right = NULL;
2064   if (CTAGS && !cxref_style)
2065     {
2066       if (strlen (linestart) < 50)
2067         np->regex = concat (linestart, "$", "");
2068       else
2069         np->regex = savenstr (linestart, 50);
2070     }
2071   else
2072     np->regex = savenstr (linestart, linelen);
2073
2074   add_node (np, &nodehead);
2075 }
2076
2077 /*
2078  * free_tree ()
2079  *      recurse on left children, iterate on right children.
2080  */
2081 static void
2082 free_tree (np)
2083      register node *np;
2084 {
2085   while (np)
2086     {
2087       register node *node_right = np->right;
2088       free_tree (np->left);
2089       if (np->name != NULL)
2090         free (np->name);
2091       free (np->regex);
2092       free (np);
2093       np = node_right;
2094     }
2095 }
2096
2097 /*
2098  * free_fdesc ()
2099  *      delete a file description
2100  */
2101 static void
2102 free_fdesc (fdp)
2103      register fdesc *fdp;
2104 {
2105   if (fdp->infname != NULL) free (fdp->infname);
2106   if (fdp->infabsname != NULL) free (fdp->infabsname);
2107   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2108   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2109   if (fdp->prop != NULL) free (fdp->prop);
2110   free (fdp);
2111 }
2112
2113 /*
2114  * add_node ()
2115  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2116  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2117  *      balancing.
2118  *
2119  *      add_node is the only function allowed to add nodes, so it can
2120  *      maintain state.
2121  */
2122 static void
2123 add_node (np, cur_node_p)
2124      node *np, **cur_node_p;
2125 {
2126   register int dif;
2127   register node *cur_node = *cur_node_p;
2128
2129   if (cur_node == NULL)
2130     {
2131       *cur_node_p = np;
2132       last_node = np;
2133       return;
2134     }
2135
2136   if (!CTAGS)
2137     /* Etags Mode */
2138     {
2139       /* For each file name, tags are in a linked sublist on the right
2140          pointer.  The first tags of different files are a linked list
2141          on the left pointer.  last_node points to the end of the last
2142          used sublist. */
2143       if (last_node != NULL && last_node->fdp == np->fdp)
2144         {
2145           /* Let's use the same sublist as the last added node. */
2146           assert (last_node->right == NULL);
2147           last_node->right = np;
2148           last_node = np;
2149         }
2150       else if (cur_node->fdp == np->fdp)
2151         {
2152           /* Scanning the list we found the head of a sublist which is
2153              good for us.  Let's scan this sublist. */
2154           add_node (np, &cur_node->right);
2155         }
2156       else
2157         /* The head of this sublist is not good for us.  Let's try the
2158            next one. */
2159         add_node (np, &cur_node->left);
2160     } /* if ETAGS mode */
2161
2162   else
2163     {
2164       /* Ctags Mode */
2165       dif = strcmp (np->name, cur_node->name);
2166
2167       /*
2168        * If this tag name matches an existing one, then
2169        * do not add the node, but maybe print a warning.
2170        */
2171       if (!dif)
2172         {
2173           if (np->fdp == cur_node->fdp)
2174             {
2175               if (!no_warnings)
2176                 {
2177                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2178                            np->fdp->infname, lineno, np->name);
2179                   fprintf (stderr, "Second entry ignored\n");
2180                 }
2181             }
2182           else if (!cur_node->been_warned && !no_warnings)
2183             {
2184               fprintf
2185                 (stderr,
2186                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2187                  np->fdp->infname, cur_node->fdp->infname, np->name);
2188               cur_node->been_warned = TRUE;
2189             }
2190           return;
2191         }
2192
2193       /* Actually add the node */
2194       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2195     } /* if CTAGS mode */
2196 }
2197
2198 /*
2199  * invalidate_nodes ()
2200  *      Scan the node tree and invalidate all nodes pointing to the
2201  *      given file description (CTAGS case) or free them (ETAGS case).
2202  */
2203 static void
2204 invalidate_nodes (badfdp, npp)
2205      fdesc *badfdp;
2206      node **npp;
2207 {
2208   node *np = *npp;
2209
2210   if (np == NULL)
2211     return;
2212
2213   if (CTAGS)
2214     {
2215       if (np->left != NULL)
2216         invalidate_nodes (badfdp, &np->left);
2217       if (np->fdp == badfdp)
2218         np->valid = FALSE;
2219       if (np->right != NULL)
2220         invalidate_nodes (badfdp, &np->right);
2221     }
2222   else
2223     {
2224       assert (np->fdp != NULL);
2225       if (np->fdp == badfdp)
2226         {
2227           *npp = np->left;      /* detach the sublist from the list */
2228           np->left = NULL;      /* isolate it */
2229           free_tree (np);       /* free it */
2230           invalidate_nodes (badfdp, npp);
2231         }
2232       else
2233         invalidate_nodes (badfdp, &np->left);
2234     }
2235 }
2236
2237 \f
/* Forward declarations of the two helpers defined just below, which
   compute the size in characters of the entries of a file. */
static int total_size_of_entries __P((node *));
static int number_len __P((long));
2240
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2251
2252 /*
2253  * Return total number of characters that put_entries will output for
2254  * the nodes in the linked list at the right of the specified node.
2255  * This count is irrelevant with etags.el since emacs 19.34 at least,
2256  * but is still supplied for backward compatibility.
2257  */
2258 static int
2259 total_size_of_entries (np)
2260      register node *np;
2261 {
2262   register int total = 0;
2263
2264   for (; np != NULL; np = np->right)
2265     if (np->valid)
2266       {
2267         total += strlen (np->regex) + 1;                /* pat\177 */
2268         if (np->name != NULL)
2269           total += strlen (np->name) + 1;               /* name\001 */
2270         total += number_len ((long) np->lno) + 1;       /* lno, */
2271         if (np->cno != invalidcharno)                   /* cno */
2272           total += number_len (np->cno);
2273         total += 1;                                     /* newline */
2274       }
2275
2276   return total;
2277 }
2278
2279 static void
2280 put_entries (np)
2281      register node *np;
2282 {
2283   register char *sp;
2284   static fdesc *fdp = NULL;
2285
2286   if (np == NULL)
2287     return;
2288
2289   /* Output subentries that precede this one */
2290   if (CTAGS)
2291     put_entries (np->left);
2292
2293   /* Output this entry */
2294   if (np->valid)
2295     {
2296       if (!CTAGS)
2297         {
2298           /* Etags mode */
2299           if (fdp != np->fdp)
2300             {
2301               fdp = np->fdp;
2302               fprintf (tagf, "\f\n%s,%d\n",
2303                        fdp->taggedfname, total_size_of_entries (np));
2304               fdp->written = TRUE;
2305             }
2306           fputs (np->regex, tagf);
2307           fputc ('\177', tagf);
2308           if (np->name != NULL)
2309             {
2310               fputs (np->name, tagf);
2311               fputc ('\001', tagf);
2312             }
2313           fprintf (tagf, "%d,", np->lno);
2314           if (np->cno != invalidcharno)
2315             fprintf (tagf, "%ld", np->cno);
2316           fputs ("\n", tagf);
2317         }
2318       else
2319         {
2320           /* Ctags mode */
2321           if (np->name == NULL)
2322             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2323
2324           if (cxref_style)
2325             {
2326               if (vgrind_style)
2327                 fprintf (stdout, "%s %s %d\n",
2328                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2329               else
2330                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2331                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2332             }
2333           else
2334             {
2335               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2336
2337               if (np->is_func)
2338                 {               /* function or #define macro with args */
2339                   putc (searchar, tagf);
2340                   putc ('^', tagf);
2341
2342                   for (sp = np->regex; *sp; sp++)
2343                     {
2344                       if (*sp == '\\' || *sp == searchar)
2345                         putc ('\\', tagf);
2346                       putc (*sp, tagf);
2347                     }
2348                   putc (searchar, tagf);
2349                 }
2350               else
2351                 {               /* anything else; text pattern inadequate */
2352                   fprintf (tagf, "%d", np->lno);
2353                 }
2354               putc ('\n', tagf);
2355             }
2356         }
2357     } /* if this node contains a valid tag */
2358
2359   /* Output subentries that follow this one */
2360   put_entries (np->right);
2361   if (!CTAGS)
2362     put_entries (np->left);
2363 }
2364
2365 \f
/* C extensions. */
/* Flag values naming the C dialects.  They appear in the c_ext column
   of the keyword table further down.
   NOTE(review): that table writes (C_JAVA & !C_PLPL).  `!' is the
   logical negation, so !C_PLPL is 0 and the whole expression is 0,
   the same value used by the entries with no dialect restriction.
   (C_JAVA & ~C_PLPL), that is 0x00004, was probably intended --
   confirm before changing the gperf input and regenerating. */
#define C_EXT	0x00fff		/* C extensions */
#define C_PLAIN 0x00000		/* C */
#define C_PLPL	0x00001		/* C++ */
#define C_STAR	0x00003		/* C* */
#define C_JAVA	0x00005		/* JAVA */
#define C_AUTO  0x01000		/* C, but switch to C++ if `class' is met */
#define YACC	0x10000		/* yacc file */
2374
/*
 * The C symbol tables.
 * sym_type lists the classes that the keyword table below assigns to
 * the keywords it knows; st_none is the first enumerator.
 */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2389
/* Forward declarations for the C keyword lookup.  hash and in_word_set
   are the gperf-generated functions defined below.  C_symtype is not
   defined in this part of the file; presumably it is the routine that
   consults them -- confirm against its definition. */
static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));
2393
2394 /* Feed stuff between (but not including) %[ and %] lines to:
2395      gperf -m 5
2396 %[
2397 %compare-strncmp
2398 %enum
2399 %struct-type
2400 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2401 %%
2402 if,             0,                      st_C_ignore
2403 for,            0,                      st_C_ignore
2404 while,          0,                      st_C_ignore
2405 switch,         0,                      st_C_ignore
2406 return,         0,                      st_C_ignore
2407 __attribute__,  0,                      st_C_attribute
2408 @interface,     0,                      st_C_objprot
2409 @protocol,      0,                      st_C_objprot
2410 @implementation,0,                      st_C_objimpl
2411 @end,           0,                      st_C_objend
2412 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2413 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2414 friend,         C_PLPL,                 st_C_ignore
2415 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2416 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2417 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2418 class,          0,                      st_C_class
2419 namespace,      C_PLPL,                 st_C_struct
2420 domain,         C_STAR,                 st_C_struct
2421 union,          0,                      st_C_struct
2422 struct,         0,                      st_C_struct
2423 extern,         0,                      st_C_extern
2424 enum,           0,                      st_C_enum
2425 typedef,        0,                      st_C_typedef
2426 define,         0,                      st_C_define
2427 undef,          0,                      st_C_define
2428 operator,       C_PLPL,                 st_C_operator
2429 template,       0,                      st_C_template
2430 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2431 DEFUN,          0,                      st_C_gnumacro
2432 SYSCALL,        0,                      st_C_gnumacro
2433 ENTRY,          0,                      st_C_gnumacro
2434 PSEUDO,         0,                      st_C_gnumacro
2435 # These are defined inside C functions, so currently they are not met.
2436 # EXFUN used in glibc, DEFVAR_* in emacs.
2437 #EXFUN,         0,                      st_C_gnumacro
2438 #DEFVAR_,       0,                      st_C_gnumacro
2439 %]
2440 and replace lines between %< and %> with its output, then:
2441  - remove the #if characterset check
2442  - make in_word_set static and not inline. */
2443 /*%<*/
2444 /* C code produced by gperf version 3.0.1 */
2445 /* Command-line: gperf -m 5  */
2446 /* Computed positions: -k'2-3' */
2447
/* One entry of the keyword table: NAME is the keyword text, C_EXT is
   an expression built from the C_* dialect flags, and TYPE is the
   symbol class given to the keyword. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2449 /* maximum key range = 33, duplicates = 0 */
2450
/* gperf-generated hash function for the keyword table.  The value is
   LEN plus the asso_values entry of the second character of STR and,
   unless LEN is 2, also that of the third character.
   NOTE(review): for LEN below 2 the default branch still reads str[2]
   and str[1]; in_word_set starts with a check of LEN against
   MIN_WORD_LENGTH, which presumably guards its call to this function
   -- confirm.  This is generated code: do not edit it by hand, follow
   the regeneration instructions in the comment above instead. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Per-character contribution to the hash, indexed by the character
     taken as an unsigned char (256 entries). */
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  switch (hval)
    {
      default:
	/* Every length other than 2 also counts the third character. */
	hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
	hval += asso_values[(unsigned char)str[1]];
	break;
    }
  return hval;
}
2505 /* Return the wordlist entry whose name is exactly the LEN-char token STR, or 0 if there is none. */
2506 static struct C_stab_entry *
2507 in_word_set (str, len)
2508      register const char *str;     /* token text; need not be terminated at LEN */
2509      register unsigned int len;    /* token length */
2510 {
2511   enum
2512     {
2513       TOTAL_KEYWORDS = 32,
2514       MIN_WORD_LENGTH = 2,
2515       MAX_WORD_LENGTH = 15,
2516       MIN_HASH_VALUE = 2,
2517       MAX_HASH_VALUE = 34
2518     };
2519   /* Indexed by hash value; {""} is an empty slot.  The middle field is presumably the c_ext language mask (0 = any language). */
2520   static struct C_stab_entry wordlist[] =
2521     {
2522       {""}, {""},
2523       {"if",            0,                      st_C_ignore},
2524       {""},
2525       {"@end",          0,                      st_C_objend},
2526       {"union",         0,                      st_C_struct},
2527       {"define",                0,                      st_C_define},
2528       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2529       {"template",      0,                      st_C_template},
2530       {"operator",      C_PLPL,                 st_C_operator},
2531       {"@interface",    0,                      st_C_objprot},
2532       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2533       {"friend",                C_PLPL,                 st_C_ignore},
2534       {"typedef",       0,                      st_C_typedef},
2535       {"return",                0,                      st_C_ignore},
2536       {"@implementation",0,                     st_C_objimpl},
2537       {"@protocol",     0,                      st_C_objprot},
2538       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2539       {"extern",                0,                      st_C_extern},
2540       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2541       {"struct",                0,                      st_C_struct},
2542       {"domain",                C_STAR,                 st_C_struct},
2543       {"switch",                0,                      st_C_ignore},
2544       {"enum",          0,                      st_C_enum},
2545       {"for",           0,                      st_C_ignore},
2546       {"namespace",     C_PLPL,                 st_C_struct},
2547       {"class",         0,                      st_C_class},
2548       {"while",         0,                      st_C_ignore},
2549       {"undef",         0,                      st_C_define},
2550       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2551       {"__attribute__", 0,                      st_C_attribute},
2552       {"SYSCALL",       0,                      st_C_gnumacro},
2553       {"ENTRY",         0,                      st_C_gnumacro},
2554       {"PSEUDO",                0,                      st_C_gnumacro},
2555       {"DEFUN",         0,                      st_C_gnumacro}
2556     };
2557   /* Java-only masks need bitwise `~': `C_JAVA & !C_PLPL' is 0, i.e. unrestricted.  NOTE(review): fix any gperf input likewise. */
2558   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2559     {
2560       register int key = hash (str, len);
2561
2562       if (key <= MAX_HASH_VALUE && key >= 0)
2563         {
2564           register const char *s = wordlist[key].name;
2565           /* Compare LEN chars, then check that the keyword is not longer than the token. */
2566           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2567             return &wordlist[key];
2568         }
2569     }
2570   return 0;
2571 }
2572 /*%>*/
2573 /* Return the keyword type of the LEN-char token STR, or st_none if it is not a keyword for the dialect mask C_EXT. */
2574 static enum sym_type
2575 C_symtype (str, len, c_ext)
2576      char *str;                 /* token text */
2577      int len;                   /* token length */
2578      int c_ext;                 /* C extensions mask of the file being parsed */
2579 {
2580   register struct C_stab_entry *se = in_word_set (str, len);
2581   /* An entry with a nonzero c_ext counts only if it shares a bit with C_EXT. */
2582   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2583     return st_none;
2584   return se->type;
2585 }
2586
2587 \f
2588 /*
2589  * Ignoring __attribute__ ((list)): tokens are not considered while this is set.
2590  */
2591 static bool inattribute;        /* TRUE once consider_token sees __attribute__ */
2592
2593 /*
2594  * C functions and variables are recognized using a simple
2595  * finite automaton.  fvdef is its state variable.
2596  */
2597 static enum
2598 {
2599   fvnone,                       /* nothing seen */
2600   fdefunkey,                    /* GNU macro keyword (DEFUN, ENTRY...) seen */
2601   fdefunname,                   /* GNU macro (e.g. Emacs DEFUN) name seen */
2602   foperator,                    /* func: operator keyword seen (cplpl) */
2603   fvnameseen,                   /* function or variable name seen */
2604   fstartlist,                   /* func: just after open parenthesis */
2605   finlist,                      /* func: in parameter list */
2606   flistseen,                    /* func: after parameter list */
2607   fignore,                      /* func: before open brace */
2608   vignore                       /* var-like: ignore until ';' */
2609 } fvdef;
2610
2611 static bool fvextern;           /* func or var: extern keyword seen */
2612
2613 /*
2614  * typedefs are recognized using a simple finite automaton.
2615  * typdef is its state variable.
2616  */
2617 static enum
2618 {
2619   tnone,                        /* nothing seen */
2620   tkeyseen,                     /* typedef keyword seen (only if typedefs is set) */
2621   ttypeseen,                    /* defined type seen */
2622   tinbody,                      /* inside typedef body */
2623   tend,                         /* just before typedef tag */
2624   tignore                       /* junk after typedef tag */
2625 } typdef;
2626
2627 /*
2628  * struct-like structures (enum, struct, union, class, namespace...) are
2629  * recognized using another simple finite automaton.  `structdef' is its
2630  * state variable.
2631  */
2632 static enum
2633 {
2634   snone,                        /* nothing seen yet,
2635                                    or in struct body if bracelev > 0 */
2636   skeyseen,                     /* struct-like keyword seen */
2637   stagseen,                     /* struct-like tag seen */
2638   scolonseen                    /* colon (or Java extends/implements) seen after tag */
2639 } structdef;
2640
2641 /*
2642  * When objdef is different from onone, objtag is the name of the class.
2643  */
2644 static char *objtag = "<uninited>";     /* consider_token stores a savenstr copy here */
2645
2646 /*
2647  * Yet another little state machine to deal with preprocessor lines.
2648  */
2649 static enum
2650 {
2651   dnone,                        /* nothing seen */
2652   dsharpseen,                   /* '#' seen as first char on line */
2653   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2654   dignorerest                   /* ignore rest of line */
2655 } definedef;                    /* the CNL macro resets it to dnone */
2656
2657 /*
2658  * State machine for Objective C protocols and implementations.
2659  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2660  */
2661 static enum
2662 {
2663   onone,                        /* nothing seen */
2664   oprotocol,                    /* @interface or @protocol seen */
2665   oimplementation,              /* @implementation seen */
2666   otagseen,                     /* class name seen */
2667   oparenseen,                   /* parenthesis before category seen */
2668   ocatseen,                     /* category name seen */
2669   oinbody,                      /* in @implementation body */
2670   omethodsign,                  /* in @implementation body, after +/- */
2671   omethodtag,                   /* after method name */
2672   omethodcolon,                 /* after method colon */
2673   omethodparm,                  /* after method parameter */
2674   oignore                       /* wait for @end */
2675 } objdef;
2676
2677
2678 /*
2679  * Use this structure to keep info about the token read, and how it
2680  * should be tagged.  Used by the make_C_tag function to build a tag.
2681  */
2682 static struct tok
2683 {
2684   char *line;                   /* string containing the token */
2685   int offset;                   /* where the token starts in LINE */
2686   int length;                   /* token length */
2687   /*
2688     The previous members can be used to pass strings around for generic
2689     purposes.  The following ones specifically refer to creating tags.  In this
2690     case the token contained here is the pattern that will be used to create a
2691     tag.
2692   */
2693   bool valid;                   /* create a tag only if TRUE; the token should
2694                                    be invalidated whenever a state machine is
2695                                    reset prematurely */
2696   bool named;                   /* create a named tag */
2697   int lineno;                   /* source line number of tag */
2698   long linepos;                 /* source char number of tag */
2699 } token;                        /* latest token read */
2700
2701 /*
2702  * Variables and functions for dealing with nested structures.
2703  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2704  */
2705 static void pushclass_above __P((int, char *, int));
2706 static void popclass_above __P((int));
2707 static void write_classname __P((linebuffer *, char *qualifier));
2708
2709 static struct {
2710   char **cname;                 /* nested class names (NULL for an unnamed level) */
2711   int *bracelev;                /* nested class brace level */
2712   int nl;                       /* class nesting level (elements used) */
2713   int size;                     /* allocated length of both arrays */
2714 } cstack;                       /* stack for nested declaration tags */
2715 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2716 #define nestlev         (cstack.nl)
2717 /* After struct keyword or in struct body, not inside a nested function; uses the `bracelev' in scope. */
2718 #define instruct        (structdef == snone && nestlev > 0                      \
2719                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2720 /* Pop every class at brace level BRACELEV or deeper, then push the class named by the LEN chars at STR. */
2721 static void
2722 pushclass_above (bracelev, str, len)
2723      int bracelev;              /* brace level of the new class */
2724      char *str;                 /* class name, or NULL for an unnamed level */
2725      int len;                   /* length of the name in STR */
2726 {
2727   int nl;
2728
2729   popclass_above (bracelev);
2730   nl = cstack.nl;
2731   if (nl >= cstack.size)        /* stack full: double both arrays */
2732     {
2733       int size = cstack.size *= 2;      /* needs size > 0; C_entries sets the initial size */
2734       xrnew (cstack.cname, size, char *);
2735       xrnew (cstack.bracelev, size, int);
2736     }
2737   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2738   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);  /* own copy, freed on pop */
2739   cstack.bracelev[nl] = bracelev;
2740   cstack.nl = nl + 1;
2741 }
2742 /* Pop, freeing their names, all stack entries whose brace level is BRACELEV or deeper. */
2743 static void
2744 popclass_above (bracelev)
2745      int bracelev;              /* lowest brace level to discard */
2746 {
2747   int nl;
2748
2749   for (nl = cstack.nl - 1;      /* walk down from the top of the stack */
2750        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2751        nl--)
2752     {
2753       if (cstack.cname[nl] != NULL)
2754         free (cstack.cname[nl]);
2755       cstack.nl = nl;           /* entry NL is no longer in use */
2756     }
2757 }
2758 /* Store in CN the names on the class stack, outermost first, each inner name preceded by QUALIFIER. */
2759 static void
2760 write_classname (cn, qualifier)
2761      linebuffer *cn;            /* OUT: receives the qualified class name */
2762      char *qualifier;           /* separator string put before each inner name */
2763 {
2764   int i, len;
2765   int qlen = strlen (qualifier);
2766
2767   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2768     {                           /* no outermost name: start from an empty string */
2769       len = 0;
2770       cn->len = 0;
2771       cn->buffer[0] = '\0';
2772     }
2773   else
2774     {
2775       len = strlen (cstack.cname[0]);
2776       linebuffer_setlen (cn, len);
2777       strcpy (cn->buffer, cstack.cname[0]);
2778     }
2779   for (i = 1; i < cstack.nl; i++)
2780     {
2781       char *s;
2782       int slen;
2783
2784       s = cstack.cname[i];
2785       if (s == NULL)            /* unnamed level: contributes nothing */
2786         continue;
2787       slen = strlen (s);
2788       len += slen + qlen;
2789       linebuffer_setlen (cn, len);      /* set the new length before appending */
2790       strncat (cn->buffer, qualifier, qlen);
2791       strncat (cn->buffer, s, slen);
2792     }
2793 }
2794
2795 \f
2796 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2797 static void make_C_tag __P((bool));
2798
2799 /*
2800  * consider_token ()
2801  *      checks to see if the current token is at the start of a
2802  *      function or variable, or corresponds to a typedef, or
2803  *      is a struct/union/enum tag, or #define, or an enum constant.
2804  *
2805  *      *IS_FUNC_OR_VAR gets TRUE for a function, variable, operator, GNU macro
2806  *      or Objective C class name, or a #define macro with args.  C_EXTP points
2807  *      to the language mask; C++ autodetection may update it.
2808  * Globals
2809  *      fvdef                   IN OUT
2810  *      structdef               IN OUT
2811  *      definedef               IN OUT
2812  *      typdef                  IN OUT
2813  *      objdef                  IN OUT
2814  */
2815 /* This function also writes inattribute, fvextern, objtag and token_name.  Returns TRUE if the token should be tagged. */
2816 static bool
2817 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2818      register char *str;        /* IN: token pointer */
2819      register int len;          /* IN: token length */
2820      register int c;            /* IN: first char after the token */
2821      int *c_extp;               /* IN, OUT: C extensions mask */
2822      int bracelev;              /* IN: brace level */
2823      int parlev;                /* IN: parenthesis level */
2824      bool *is_func_or_var;      /* OUT: function or variable found */
2825 {
2826   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2827      structtype is the type of the preceding struct-like keyword, and
2828      structbracelev is the brace level where it has been seen. */
2829   static enum sym_type structtype;
2830   static int structbracelev;
2831   static enum sym_type toktype;
2832
2833
2834   toktype = C_symtype (str, len, *c_extp);
2835
2836   /*
2837    * Skip __attribute__
2838    */
2839   if (toktype == st_C_attribute)
2840     {
2841       inattribute = TRUE;
2842       return FALSE;
2843      }
2844
2845    /*
2846     * Advance the definedef state machine.
2847     */
2848    switch (definedef)
2849      {
2850      case dnone:
2851        /* We're not on a preprocessor line. */
2852        if (toktype == st_C_gnumacro)
2853          {
2854            fvdef = fdefunkey;
2855            return FALSE;
2856          }
2857        break;
2858      case dsharpseen:
2859        if (toktype == st_C_define)       /* `define' or `undef' */
2860          {
2861            definedef = ddefineseen;
2862          }
2863        else
2864          {
2865            definedef = dignorerest;
2866          }
2867        return FALSE;
2868      case ddefineseen:
2869        /*
2870         * Make a tag for any macro, unless it is a constant
2871         * and constantypedefs is FALSE.
2872         */
2873        definedef = dignorerest;
2874        *is_func_or_var = (c == '(');
2875        if (!*is_func_or_var && !constantypedefs)
2876          return FALSE;
2877        else
2878          return TRUE;
2879      case dignorerest:
2880        return FALSE;
2881      default:
2882        error ("internal error: definedef value.", (char *)NULL);
2883      }
2884
2885    /*
2886     * Now typedefs
2887     */
2888    switch (typdef)
2889      {
2890      case tnone:
2891        if (toktype == st_C_typedef)
2892          {
2893            if (typedefs)
2894              typdef = tkeyseen;
2895            fvextern = FALSE;
2896            fvdef = fvnone;
2897            return FALSE;
2898          }
2899        break;
2900      case tkeyseen:
2901        switch (toktype)
2902          {
2903          case st_none:
2904          case st_C_class:
2905          case st_C_struct:
2906          case st_C_enum:
2907            typdef = ttypeseen;
2908          }
2909        break;
2910      case ttypeseen:
2911        if (structdef == snone && fvdef == fvnone)
2912          {
2913            fvdef = fvnameseen;
2914            return TRUE;
2915          }
2916        break;
2917      case tend:
2918        switch (toktype)
2919          {
2920          case st_C_class:
2921          case st_C_struct:
2922          case st_C_enum:
2923            return FALSE;
2924          }
2925        return TRUE;
2926      }
2927
2928    /*
2929     * This structdef business is NOT invoked when we are ctags and the
2930     * file is plain C.  This is because a struct tag may have the same
2931     * name as another tag, and this loses with ctags.
2932     */
2933    switch (toktype)
2934      {
2935      case st_C_javastruct:
2936        if (structdef == stagseen)
2937          structdef = scolonseen;
2938        return FALSE;
2939      case st_C_template:
2940      case st_C_class:
2941        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2942            && bracelev == 0
2943            && definedef == dnone && structdef == snone
2944            && typdef == tnone && fvdef == fvnone)
2945          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2946        if (toktype == st_C_template)
2947          break;
2948        /* FALLTHRU */
2949      case st_C_struct:
2950      case st_C_enum:
2951        if (parlev == 0
2952            && fvdef != vignore
2953            && (typdef == tkeyseen
2954                || (typedefs_or_cplusplus && structdef == snone)))
2955          {
2956            structdef = skeyseen;
2957            structtype = toktype;
2958            structbracelev = bracelev;
2959            if (fvdef == fvnameseen)
2960              fvdef = fvnone;
2961          }
2962        return FALSE;
2963      }
2964
2965    if (structdef == skeyseen)    /* this token is the struct-like tag */
2966      {
2967        structdef = stagseen;
2968        return TRUE;
2969      }
2970
2971    if (typdef != tnone)
2972      definedef = dnone;
2973
2974    /* Detect Objective C constructs. */
2975    switch (objdef)
2976      {
2977      case onone:
2978        switch (toktype)
2979          {
2980          case st_C_objprot:
2981            objdef = oprotocol;
2982            return FALSE;
2983          case st_C_objimpl:
2984            objdef = oimplementation;
2985            return FALSE;
2986          }
2987        break;
2988      case oimplementation:
2989        /* Save the class tag for functions or variables defined inside. */
2990        objtag = savenstr (str, len);
2991        objdef = oinbody;
2992        return FALSE;
2993      case oprotocol:
2994        /* Save the class tag for categories. */
2995        objtag = savenstr (str, len);
2996        objdef = otagseen;
2997        *is_func_or_var = TRUE;
2998        return TRUE;
2999      case oparenseen:
3000        objdef = ocatseen;
3001        *is_func_or_var = TRUE;
3002        return TRUE;
3003      case oinbody:
3004        break;
3005      case omethodsign:
3006        if (parlev == 0)
3007          {
3008            fvdef = fvnone;
3009            objdef = omethodtag;
3010            linebuffer_setlen (&token_name, len);        /* method name starts with this token */
3011            strncpy (token_name.buffer, str, len);
3012            token_name.buffer[len] = '\0';
3013            return TRUE;
3014          }
3015        return FALSE;
3016      case omethodcolon:
3017        if (parlev == 0)
3018          objdef = omethodparm;
3019        return FALSE;
3020      case omethodparm:
3021        if (parlev == 0)
3022          {
3023            fvdef = fvnone;
3024            objdef = omethodtag;
3025            linebuffer_setlen (&token_name, token_name.len + len);       /* append to the name so far */
3026            strncat (token_name.buffer, str, len);
3027            return TRUE;
3028          }
3029        return FALSE;
3030      case oignore:
3031        if (toktype == st_C_objend)
3032          {
3033            /* Memory leakage here: the string pointed by objtag is
3034               never released, because many tests would be needed to
3035               avoid breaking on incorrect input code.  The amount of
3036               memory leaked here is the sum of the lengths of the
3037               class tags.
3038            free (objtag); */
3039            objdef = onone;
3040          }
3041        return FALSE;
3042      }
3043
3044    /* A function, variable or enum constant? */
3045    switch (toktype)
3046      {
3047      case st_C_extern:
3048        fvextern = TRUE;
3049        switch  (fvdef)
3050          {
3051          case finlist:
3052          case flistseen:
3053          case fignore:
3054          case vignore:
3055            break;
3056          default:
3057            fvdef = fvnone;
3058          }
3059        return FALSE;
3060      case st_C_ignore:
3061        fvextern = FALSE;
3062        fvdef = vignore;
3063        return FALSE;
3064      case st_C_operator:
3065        fvdef = foperator;
3066        *is_func_or_var = TRUE;
3067        return TRUE;
3068      case st_none:
3069        if (constantypedefs
3070            && structdef == snone
3071            && structtype == st_C_enum && bracelev > structbracelev)
3072          return TRUE;           /* enum constant */
3073        switch (fvdef)
3074          {
3075          case fdefunkey:
3076            if (bracelev > 0)
3077              break;
3078            fvdef = fdefunname;  /* GNU macro */
3079            *is_func_or_var = TRUE;
3080            return TRUE;
3081          case fvnone:
3082            switch (typdef)
3083              {
3084              case ttypeseen:
3085                return FALSE;
3086              case tnone:
3087                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3088                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3089                  {
3090                    fvdef = vignore;
3091                    return FALSE;
3092                  }
3093                break;
3094              }
3095           /* FALLTHRU */
3096           case fvnameseen:
3097           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3098             {
3099               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3100                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3101               fvdef = foperator;
3102               *is_func_or_var = TRUE;
3103               return TRUE;
3104             }
3105           if (bracelev > 0 && !instruct)
3106             break;
3107           fvdef = fvnameseen;   /* function or variable */
3108           *is_func_or_var = TRUE;
3109           return TRUE;
3110         }
3111       break;
3112     }
3113
3114   return FALSE;
3115 }
3116
3117 \f
3118 /*
3119  * C_entries often keeps pointers to tokens or lines which are older than
3120  * the line currently read.  By keeping two line buffers, and switching
3121  * them at end of line, it is possible to use those pointers.
3122  */
3123 static struct
3124 {
3125   long linepos;                 /* charno when the line was read (set by CNL_SAVE_DEFINEDEF) */
3126   linebuffer lb;
3127 } lbs[2];
3128 /* The macros below use locals of the enclosing function: curndx, newndx, c_ext, charno, inf, lp, quotednl, savetoken. */
3129 #define current_lb_is_new (newndx == curndx)
3130 #define switch_line_buffers() (curndx = 1 - curndx)
3131 /* Lines are always read into curlb; newndx then records that buffer as the newest one. */
3132 #define curlb (lbs[curndx].lb)
3133 #define newlb (lbs[newndx].lb)
3134 #define curlinepos (lbs[curndx].linepos)
3135 #define newlinepos (lbs[newndx].linepos)
3136
3137 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3138 #define cplpl (c_ext & C_PLPL)
3139 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3140 /* Read the next line into curlb and restart scanning at its beginning, leaving definedef untouched. */
3141 #define CNL_SAVE_DEFINEDEF()                                            \
3142 do {                                                                    \
3143   curlinepos = charno;                                                  \
3144   readline (&curlb, inf);                                               \
3145   lp = curlb.buffer;                                                    \
3146   quotednl = FALSE;                                                     \
3147   newndx = curndx;                                                      \
3148 } while (0)
3149 /* As CNL_SAVE_DEFINEDEF, but also restore a valid savetoken into token and reset definedef to dnone. */
3150 #define CNL()                                                           \
3151 do {                                                                    \
3152   CNL_SAVE_DEFINEDEF();                                                 \
3153   if (savetoken.valid)                                                  \
3154     {                                                                   \
3155       token = savetoken;                                                \
3156       savetoken.valid = FALSE;                                          \
3157     }                                                                   \
3158   definedef = dnone;                                                    \
3159 } while (0)
3160
3161 /* Make a tag from the global `token' and token_name, passing ISFUN on to make_tag; then invalidate the token. */
3162 static void
3163 make_C_tag (isfun)
3164      bool isfun;                /* handed unchanged to make_tag */
3165 {
3166   /* This function is never called when token.valid is FALSE, but
3167      we must protect against invalid input or internal errors. */
3168   if (!DEBUG && !token.valid)
3169     return;
3170
3171   if (token.valid)
3172     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3173               token.offset+token.length+1, token.lineno, token.linepos);
3174   else                          /* this case is optimised away if !DEBUG */
3175     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3176               token_name.len + 17, isfun, token.line,      /* 17 == strlen ("INVALID TOKEN:-->") */
3177               token.offset+token.length+1, token.lineno, token.linepos);
3178
3179   token.valid = FALSE;
3180 }
3181
3182
3183 /*
3184  * C_entries ()
3185  *      This routine finds functions, variables, typedefs,
3186  *      #define's, enum constants and struct/union/enum definitions in
3187  *      C syntax and adds them to the list.
3188  */
3189 static void
3190 C_entries (c_ext, inf)
3191      int c_ext;                 /* extension of C */
3192      FILE *inf;                 /* input file */
3193 {
3194   register char c;              /* latest char read; '\0' for end of line */
3195   register char *lp;            /* pointer one beyond the character `c' */
3196   int curndx, newndx;           /* indices for current and new lb */
3197   register int tokoff;          /* offset in line of start of current token */
3198   register int toklen;          /* length of current token */
3199   char *qualifier;              /* string used to qualify names */
3200   int qlen;                     /* length of qualifier */
3201   int bracelev;                 /* current brace level */
3202   int bracketlev;               /* current bracket level */
3203   int parlev;                   /* current parenthesis level */
3204   int attrparlev;               /* __attribute__ parenthesis level */
3205   int templatelev;              /* current template level */
3206   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3207   bool incomm, inquote, inchar, quotednl, midtoken;
3208   bool yacc_rules;              /* in the rules part of a yacc file */
3209   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3210
3211
3212   linebuffer_init (&lbs[0].lb);
3213   linebuffer_init (&lbs[1].lb);
3214   if (cstack.size == 0)
3215     {
3216       cstack.size = (DEBUG) ? 1 : 4;
3217       cstack.nl = 0;
3218       cstack.cname = xnew (cstack.size, char *);
3219       cstack.bracelev = xnew (cstack.size, int);
3220     }
3221
3222   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3223   curndx = newndx = 0;
3224   lp = curlb.buffer;
3225   *lp = 0;
3226
3227   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3228   structdef = snone; definedef = dnone; objdef = onone;
3229   yacc_rules = FALSE;
3230   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3231   token.valid = savetoken.valid = FALSE;
3232   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3233   if (cjava)
3234     { qualifier = "."; qlen = 1; }
3235   else
3236     { qualifier = "::"; qlen = 2; }
3237
3238
3239   while (!feof (inf))
3240     {
3241       c = *lp++;
3242       if (c == '\\')
3243         {
3244           /* If we are at the end of the line, the next character is a
3245              '\0'; do not skip it, because it is what tells us
3246              to read the next line.  */
3247           if (*lp == '\0')
3248             {
3249               quotednl = TRUE;
3250               continue;
3251             }
3252           lp++;
3253           c = ' ';
3254         }
3255       else if (incomm)
3256         {
3257           switch (c)
3258             {
3259             case '*':
3260               if (*lp == '/')
3261                 {
3262                   c = *lp++;
3263                   incomm = FALSE;
3264                 }
3265               break;
3266             case '\0':
3267               /* Newlines inside comments do not end macro definitions in
3268                  traditional cpp. */
3269               CNL_SAVE_DEFINEDEF ();
3270               break;
3271             }
3272           continue;
3273         }
3274       else if (inquote)
3275         {
3276           switch (c)
3277             {
3278             case '"':
3279               inquote = FALSE;
3280               break;
3281             case '\0':
3282               /* Newlines inside strings do not end macro definitions
3283                  in traditional cpp, even though compilers don't
3284                  usually accept them. */
3285               CNL_SAVE_DEFINEDEF ();
3286               break;
3287             }
3288           continue;
3289         }
3290       else if (inchar)
3291         {
3292           switch (c)
3293             {
3294             case '\0':
3295               /* Hmmm, something went wrong. */
3296               CNL ();
3297               /* FALLTHRU */
3298             case '\'':
3299               inchar = FALSE;
3300               break;
3301             }
3302           continue;
3303         }
3304       else if (bracketlev > 0)
3305         {
3306           switch (c)
3307             {
3308             case ']':
3309               if (--bracketlev > 0)
3310                 continue;
3311               break;
3312             case '\0':
3313               CNL_SAVE_DEFINEDEF ();
3314               break;
3315             }
3316           continue;
3317         }
3318       else switch (c)
3319         {
3320         case '"':
3321           inquote = TRUE;
3322           if (inattribute)
3323             break;
3324           switch (fvdef)
3325             {
3326             case fdefunkey:
3327             case fstartlist:
3328             case finlist:
3329             case fignore:
3330             case vignore:
3331               break;
3332             default:
3333               fvextern = FALSE;
3334               fvdef = fvnone;
3335             }
3336           continue;
3337         case '\'':
3338           inchar = TRUE;
3339           if (inattribute)
3340             break;
3341           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3342             {
3343               fvextern = FALSE;
3344               fvdef = fvnone;
3345             }
3346           continue;
3347         case '/':
3348           if (*lp == '*')
3349             {
3350               lp++;
3351               incomm = TRUE;
3352               continue;
3353             }
3354           else if (/* cplpl && */ *lp == '/')
3355             {
3356               c = '\0';
3357               break;
3358             }
3359           else
3360             break;
3361         case '%':
3362           if ((c_ext & YACC) && *lp == '%')
3363             {
3364               /* Entering or exiting rules section in yacc file. */
3365               lp++;
3366               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3367               typdef = tnone; structdef = snone;
3368               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3369               bracelev = 0;
3370               yacc_rules = !yacc_rules;
3371               continue;
3372             }
3373           else
3374             break;
3375         case '#':
3376           if (definedef == dnone)
3377             {
3378               char *cp;
3379               bool cpptoken = TRUE;
3380
3381               /* Look back on this line.  If all blanks, or nonblanks
3382                  followed by an end of comment, this is a preprocessor
3383                  token. */
3384               for (cp = newlb.buffer; cp < lp-1; cp++)
3385                 if (!iswhite (*cp))
3386                   {
3387                     if (*cp == '*' && *(cp+1) == '/')
3388                       {
3389                         cp++;
3390                         cpptoken = TRUE;
3391                       }
3392                     else
3393                       cpptoken = FALSE;
3394                   }
3395               if (cpptoken)
3396                 definedef = dsharpseen;
3397             } /* if (definedef == dnone) */
3398           continue;
3399         case '[':
3400           bracketlev++;
3401             continue;
3402         } /* switch (c) */
3403
3404
3405       /* Consider token only if some involved conditions are satisfied. */
3406       if (typdef != tignore
3407           && definedef != dignorerest
3408           && fvdef != finlist
3409           && templatelev == 0
3410           && (definedef != dnone
3411               || structdef != scolonseen)
3412           && !inattribute)
3413         {
3414           if (midtoken)
3415             {
3416               if (endtoken (c))
3417                 {
3418                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3419                     /* This handles :: in the middle,
3420                        but not at the beginning of an identifier.
3421                        Also, space-separated :: is not recognised. */
3422                     {
3423                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3424                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3425                       lp += 2;
3426                       toklen += 2;
3427                       c = lp[-1];
3428                       goto still_in_token;
3429                     }
3430                   else
3431                     {
3432                       bool funorvar = FALSE;
3433
3434                       if (yacc_rules
3435                           || consider_token (newlb.buffer + tokoff, toklen, c,
3436                                              &c_ext, bracelev, parlev,
3437                                              &funorvar))
3438                         {
3439                           if (fvdef == foperator)
3440                             {
3441                               char *oldlp = lp;
3442                               lp = skip_spaces (lp-1);
3443                               if (*lp != '\0')
3444                                 lp += 1;
3445                               while (*lp != '\0'
3446                                      && !iswhite (*lp) && *lp != '(')
3447                                 lp += 1;
3448                               c = *lp++;
3449                               toklen += lp - oldlp;
3450                             }
3451                           token.named = FALSE;
3452                           if (!plainc
3453                               && nestlev > 0 && definedef == dnone)
3454                             /* in struct body */
3455                             {
3456                               write_classname (&token_name, qualifier);
3457                               linebuffer_setlen (&token_name,
3458                                                  token_name.len+qlen+toklen);
3459                               strcat (token_name.buffer, qualifier);
3460                               strncat (token_name.buffer,
3461                                        newlb.buffer + tokoff, toklen);
3462                               token.named = TRUE;
3463                             }
3464                           else if (objdef == ocatseen)
3465                             /* Objective C category */
3466                             {
3467                               int len = strlen (objtag) + 2 + toklen;
3468                               linebuffer_setlen (&token_name, len);
3469                               strcpy (token_name.buffer, objtag);
3470                               strcat (token_name.buffer, "(");
3471                               strncat (token_name.buffer,
3472                                        newlb.buffer + tokoff, toklen);
3473                               strcat (token_name.buffer, ")");
3474                               token.named = TRUE;
3475                             }
3476                           else if (objdef == omethodtag
3477                                    || objdef == omethodparm)
3478                             /* Objective C method */
3479                             {
3480                               token.named = TRUE;
3481                             }
3482                           else if (fvdef == fdefunname)
3483                             /* GNU DEFUN and similar macros */
3484                             {
3485                               bool defun = (newlb.buffer[tokoff] == 'F');
3486                               int off = tokoff;
3487                               int len = toklen;
3488
3489                               /* Rewrite the tag so that emacs lisp DEFUNs
3490                                  can be found by their elisp name */
3491                               if (defun)
3492                                 {
3493                                   off += 1;
3494                                   len -= 1;
3495                                 }
3496                               linebuffer_setlen (&token_name, len);
3497                               strncpy (token_name.buffer,
3498                                        newlb.buffer + off, len);
3499                               token_name.buffer[len] = '\0';
3500                               if (defun)
3501                                 while (--len >= 0)
3502                                   if (token_name.buffer[len] == '_')
3503                                     token_name.buffer[len] = '-';
3504                               token.named = defun;
3505                             }
3506                           else
3507                             {
3508                               linebuffer_setlen (&token_name, toklen);
3509                               strncpy (token_name.buffer,
3510                                        newlb.buffer + tokoff, toklen);
3511                               token_name.buffer[toklen] = '\0';
3512                               /* Name macros and members. */
3513                               token.named = (structdef == stagseen
3514                                              || typdef == ttypeseen
3515                                              || typdef == tend
3516                                              || (funorvar
3517                                                  && definedef == dignorerest)
3518                                              || (funorvar
3519                                                  && definedef == dnone
3520                                                  && structdef == snone
3521                                                  && bracelev > 0));
3522                             }
3523                           token.lineno = lineno;
3524                           token.offset = tokoff;
3525                           token.length = toklen;
3526                           token.line = newlb.buffer;
3527                           token.linepos = newlinepos;
3528                           token.valid = TRUE;
3529
3530                           if (definedef == dnone
3531                               && (fvdef == fvnameseen
3532                                   || fvdef == foperator
3533                                   || structdef == stagseen
3534                                   || typdef == tend
3535                                   || typdef == ttypeseen
3536                                   || objdef != onone))
3537                             {
3538                               if (current_lb_is_new)
3539                                 switch_line_buffers ();
3540                             }
3541                           else if (definedef != dnone
3542                                    || fvdef == fdefunname
3543                                    || instruct)
3544                             make_C_tag (funorvar);
3545                         }
3546                       else /* not yacc and consider_token failed */
3547                         {
3548                           if (inattribute && fvdef == fignore)
3549                             {
3550                               /* We have just met __attribute__ after a
3551                                  function parameter list: do not tag the
3552                                  function again. */
3553                               fvdef = fvnone;
3554                             }
3555                         }
3556                       midtoken = FALSE;
3557                     }
3558                 } /* if (endtoken (c)) */
3559               else if (intoken (c))
3560                 still_in_token:
3561                 {
3562                   toklen++;
3563                   continue;
3564                 }
3565             } /* if (midtoken) */
3566           else if (begtoken (c))
3567             {
3568               switch (definedef)
3569                 {
3570                 case dnone:
3571                   switch (fvdef)
3572                     {
3573                     case fstartlist:
3574                       /* This prevents tagging fb in
3575                          void (__attribute__((noreturn)) *fb) (void);
3576                          Fixing this is not easy and not very important. */
3577                       fvdef = finlist;
3578                       continue;
3579                     case flistseen:
3580                       if (plainc || declarations)
3581                         {
3582                           make_C_tag (TRUE); /* a function */
3583                           fvdef = fignore;
3584                         }
3585                       break;
3586                     }
3587                   if (structdef == stagseen && !cjava)
3588                     {
3589                       popclass_above (bracelev);
3590                       structdef = snone;
3591                     }
3592                   break;
3593                 case dsharpseen:
3594                   savetoken = token;
3595                   break;
3596                 }
3597               if (!yacc_rules || lp == newlb.buffer + 1)
3598                 {
3599                   tokoff = lp - 1 - newlb.buffer;
3600                   toklen = 1;
3601                   midtoken = TRUE;
3602                 }
3603               continue;
3604             } /* if (begtoken) */
3605         } /* if must look at token */
3606
3607
3608       /* Detect end of line, colon, comma, semicolon and various braces
3609          after having handled a token.*/
3610       switch (c)
3611         {
3612         case ':':
3613           if (inattribute)
3614             break;
3615           if (yacc_rules && token.offset == 0 && token.valid)
3616             {
3617               make_C_tag (FALSE); /* a yacc function */
3618               break;
3619             }
3620           if (definedef != dnone)
3621             break;
3622           switch (objdef)
3623             {
3624             case  otagseen:
3625               objdef = oignore;
3626               make_C_tag (TRUE); /* an Objective C class */
3627               break;
3628             case omethodtag:
3629             case omethodparm:
3630               objdef = omethodcolon;
3631               linebuffer_setlen (&token_name, token_name.len + 1);
3632               strcat (token_name.buffer, ":");
3633               break;
3634             }
3635           if (structdef == stagseen)
3636             {
3637               structdef = scolonseen;
3638               break;
3639             }
3640           /* Should be useless, but may work as a safety net. */
3641           if (cplpl && fvdef == flistseen)
3642             {
3643               make_C_tag (TRUE); /* a function */
3644               fvdef = fignore;
3645               break;
3646             }
3647           break;
3648         case ';':
3649           if (definedef != dnone || inattribute)
3650             break;
3651           switch (typdef)
3652             {
3653             case tend:
3654             case ttypeseen:
3655               make_C_tag (FALSE); /* a typedef */
3656               typdef = tnone;
3657               fvdef = fvnone;
3658               break;
3659             case tnone:
3660             case tinbody:
3661             case tignore:
3662               switch (fvdef)
3663                 {
3664                 case fignore:
3665                   if (typdef == tignore || cplpl)
3666                     fvdef = fvnone;
3667                   break;
3668                 case fvnameseen:
3669                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3670                       || (members && instruct))
3671                     make_C_tag (FALSE); /* a variable */
3672                   fvextern = FALSE;
3673                   fvdef = fvnone;
3674                   token.valid = FALSE;
3675                   break;
3676                 case flistseen:
3677                   if ((declarations
3678                        && (cplpl || !instruct)
3679                        && (typdef == tnone || (typdef != tignore && instruct)))
3680                       || (members
3681                           && plainc && instruct))
3682                     make_C_tag (TRUE);  /* a function */
3683                   /* FALLTHRU */
3684                 default:
3685                   fvextern = FALSE;
3686                   fvdef = fvnone;
3687                   if (declarations
3688                        && cplpl && structdef == stagseen)
3689                     make_C_tag (FALSE); /* forward declaration */
3690                   else
3691                     token.valid = FALSE;
3692                 } /* switch (fvdef) */
3693               /* FALLTHRU */
3694             default:
3695               if (!instruct)
3696                 typdef = tnone;
3697             }
3698           if (structdef == stagseen)
3699             structdef = snone;
3700           break;
3701         case ',':
3702           if (definedef != dnone || inattribute)
3703             break;
3704           switch (objdef)
3705             {
3706             case omethodtag:
3707             case omethodparm:
3708               make_C_tag (TRUE); /* an Objective C method */
3709               objdef = oinbody;
3710               break;
3711             }
3712           switch (fvdef)
3713             {
3714             case fdefunkey:
3715             case foperator:
3716             case fstartlist:
3717             case finlist:
3718             case fignore:
3719             case vignore:
3720               break;
3721             case fdefunname:
3722               fvdef = fignore;
3723               break;
3724             case fvnameseen:
3725               if (parlev == 0
3726                   && ((globals
3727                        && bracelev == 0
3728                        && templatelev == 0
3729                        && (!fvextern || declarations))
3730                       || (members && instruct)))
3731                   make_C_tag (FALSE); /* a variable */
3732               break;
3733             case flistseen:
3734               if ((declarations && typdef == tnone && !instruct)
3735                   || (members && typdef != tignore && instruct))
3736                 {
3737                   make_C_tag (TRUE); /* a function */
3738                   fvdef = fvnameseen;
3739                 }
3740               else if (!declarations)
3741                 fvdef = fvnone;
3742               token.valid = FALSE;
3743               break;
3744             default:
3745               fvdef = fvnone;
3746             }
3747           if (structdef == stagseen)
3748             structdef = snone;
3749           break;
3750         case ']':
3751           if (definedef != dnone || inattribute)
3752             break;
3753           if (structdef == stagseen)
3754             structdef = snone;
3755           switch (typdef)
3756             {
3757             case ttypeseen:
3758             case tend:
3759               typdef = tignore;
3760               make_C_tag (FALSE);       /* a typedef */
3761               break;
3762             case tnone:
3763             case tinbody:
3764               switch (fvdef)
3765                 {
3766                 case foperator:
3767                 case finlist:
3768                 case fignore:
3769                 case vignore:
3770                   break;
3771                 case fvnameseen:
3772                   if ((members && bracelev == 1)
3773                       || (globals && bracelev == 0
3774                           && (!fvextern || declarations)))
3775                     make_C_tag (FALSE); /* a variable */
3776                   /* FALLTHRU */
3777                 default:
3778                   fvdef = fvnone;
3779                 }
3780               break;
3781             }
3782           break;
3783         case '(':
3784           if (inattribute)
3785             {
3786               attrparlev++;
3787               break;
3788             }
3789           if (definedef != dnone)
3790             break;
3791           if (objdef == otagseen && parlev == 0)
3792             objdef = oparenseen;
3793           switch (fvdef)
3794             {
3795             case fvnameseen:
3796               if (typdef == ttypeseen
3797                   && *lp != '*'
3798                   && !instruct)
3799                 {
3800                   /* This handles constructs like:
3801                      typedef void OperatorFun (int fun); */
3802                   make_C_tag (FALSE);
3803                   typdef = tignore;
3804                   fvdef = fignore;
3805                   break;
3806                 }
3807               /* FALLTHRU */
3808             case foperator:
3809               fvdef = fstartlist;
3810               break;
3811             case flistseen:
3812               fvdef = finlist;
3813               break;
3814             }
3815           parlev++;
3816           break;
3817         case ')':
3818           if (inattribute)
3819             {
3820               if (--attrparlev == 0)
3821                 inattribute = FALSE;
3822               break;
3823             }
3824           if (definedef != dnone)
3825             break;
3826           if (objdef == ocatseen && parlev == 1)
3827             {
3828               make_C_tag (TRUE); /* an Objective C category */
3829               objdef = oignore;
3830             }
3831           if (--parlev == 0)
3832             {
3833               switch (fvdef)
3834                 {
3835                 case fstartlist:
3836                 case finlist:
3837                   fvdef = flistseen;
3838                   break;
3839                 }
3840               if (!instruct
3841                   && (typdef == tend
3842                       || typdef == ttypeseen))
3843                 {
3844                   typdef = tignore;
3845                   make_C_tag (FALSE); /* a typedef */
3846                 }
3847             }
3848           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3849             parlev = 0;
3850           break;
3851         case '{':
3852           if (definedef != dnone)
3853             break;
3854           if (typdef == ttypeseen)
3855             {
3856               /* Whenever typdef is set to tinbody (currently only
3857                  here), typdefbracelev should be set to bracelev. */
3858               typdef = tinbody;
3859               typdefbracelev = bracelev;
3860             }
3861           switch (fvdef)
3862             {
3863             case flistseen:
3864               make_C_tag (TRUE);    /* a function */
3865               /* FALLTHRU */
3866             case fignore:
3867               fvdef = fvnone;
3868               break;
3869             case fvnone:
3870               switch (objdef)
3871                 {
3872                 case otagseen:
3873                   make_C_tag (TRUE); /* an Objective C class */
3874                   objdef = oignore;
3875                   break;
3876                 case omethodtag:
3877                 case omethodparm:
3878                   make_C_tag (TRUE); /* an Objective C method */
3879                   objdef = oinbody;
3880                   break;
3881                 default:
3882                   /* Neutralize `extern "C" {' grot. */
3883                   if (bracelev == 0 && structdef == snone && nestlev == 0
3884                       && typdef == tnone)
3885                     bracelev = -1;
3886                 }
3887               break;
3888             }
3889           switch (structdef)
3890             {
3891             case skeyseen:         /* unnamed struct */
3892               pushclass_above (bracelev, NULL, 0);
3893               structdef = snone;
3894               break;
3895             case stagseen:         /* named struct or enum */
3896             case scolonseen:       /* a class */
3897               pushclass_above (bracelev,token.line+token.offset, token.length);
3898               structdef = snone;
3899               make_C_tag (FALSE);  /* a struct or enum */
3900               break;
3901             }
3902           bracelev++;
3903           break;
3904         case '*':
3905           if (definedef != dnone)
3906             break;
3907           if (fvdef == fstartlist)
3908             {
3909               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3910               token.valid = FALSE;
3911             }
3912           break;
3913         case '}':
3914           if (definedef != dnone)
3915             break;
3916           if (!ignoreindent && lp == newlb.buffer + 1)
3917             {
3918               if (bracelev != 0)
3919                 token.valid = FALSE;
3920               bracelev = 0;     /* reset brace level if first column */
3921               parlev = 0;       /* also reset paren level, just in case... */
3922             }
3923           else if (bracelev > 0)
3924             bracelev--;
3925           else
3926             token.valid = FALSE; /* something gone amiss, token unreliable */
3927           popclass_above (bracelev);
3928           structdef = snone;
3929           /* Only if typdef == tinbody is typdefbracelev significant. */
3930           if (typdef == tinbody && bracelev <= typdefbracelev)
3931             {
3932               assert (bracelev == typdefbracelev);
3933               typdef = tend;
3934             }
3935           break;
3936         case '=':
3937           if (definedef != dnone)
3938             break;
3939           switch (fvdef)
3940             {
3941             case foperator:
3942             case finlist:
3943             case fignore:
3944             case vignore:
3945               break;
3946             case fvnameseen:
3947               if ((members && bracelev == 1)
3948                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3949                 make_C_tag (FALSE); /* a variable */
3950               /* FALLTHRU */
3951             default:
3952               fvdef = vignore;
3953             }
3954           break;
3955         case '<':
3956           if (cplpl
3957               && (structdef == stagseen || fvdef == fvnameseen))
3958             {
3959               templatelev++;
3960               break;
3961             }
3962           goto resetfvdef;
3963         case '>':
3964           if (templatelev > 0)
3965             {
3966               templatelev--;
3967               break;
3968             }
3969           goto resetfvdef;
3970         case '+':
3971         case '-':
3972           if (objdef == oinbody && bracelev == 0)
3973             {
3974               objdef = omethodsign;
3975               break;
3976             }
3977           /* FALLTHRU */
3978         resetfvdef:
3979         case '#': case '~': case '&': case '%': case '/':
3980         case '|': case '^': case '!': case '.': case '?':
3981           if (definedef != dnone)
3982             break;
3983           /* These surely cannot follow a function tag in C. */
3984           switch (fvdef)
3985             {
3986             case foperator:
3987             case finlist:
3988             case fignore:
3989             case vignore:
3990               break;
3991             default:
3992               fvdef = fvnone;
3993             }
3994           break;
3995         case '\0':
3996           if (objdef == otagseen)
3997             {
3998               make_C_tag (TRUE); /* an Objective C class */
3999               objdef = oignore;
4000             }
4001           /* If a macro spans multiple lines don't reset its state. */
4002           if (quotednl)
4003             CNL_SAVE_DEFINEDEF ();
4004           else
4005             CNL ();
4006           break;
4007         } /* switch (c) */
4008
4009     } /* while not eof */
4010
4011   free (lbs[0].lb.buffer);
4012   free (lbs[1].lb.buffer);
4013 }
4014
4015 /*
4016  * Process either a C++ file or a C file depending on the setting
4017  * of the global flag `cplusplus'.
      *
      * INF is the input stream and is passed on to C_entries unchanged.
      * A nonzero `cplusplus' selects C_PLPL; otherwise C_AUTO is passed,
      * which appears to let the parser turn C++ on by itself when it
      * meets `::' inside an identifier (see the C_AUTO test in the
      * parsing loop -- confirm against the head of C_entries).
4018  */
4019 static void
4020 default_C_entries (inf)
4021      FILE *inf;
4022 {
4023   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4024 }
4025
4026 /* Always do plain C.  INF, the input stream, is handed to C_entries
        with 0 as its first argument. */
4027 static void
4028 plain_C_entries (inf)
4029      FILE *inf;
4030 {
4031   C_entries (0, inf);
4032 }
4033
4034 /* Always do C++.  INF, the input stream, is handed to C_entries
        with C_PLPL as its first argument. */
4035 static void
4036 Cplusplus_entries (inf)
4037      FILE *inf;
4038 {
4039   C_entries (C_PLPL, inf);
4040 }
4041
4042 /* Always do Java.  INF, the input stream, is handed to C_entries
        with C_JAVA as its first argument. */
4043 static void
4044 Cjava_entries (inf)
4045      FILE *inf;
4046 {
4047   C_entries (C_JAVA, inf);
4048 }
4049
4050 /* Always do C*.  INF, the input stream, is handed to C_entries
        with C_STAR as its first argument. */
4051 static void
4052 Cstar_entries (inf)
4053      FILE *inf;
4054 {
4055   C_entries (C_STAR, inf);
4056 }
4057
4058 /* Always do Yacc.  INF, the input stream, is handed to C_entries
        with YACC as its first argument. */
4059 static void
4060 Yacc_entries (inf)
4061      FILE *inf;
4062 {
4063   C_entries (YACC, inf);
4064 }
4065
4066 \f
4067 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to the header of a `for' loop; the
        statement written after it is the loop body.  Before each pass
        it tests feof (FILE_POINTER) and, if not at end of file, calls
        readline to fill LINE_BUFFER and then sets CHAR_POINTER to
        LINE_BUFFER.buffer.  The loop has no initialization and no
        increment part.  The arguments are pasted textually: LINE_BUFFER
        must name an object whose address can be taken and that has a
        `buffer' member, and CHAR_POINTER must be assignable. */
4068 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4069   for (;                        /* loop initialization */               \
4070        !feof (file_pointer)     /* loop test */                         \
4071        &&                       /* instructions at start of loop */     \
4072           (readline (&line_buffer, file_pointer),                       \
4073            char_pointer = line_buffer.buffer,                           \
4074            TRUE);                                                       \
4075       )
4076
     /* LOOKING_AT (CP, KW) is true when strneq reports that the text at
        CP starts with the literal string KW and the character right
        after the keyword satisfies notinname.  Only on such a match is
        CP assigned the value skip_spaces returns for the position just
        past the keyword; the `&&' chain leaves CP untouched otherwise.
        CP is evaluated several times and must be a modifiable lvalue.
        The `"" kw' concatenation only compiles when KW is a string
        literal, which is what makes sizeof(kw)-1 its length. */
4077 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
4078   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
4079    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
4080    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
4081    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4082
4083 /* Similar to LOOKING_AT, but the comparison is done with strncaseeq,
        the character after the keyword is not checked with notinname,
        and on a match CP is advanced just past the keyword without
        skipping any spaces that follow.  CP is left untouched when the
        comparison fails. */
4084 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4085   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
4086    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
4087    && ((cp) += sizeof(kw)-1))                   /* skip the keyword */
4088
4089 /*
4090  * Read a file, but do no processing.  This is used to do regexp
4091  * matching on files that have no language defined.
      *
      * INF is the stream to read.  Each line is read into the file-level
      * line buffer `lb' by LOOP_ON_INPUT_LINES and then dropped; no tag
      * is made here.  Presumably the regexp matching mentioned above
      * happens inside readline -- confirm there.
4092  */
4093 static void
4094 just_read_file (inf)
4095      FILE *inf;
4096 {
4097   register char *dummy;         /* only there to satisfy the macro */
4098
4099   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4100     continue;
4101 }
4102
4103 \f
4104 /* Fortran parsing */
4105
4106 static void F_takeprec __P((void));
4107 static void F_getit __P((FILE *));
4108
     /* Step the file-level scan pointer `dbp' over an optional length
        suffix such as `*8' or `*(*)'; Fortran_functions calls this right
        after it has matched a type keyword.  Blanks before and after
        the `*' are skipped.

        - No `*' after the blanks: `dbp' is left at the first nonblank.
        - `*' followed by `(*)': `dbp' ends just past the `)'.
        - `*' followed by digits: `dbp' ends just past the last digit.
        - `*' followed by anything else: `dbp' is moved back by one
          character; the original note calls this forcing a failure,
          presumably of the caller's keyword match.  NOTE(review): when
          blanks separate the `*' from what follows, this lands on a
          blank rather than on the `*' -- confirm that is intended. */
4109 static void
4110 F_takeprec ()
4111 {
4112   dbp = skip_spaces (dbp);
4113   if (*dbp != '*')
4114     return;
4115   dbp++;
4116   dbp = skip_spaces (dbp);
4117   if (strneq (dbp, "(*)", 3))
4118     {
4119       dbp += 3;
4120       return;
4121     }
4122   if (!ISDIGIT (*dbp))
4123     {
4124       --dbp;                    /* force failure */
4125       return;
4126     }
4127   do
4128     dbp++;
4129   while (ISDIGIT (*dbp));
4130 }
4131
     /* Make a tag for the Fortran name that starts at the file-level
        scan pointer `dbp'.

        Leading blanks are skipped.  If that reaches the end of the
        line, the next line is read from INF into `lb' and is accepted
        only when its sixth character is `&'; scanning then resumes
        after that character and any blanks.  The name must start with
        a letter, `_' or `$' and extends over the following characters
        for which intoken is true.  Nothing is tagged, and the function
        just returns, when the next line does not have the `&' or no
        name is found. */
4132 static void
4133 F_getit (inf)
4134      FILE *inf;
4135 {
4136   register char *cp;
4137
4138   dbp = skip_spaces (dbp);
4139   if (*dbp == '\0')
4140     {
4141       readline (&lb, inf);
4142       dbp = lb.buffer;
          /* A line shorter than six characters cannot carry the `&'.
             Check the length first: the bytes after the terminating
             NUL may be left over from an earlier, longer line, and
             reading dbp[5] there could find a stale `&'. */
4143       if (strlen (dbp) < 6 || dbp[5] != '&')
4144         return;
4145       dbp += 6;
4146       dbp = skip_spaces (dbp);
4147     }
4148   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4149     return;
4150   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4151     continue;
4152   make_tag (dbp, cp-dbp, TRUE,
4153             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4154 }
4155
4156
     /* Scan the Fortran source read from INF and tag its functions,
        subroutines, entry points and block data units.

        Each line is read into `lb' and scanned through the file-level
        pointer `dbp'.  A leading `%' and leading blanks are skipped,
        then an optional type prefix (integer, real, logical, complex,
        character -- each with an optional length suffix handled by
        F_takeprec -- or `double precision').  What follows is checked
        against the keywords function, subroutine, entry and
        blockdata / block data; on a match F_getit tags the name that
        comes next.  A block data unit with nothing after the keyword is
        tagged with the fixed name "blockdata".

        Keyword matching is done by nocase_tail, which is assumed to
        compare without regard to case and to advance `dbp' past the
        keyword on success -- confirm against its definition. */
4157 static void
4158 Fortran_functions (inf)
4159      FILE *inf;
4160 {
4161   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4162     {
4163       if (*dbp == '%')
4164         dbp++;                  /* Ratfor escape to fortran */
4165       dbp = skip_spaces (dbp);
4166       if (*dbp == '\0')
4167         continue;
          /* First switch: step over an optional type prefix. */
4168       switch (lowcase (*dbp))
4169         {
4170         case 'i':
4171           if (nocase_tail ("integer"))
4172             F_takeprec ();
4173           break;
4174         case 'r':
4175           if (nocase_tail ("real"))
4176             F_takeprec ();
4177           break;
4178         case 'l':
4179           if (nocase_tail ("logical"))
4180             F_takeprec ();
4181           break;
4182         case 'c':
4183           if (nocase_tail ("complex") || nocase_tail ("character"))
4184             F_takeprec ();
4185           break;
4186         case 'd':
              /* `double' only counts when `precision' follows it on
                 the same line; otherwise the whole line is dropped. */
4187           if (nocase_tail ("double"))
4188             {
4189               dbp = skip_spaces (dbp);
4190               if (*dbp == '\0')
4191                 continue;
4192               if (nocase_tail ("precision"))
4193                 break;
4194               continue;
4195             }
4196           break;
4197         }
4198       dbp = skip_spaces (dbp);
4199       if (*dbp == '\0')
4200         continue;
          /* Second switch: look for a keyword that introduces a name.
             Every case ends with `continue', i.e. goes on to the next
             input line whether or not a tag was made. */
4201       switch (lowcase (*dbp))
4202         {
4203         case 'f':
4204           if (nocase_tail ("function"))
4205             F_getit (inf);
4206           continue;
4207         case 's':
4208           if (nocase_tail ("subroutine"))
4209             F_getit (inf);
4210           continue;
4211         case 'e':
4212           if (nocase_tail ("entry"))
4213             F_getit (inf);
4214           continue;
4215         case 'b':
4216           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4217             {
4218               dbp = skip_spaces (dbp);
4219               if (*dbp == '\0') /* assume un-named */
4220                 make_tag ("blockdata", 9, TRUE,
4221                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4222               else
4223                 F_getit (inf);  /* look for name */
4224             }
4225           continue;
4226         }
4227     }
4228 }
4229
4230 \f
4231 /*
4232  * Ada parsing
4233  * Original code by
4234  * Philippe Waroquiers (1998)
4235  */
4236
4237 static void Ada_getit __P((FILE *, char *));
4238
4239 /* Once we are positioned after an "interesting" keyword, let's get
4240    the real tag value necessary.

        INF is the input stream; a further line is read from it into
        `lb' when the current one is exhausted or continues with a `--'
        comment.  NAME_QUALIFIER is the suffix appended to the name to
        form the tag; it is replaced by "/b" when the word `body' is
        met before the name.

        The name is either the text between double quotes or a run of
        letters, digits, `_' and `.'.  One tag is made and the function
        returns; if no name characters are found in the unquoted case it
        returns without making a tag.  After a quoted name that ended at
        a closing quote, `dbp' is left just past that quote; otherwise
        `dbp' stays at the start of the name. */
4241 static void
4242 Ada_getit (inf, name_qualifier)
4243      FILE *inf;
4244      char *name_qualifier;
4245 {
4246   register char *cp;
4247   char *name;
4248   char c;
4249
4250   while (!feof (inf))
4251     {
4252       dbp = skip_spaces (dbp);
4253       if (*dbp == '\0'
4254           || (dbp[0] == '-' && dbp[1] == '-'))
4255         {
4256           readline (&lb, inf);
4257           dbp = lb.buffer;
4258         }
4259       switch (lowcase(*dbp))
4260         {
4261         case 'b':
4262           if (nocase_tail ("body"))
4263             {
4264               /* Skipping body of   procedure body   or   package body or ....
4265                  resetting qualifier to body instead of spec. */
4266               name_qualifier = "/b";
4267               continue;
4268             }
4269           break;
4270         case 't':
4271           /* Skipping type of   task type   or   protected type ... */
4272           if (nocase_tail ("type"))
4273             continue;
4274           break;
4275         }
4276       if (*dbp == '"')
4277         {
              /* Quoted name: it runs up to the closing quote, or to
                 the end of the line if there is none. */
4278           dbp += 1;
4279           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4280             continue;
4281         }
4282       else
4283         {
4284           dbp = skip_spaces (dbp);
4285           for (cp = dbp;
4286                (*cp != '\0'
4287                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4288                cp++)
4289             continue;
4290           if (cp == dbp)
4291             return;
4292         }
          /* Terminate the name in place just long enough to build the
             qualified tag name, then put the saved character back. */
4293       c = *cp;
4294       *cp = '\0';
4295       name = concat (dbp, name_qualifier, "");
4296       *cp = c;
4297       make_tag (name, strlen (name), TRUE,
4298                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4299       free (name);
4300       if (c == '"')
4301         dbp = cp + 1;
4302       return;
4303     }
4304 }
4305
4306 static void
4307 Ada_funcs (inf)
4308      FILE *inf;
4309 {
4310   bool inquote = FALSE;
4311   bool skip_till_semicolumn = FALSE;
4312
4313   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4314     {
4315       while (*dbp != '\0')
4316         {
4317           /* Skip a string i.e. "abcd". */
4318           if (inquote || (*dbp == '"'))
4319             {
4320               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4321               if (dbp != NULL)
4322                 {
4323                   inquote = FALSE;
4324                   dbp += 1;
4325                   continue;     /* advance char */
4326                 }
4327               else
4328                 {
4329                   inquote = TRUE;
4330                   break;        /* advance line */
4331                 }
4332             }
4333
4334           /* Skip comments. */
4335           if (dbp[0] == '-' && dbp[1] == '-')
4336             break;              /* advance line */
4337
4338           /* Skip character enclosed in single quote i.e. 'a'
4339              and skip single quote starting an attribute i.e. 'Image. */
4340           if (*dbp == '\'')
4341             {
4342               dbp++ ;
4343               if (*dbp != '\0')
4344                 dbp++;
4345               continue;
4346             }
4347
4348           if (skip_till_semicolumn)
4349             {
4350               if (*dbp == ';')
4351                 skip_till_semicolumn = FALSE;
4352               dbp++;
4353               continue;         /* advance char */
4354             }
4355
4356           /* Search for beginning of a token.  */
4357           if (!begtoken (*dbp))
4358             {
4359               dbp++;
4360               continue;         /* advance char */
4361             }
4362
4363           /* We are at the beginning of a token. */
4364           switch (lowcase(*dbp))
4365             {
4366             case 'f':
4367               if (!packages_only && nocase_tail ("function"))
4368                 Ada_getit (inf, "/f");
4369               else
4370                 break;          /* from switch */
4371               continue;         /* advance char */
4372             case 'p':
4373               if (!packages_only && nocase_tail ("procedure"))
4374                 Ada_getit (inf, "/p");
4375               else if (nocase_tail ("package"))
4376                 Ada_getit (inf, "/s");
4377               else if (nocase_tail ("protected")) /* protected type */
4378                 Ada_getit (inf, "/t");
4379               else
4380                 break;          /* from switch */
4381               continue;         /* advance char */
4382
4383             case 'u':
4384               if (typedefs && !packages_only && nocase_tail ("use"))
4385                 {
4386                   /* when tagging types, avoid tagging  use type Pack.Typename;
4387                      for this, we will skip everything till a ; */
4388                   skip_till_semicolumn = TRUE;
4389                   continue;     /* advance char */
4390                 }
4391
4392             case 't':
4393               if (!packages_only && nocase_tail ("task"))
4394                 Ada_getit (inf, "/k");
4395               else if (typedefs && !packages_only && nocase_tail ("type"))
4396                 {
4397                   Ada_getit (inf, "/t");
4398                   while (*dbp != '\0')
4399                     dbp += 1;
4400                 }
4401               else
4402                 break;          /* from switch */
4403               continue;         /* advance char */
4404             }
4405
4406           /* Look for the end of the token. */
4407           while (!endtoken (*dbp))
4408             dbp++;
4409
4410         } /* advance char */
4411     } /* advance line */
4412 }
4413
4414 \f
4415 /*
4416  * Unix and microcontroller assembly tag handling
4417  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4418  * Idea by Bob Weiner, Motorola Inc. (1994)
4419  */
4420 static void
4421 Asm_labels (inf)
4422      FILE *inf;
4423 {
4424   register char *cp;
4425
4426   LOOP_ON_INPUT_LINES (inf, lb, cp)
4427     {
4428       /* If first char is alphabetic or one of [_.$], test for colon
4429          following identifier. */
4430       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4431         {
4432           /* Read past label. */
4433           cp++;
4434           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4435             cp++;
4436           if (*cp == ':' || iswhite (*cp))
4437             /* Found end of label, so copy it and add it to the table. */
4438             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4439                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4440         }
4441     }
4442 }
4443
4444 \f
4445 /*
4446  * Perl support
4447  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4448  * Perl variable names: /^(my|local).../
4449  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4450  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4451  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4452  */
4453 static void
4454 Perl_functions (inf)
4455      FILE *inf;
4456 {
4457   char *package = savestr ("main"); /* current package name */
4458   register char *cp;
4459
4460   LOOP_ON_INPUT_LINES (inf, lb, cp)
4461     {
4462       skip_spaces(cp);
4463
4464       if (LOOKING_AT (cp, "package"))
4465         {
4466           free (package);
4467           get_tag (cp, &package);
4468         }
4469       else if (LOOKING_AT (cp, "sub"))
4470         {
4471           char *pos;
4472           char *sp = cp;
4473
4474           while (!notinname (*cp))
4475             cp++;
4476           if (cp == sp)
4477             continue;           /* nothing found */
4478           if ((pos = etags_strchr (sp, ':')) != NULL
4479               && pos < cp && pos[1] == ':')
4480             /* The name is already qualified. */
4481             make_tag (sp, cp - sp, TRUE,
4482                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4483           else
4484             /* Qualify it. */
4485             {
4486               char savechar, *name;
4487
4488               savechar = *cp;
4489               *cp = '\0';
4490               name = concat (package, "::", sp);
4491               *cp = savechar;
4492               make_tag (name, strlen(name), TRUE,
4493                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4494               free (name);
4495             }
4496         }
4497        else if (globals)        /* only if we are tagging global vars */
4498         {
4499           /* Skip a qualifier, if any. */
4500           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4501           /* After "my" or "local", but before any following paren or space. */
4502           char *varstart = cp;
4503
4504           if (qual              /* should this be removed?  If yes, how? */
4505               && (*cp == '$' || *cp == '@' || *cp == '%'))
4506             {
4507               varstart += 1;
4508               do
4509                 cp++;
4510               while (ISALNUM (*cp) || *cp == '_');
4511             }
4512           else if (qual)
4513             {
4514               /* Should be examining a variable list at this point;
4515                  could insist on seeing an open parenthesis. */
4516               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4517                 cp++;
4518             }
4519           else
4520             continue;
4521
4522           make_tag (varstart, cp - varstart, FALSE,
4523                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4524         }
4525     }
4526   free (package);
4527 }
4528
4529
4530 /*
4531  * Python support
4532  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4533  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4534  * More ideas by seb bacon <seb@jamkit.com> (2002)
4535  */
4536 static void
4537 Python_functions (inf)
4538      FILE *inf;
4539 {
4540   register char *cp;
4541
4542   LOOP_ON_INPUT_LINES (inf, lb, cp)
4543     {
4544       cp = skip_spaces (cp);
4545       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4546         {
4547           char *name = cp;
4548           while (!notinname (*cp) && *cp != ':')
4549             cp++;
4550           make_tag (name, cp - name, TRUE,
4551                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4552         }
4553     }
4554 }
4555
4556 \f
4557 /*
4558  * PHP support
4559  * Look for:
4560  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4561  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4562  *  - /^[ \t]*define\(\"[^\"]+/
4563  * Only with --members:
4564  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4565  * Idea by Diez B. Roggisch (2001)
4566  */
4567 static void
4568 PHP_functions (inf)
4569      FILE *inf;
4570 {
4571   register char *cp, *name;
4572   bool search_identifier = FALSE;
4573
4574   LOOP_ON_INPUT_LINES (inf, lb, cp)
4575     {
4576       cp = skip_spaces (cp);
4577       name = cp;
4578       if (search_identifier
4579           && *cp != '\0')
4580         {
4581           while (!notinname (*cp))
4582             cp++;
4583           make_tag (name, cp - name, TRUE,
4584                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4585           search_identifier = FALSE;
4586         }
4587       else if (LOOKING_AT (cp, "function"))
4588         {
4589           if(*cp == '&')
4590             cp = skip_spaces (cp+1);
4591           if(*cp != '\0')
4592             {
4593               name = cp;
4594               while (!notinname (*cp))
4595                 cp++;
4596               make_tag (name, cp - name, TRUE,
4597                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4598             }
4599           else
4600             search_identifier = TRUE;
4601         }
4602       else if (LOOKING_AT (cp, "class"))
4603         {
4604           if (*cp != '\0')
4605             {
4606               name = cp;
4607               while (*cp != '\0' && !iswhite (*cp))
4608                 cp++;
4609               make_tag (name, cp - name, FALSE,
4610                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4611             }
4612           else
4613             search_identifier = TRUE;
4614         }
4615       else if (strneq (cp, "define", 6)
4616                && (cp = skip_spaces (cp+6))
4617                && *cp++ == '('
4618                && (*cp == '"' || *cp == '\''))
4619         {
4620           char quote = *cp++;
4621           name = cp;
4622           while (*cp != quote && *cp != '\0')
4623             cp++;
4624           make_tag (name, cp - name, FALSE,
4625                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4626         }
4627       else if (members
4628                && LOOKING_AT (cp, "var")
4629                && *cp == '$')
4630         {
4631           name = cp;
4632           while (!notinname(*cp))
4633             cp++;
4634           make_tag (name, cp - name, FALSE,
4635                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4636         }
4637     }
4638 }
4639
4640 \f
4641 /*
4642  * Cobol tag functions
4643  * We could look for anything that could be a paragraph name.
4644  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4645  * Idea by Corny de Souza (1993)
4646  */
4647 static void
4648 Cobol_paragraphs (inf)
4649      FILE *inf;
4650 {
4651   register char *bp, *ep;
4652
4653   LOOP_ON_INPUT_LINES (inf, lb, bp)
4654     {
4655       if (lb.len < 9)
4656         continue;
4657       bp += 8;
4658
4659       /* If eoln, compiler option or comment ignore whole line. */
4660       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4661         continue;
4662
4663       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4664         continue;
4665       if (*ep++ == '.')
4666         make_tag (bp, ep - bp, TRUE,
4667                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4668     }
4669 }
4670
4671 \f
4672 /*
4673  * Makefile support
4674  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4675  */
4676 static void
4677 Makefile_targets (inf)
4678      FILE *inf;
4679 {
4680   register char *bp;
4681
4682   LOOP_ON_INPUT_LINES (inf, lb, bp)
4683     {
4684       if (*bp == '\t' || *bp == '#')
4685         continue;
4686       while (*bp != '\0' && *bp != '=' && *bp != ':')
4687         bp++;
4688       if (*bp == ':' || (globals && *bp == '='))
4689         {
4690           /* We should detect if there is more than one tag, but we do not.
4691              We just skip initial and final spaces. */
4692           char * namestart = skip_spaces (lb.buffer);
4693           while (--bp > namestart)
4694             if (!notinname (*bp))
4695               break;
4696           make_tag (namestart, bp - namestart + 1, TRUE,
4697                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4698         }
4699     }
4700 }
4701
4702 \f
4703 /*
4704  * Pascal parsing
4705  * Original code by Mosur K. Mohan (1989)
4706  *
4707  *  Locates tags for procedures & functions.  Doesn't do any type- or
4708  *  var-definitions.  It does look for the keyword "extern" or
4709  *  "forward" immediately following the procedure statement; if found,
4710  *  the tag is skipped.
4711  */
4712 static void
4713 Pascal_functions (inf)
4714      FILE *inf;
4715 {
4716   linebuffer tline;             /* mostly copied from C_entries */
4717   long save_lcno;
4718   int save_lineno, namelen, taglen;
4719   char c, *name;
4720
4721   bool                          /* each of these flags is TRUE iff: */
4722     incomment,                  /* point is inside a comment */
4723     inquote,                    /* point is inside '..' string */
4724     get_tagname,                /* point is after PROCEDURE/FUNCTION
4725                                    keyword, so next item = potential tag */
4726     found_tag,                  /* point is after a potential tag */
4727     inparms,                    /* point is within parameter-list */
4728     verify_tag;                 /* point has passed the parm-list, so the
4729                                    next token will determine whether this
4730                                    is a FORWARD/EXTERN to be ignored, or
4731                                    whether it is a real tag */
4732
4733   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4734   name = NULL;                  /* keep compiler quiet */
4735   dbp = lb.buffer;
4736   *dbp = '\0';
4737   linebuffer_init (&tline);
4738
4739   incomment = inquote = FALSE;
4740   found_tag = FALSE;            /* have a proc name; check if extern */
4741   get_tagname = FALSE;          /* found "procedure" keyword         */
4742   inparms = FALSE;              /* found '(' after "proc"            */
4743   verify_tag = FALSE;           /* check if "extern" is ahead        */
4744
4745
4746   while (!feof (inf))           /* long main loop to get next char */
4747     {
4748       c = *dbp++;
4749       if (c == '\0')            /* if end of line */
4750         {
4751           readline (&lb, inf);
4752           dbp = lb.buffer;
4753           if (*dbp == '\0')
4754             continue;
4755           if (!((found_tag && verify_tag)
4756                 || get_tagname))
4757             c = *dbp++;         /* only if don't need *dbp pointing
4758                                    to the beginning of the name of
4759                                    the procedure or function */
4760         }
4761       if (incomment)
4762         {
4763           if (c == '}')         /* within { } comments */
4764             incomment = FALSE;
4765           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4766             {
4767               dbp++;
4768               incomment = FALSE;
4769             }
4770           continue;
4771         }
4772       else if (inquote)
4773         {
4774           if (c == '\'')
4775             inquote = FALSE;
4776           continue;
4777         }
4778       else
4779         switch (c)
4780           {
4781           case '\'':
4782             inquote = TRUE;     /* found first quote */
4783             continue;
4784           case '{':             /* found open { comment */
4785             incomment = TRUE;
4786             continue;
4787           case '(':
4788             if (*dbp == '*')    /* found open (* comment */
4789               {
4790                 incomment = TRUE;
4791                 dbp++;
4792               }
4793             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4794               inparms = TRUE;
4795             continue;
4796           case ')':             /* end of parms list */
4797             if (inparms)
4798               inparms = FALSE;
4799             continue;
4800           case ';':
4801             if (found_tag && !inparms) /* end of proc or fn stmt */
4802               {
4803                 verify_tag = TRUE;
4804                 break;
4805               }
4806             continue;
4807           }
4808       if (found_tag && verify_tag && (*dbp != ' '))
4809         {
4810           /* Check if this is an "extern" declaration. */
4811           if (*dbp == '\0')
4812             continue;
4813           if (lowcase (*dbp == 'e'))
4814             {
4815               if (nocase_tail ("extern")) /* superfluous, really! */
4816                 {
4817                   found_tag = FALSE;
4818                   verify_tag = FALSE;
4819                 }
4820             }
4821           else if (lowcase (*dbp) == 'f')
4822             {
4823               if (nocase_tail ("forward")) /* check for forward reference */
4824                 {
4825                   found_tag = FALSE;
4826                   verify_tag = FALSE;
4827                 }
4828             }
4829           if (found_tag && verify_tag) /* not external proc, so make tag */
4830             {
4831               found_tag = FALSE;
4832               verify_tag = FALSE;
4833               make_tag (name, namelen, TRUE,
4834                         tline.buffer, taglen, save_lineno, save_lcno);
4835               continue;
4836             }
4837         }
4838       if (get_tagname)          /* grab name of proc or fn */
4839         {
4840           char *cp;
4841
4842           if (*dbp == '\0')
4843             continue;
4844
4845           /* Find block name. */
4846           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4847             continue;
4848
4849           /* Save all values for later tagging. */
4850           linebuffer_setlen (&tline, lb.len);
4851           strcpy (tline.buffer, lb.buffer);
4852           save_lineno = lineno;
4853           save_lcno = linecharno;
4854           name = tline.buffer + (dbp - lb.buffer);
4855           namelen = cp - dbp;
4856           taglen = cp - lb.buffer + 1;
4857
4858           dbp = cp;             /* set dbp to e-o-token */
4859           get_tagname = FALSE;
4860           found_tag = TRUE;
4861           continue;
4862
4863           /* And proceed to check for "extern". */
4864         }
4865       else if (!incomment && !inquote && !found_tag)
4866         {
4867           /* Check for proc/fn keywords. */
4868           switch (lowcase (c))
4869             {
4870             case 'p':
4871               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4872                 get_tagname = TRUE;
4873               continue;
4874             case 'f':
4875               if (nocase_tail ("unction"))
4876                 get_tagname = TRUE;
4877               continue;
4878             }
4879         }
4880     } /* while not eof */
4881
4882   free (tline.buffer);
4883 }
4884
4885 \f
4886 /*
4887  * Lisp tag functions
4888  *  look for (def or (DEF, quote or QUOTE
4889  */
4890
4891 static void L_getit __P((void));
4892
4893 static void
4894 L_getit ()
4895 {
4896   if (*dbp == '\'')             /* Skip prefix quote */
4897     dbp++;
4898   else if (*dbp == '(')
4899   {
4900     dbp++;
4901     /* Try to skip "(quote " */
4902     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4903       /* Ok, then skip "(" before name in (defstruct (foo)) */
4904       dbp = skip_spaces (dbp);
4905   }
4906   get_tag (dbp, NULL);
4907 }
4908
4909 static void
4910 Lisp_functions (inf)
4911      FILE *inf;
4912 {
4913   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4914     {
4915       if (dbp[0] != '(')
4916         continue;
4917
4918       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4919         {
4920           dbp = skip_non_spaces (dbp);
4921           dbp = skip_spaces (dbp);
4922           L_getit ();
4923         }
4924       else
4925         {
4926           /* Check for (foo::defmumble name-defined ... */
4927           do
4928             dbp++;
4929           while (!notinname (*dbp) && *dbp != ':');
4930           if (*dbp == ':')
4931             {
4932               do
4933                 dbp++;
4934               while (*dbp == ':');
4935
4936               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4937                 {
4938                   dbp = skip_non_spaces (dbp);
4939                   dbp = skip_spaces (dbp);
4940                   L_getit ();
4941                 }
4942             }
4943         }
4944     }
4945 }
4946
4947 \f
4948 /*
4949  * Lua script language parsing
4950  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4951  *
4952  *  "function" and "local function" are tags if they start at column 1.
4953  */
4954 static void
4955 Lua_functions (inf)
4956      FILE *inf;
4957 {
4958   register char *bp;
4959
4960   LOOP_ON_INPUT_LINES (inf, lb, bp)
4961     {
4962       if (bp[0] != 'f' && bp[0] != 'l')
4963         continue;
4964
4965       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4966
4967       if (LOOKING_AT (bp, "function"))
4968         get_tag (bp, NULL);
4969     }
4970 }
4971
4972 \f
4973 /*
4974  * Postscript tags
4975  * Just look for lines where the first character is '/'
4976  * Also look at "defineps" for PSWrap
4977  * Ideas by:
4978  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4979  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4980  */
4981 static void
4982 PS_functions (inf)
4983      FILE *inf;
4984 {
4985   register char *bp, *ep;
4986
4987   LOOP_ON_INPUT_LINES (inf, lb, bp)
4988     {
4989       if (bp[0] == '/')
4990         {
4991           for (ep = bp+1;
4992                *ep != '\0' && *ep != ' ' && *ep != '{';
4993                ep++)
4994             continue;
4995           make_tag (bp, ep - bp, TRUE,
4996                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4997         }
4998       else if (LOOKING_AT (bp, "defineps"))
4999         get_tag (bp, NULL);
5000     }
5001 }
5002
5003 \f
5004 /*
5005  * Forth tags
5006  * Ignore anything after \ followed by space or in ( )
5007  * Look for words defined by :
5008  * Look for constant, code, create, defer, value, and variable
5009  * OBP extensions:  Look for buffer:, field,
5010  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5011  */
5012 static void
5013 Forth_words (inf)
5014      FILE *inf;
5015 {
5016   register char *bp;
5017
5018   LOOP_ON_INPUT_LINES (inf, lb, bp)
5019     while ((bp = skip_spaces (bp))[0] != '\0')
5020       if (bp[0] == '\\' && iswhite(bp[1]))
5021         break;                  /* read next line */
5022       else if (bp[0] == '(' && iswhite(bp[1]))
5023         do                      /* skip to ) or eol */
5024           bp++;
5025         while (*bp != ')' && *bp != '\0');
5026       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5027                || LOOKING_AT_NOCASE (bp, "constant")
5028                || LOOKING_AT_NOCASE (bp, "code")
5029                || LOOKING_AT_NOCASE (bp, "create")
5030                || LOOKING_AT_NOCASE (bp, "defer")
5031                || LOOKING_AT_NOCASE (bp, "value")
5032                || LOOKING_AT_NOCASE (bp, "variable")
5033                || LOOKING_AT_NOCASE (bp, "buffer:")
5034                || LOOKING_AT_NOCASE (bp, "field"))
5035         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5036       else
5037         bp = skip_non_spaces (bp);
5038 }
5039
5040 \f
5041 /*
5042  * Scheme tag functions
5043  * look for (def... xyzzy
5044  *          (def... (xyzzy
5045  *          (def ... ((...(xyzzy ....
5046  *          (set! xyzzy
5047  * Original code by Ken Haase (1985?)
5048  */
5049 static void
5050 Scheme_functions (inf)
5051      FILE *inf;
5052 {
5053   register char *bp;
5054
5055   LOOP_ON_INPUT_LINES (inf, lb, bp)
5056     {
5057       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5058         {
5059           bp = skip_non_spaces (bp+4);
5060           /* Skip over open parens and white space */
5061           while (notinname (*bp))
5062             bp++;
5063           get_tag (bp, NULL);
5064         }
5065       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5066         get_tag (bp, NULL);
5067     }
5068 }
5069
5070 \f
5071 /* Find tags in TeX and LaTeX input files.  */
5072
5073 /* TEX_toktab is a table of TeX control sequences that define tags.
5074  * Each entry records one such control sequence.
5075  *
5076  * Original code from who knows whom.
5077  * Ideas by:
5078  *   Stefan Monnier (2002)
5079  */
5080
5081 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5082
5083 /* Default set of control sequences to put into TEX_toktab.
5084    The value of environment var TEXTAGS is prepended to this.  */
5085 static char *TEX_defenv = "\
5086 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5087 :part:appendix:entry:index:def\
5088 :newcommand:renewcommand:newenvironment:renewenvironment";
5089
5090 static void TEX_mode __P((FILE *));
5091 static void TEX_decode_env __P((char *, char *));
5092
5093 static char TEX_esc = '\\';
5094 static char TEX_opgrp = '{';
5095 static char TEX_clgrp = '}';
5096
5097 /*
5098  * TeX/LaTeX scanning loop.
5099  */
5100 static void
5101 TeX_commands (inf)
5102      FILE *inf;
5103 {
5104   char *cp;
5105   linebuffer *key;
5106
5107   /* Select either \ or ! as escape character.  */
5108   TEX_mode (inf);
5109
5110   /* Initialize token table once from environment. */
5111   if (TEX_toktab == NULL)
5112     TEX_decode_env ("TEXTAGS", TEX_defenv);
5113
5114   LOOP_ON_INPUT_LINES (inf, lb, cp)
5115     {
5116       /* Look at each TEX keyword in line. */
5117       for (;;)
5118         {
5119           /* Look for a TEX escape. */
5120           while (*cp++ != TEX_esc)
5121             if (cp[-1] == '\0' || cp[-1] == '%')
5122               goto tex_next_line;
5123
5124           for (key = TEX_toktab; key->buffer != NULL; key++)
5125             if (strneq (cp, key->buffer, key->len))
5126               {
5127                 register char *p;
5128                 int namelen, linelen;
5129                 bool opgrp = FALSE;
5130
5131                 cp = skip_spaces (cp + key->len);
5132                 if (*cp == TEX_opgrp)
5133                   {
5134                     opgrp = TRUE;
5135                     cp++;
5136                   }
5137                 for (p = cp;
5138                      (!iswhite (*p) && *p != '#' &&
5139                       *p != TEX_opgrp && *p != TEX_clgrp);
5140                      p++)
5141                   continue;
5142                 namelen = p - cp;
5143                 linelen = lb.len;
5144                 if (!opgrp || *p == TEX_clgrp)
5145                   {
5146                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5147                       p++;
5148                     linelen = p - lb.buffer + 1;
5149                   }
5150                 make_tag (cp, namelen, TRUE,
5151                           lb.buffer, linelen, lineno, linecharno);
5152                 goto tex_next_line; /* We only tag a line once */
5153               }
5154         }
5155     tex_next_line:
5156       ;
5157     }
5158 }
5159
5160 #define TEX_LESC '\\'
5161 #define TEX_SESC '!'
5162
5163 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5164    chars accordingly. */
5165 static void
5166 TEX_mode (inf)
5167      FILE *inf;
5168 {
5169   int c;
5170
5171   while ((c = getc (inf)) != EOF)
5172     {
5173       /* Skip to next line if we hit the TeX comment char. */
5174       if (c == '%')
5175         while (c != '\n' && c != EOF)
5176           c = getc (inf);
5177       else if (c == TEX_LESC || c == TEX_SESC )
5178         break;
5179     }
5180
5181   if (c == TEX_LESC)
5182     {
5183       TEX_esc = TEX_LESC;
5184       TEX_opgrp = '{';
5185       TEX_clgrp = '}';
5186     }
5187   else
5188     {
5189       TEX_esc = TEX_SESC;
5190       TEX_opgrp = '<';
5191       TEX_clgrp = '>';
5192     }
5193   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5194      No attempt is made to correct the situation. */
5195   rewind (inf);
5196 }
5197
5198 /* Read environment and prepend it to the default string.
5199    Build token table. */
5200 static void
5201 TEX_decode_env (evarname, defenv)
5202      char *evarname;
5203      char *defenv;
5204 {
5205   register char *env, *p;
5206   int i, len;
5207
5208   /* Append default string to environment. */
5209   env = getenv (evarname);
5210   if (!env)
5211     env = defenv;
5212   else
5213     {
5214       char *oldenv = env;
5215       env = concat (oldenv, defenv, "");
5216     }
5217
5218   /* Allocate a token table */
5219   for (len = 1, p = env; p;)
5220     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5221       len++;
5222   TEX_toktab = xnew (len, linebuffer);
5223
5224   /* Unpack environment string into token table. Be careful about */
5225   /* zero-length strings (leading ':', "::" and trailing ':') */
5226   for (i = 0; *env != '\0';)
5227     {
5228       p = etags_strchr (env, ':');
5229       if (!p)                   /* End of environment string. */
5230         p = env + strlen (env);
5231       if (p - env > 0)
5232         {                       /* Only non-zero strings. */
5233           TEX_toktab[i].buffer = savenstr (env, p - env);
5234           TEX_toktab[i].len = p - env;
5235           i++;
5236         }
5237       if (*p)
5238         env = p + 1;
5239       else
5240         {
5241           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5242           TEX_toktab[i].len = 0;
5243           break;
5244         }
5245     }
5246 }
5247
5248 \f
5249 /* Texinfo support.  Dave Love, Mar. 2000.  */
5250 static void
5251 Texinfo_nodes (inf)
5252      FILE * inf;
5253 {
5254   char *cp, *start;
5255   LOOP_ON_INPUT_LINES (inf, lb, cp)
5256     if (LOOKING_AT (cp, "@node"))
5257       {
5258         start = cp;
5259         while (*cp != '\0' && *cp != ',')
5260           cp++;
5261         make_tag (start, cp - start, TRUE,
5262                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5263       }
5264 }
5265
5266 \f
5267 /*
5268  * HTML support.
5269  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5270  * Contents of <a name=xxx> are tags with name xxx.
5271  *
5272  * Francesco Potortì, 2002.
5273  */
5274 static void
5275 HTML_labels (inf)
5276      FILE * inf;
5277 {
5278   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5279   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5280   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5281   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5282   char *end;
5283
5284
5285   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5286
5287   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5288     for (;;)                    /* loop on the same line */
5289       {
5290         if (skiptag)            /* skip HTML tag */
5291           {
5292             while (*dbp != '\0' && *dbp != '>')
5293               dbp++;
5294             if (*dbp == '>')
5295               {
5296                 dbp += 1;
5297                 skiptag = FALSE;
5298                 continue;       /* look on the same line */
5299               }
5300             break;              /* go to next line */
5301           }
5302
5303         else if (intag) /* look for "name=" or "id=" */
5304           {
5305             while (*dbp != '\0' && *dbp != '>'
5306                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5307               dbp++;
5308             if (*dbp == '\0')
5309               break;            /* go to next line */
5310             if (*dbp == '>')
5311               {
5312                 dbp += 1;
5313                 intag = FALSE;
5314                 continue;       /* look on the same line */
5315               }
5316             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5317                 || LOOKING_AT_NOCASE (dbp, "id="))
5318               {
5319                 bool quoted = (dbp[0] == '"');
5320
5321                 if (quoted)
5322                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5323                     continue;
5324                 else
5325                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5326                     continue;
5327                 linebuffer_setlen (&token_name, end - dbp);
5328                 strncpy (token_name.buffer, dbp, end - dbp);
5329                 token_name.buffer[end - dbp] = '\0';
5330
5331                 dbp = end;
5332                 intag = FALSE;  /* we found what we looked for */
5333                 skiptag = TRUE; /* skip to the end of the tag */
5334                 getnext = TRUE; /* then grab the text */
5335                 continue;       /* look on the same line */
5336               }
5337             dbp += 1;
5338           }
5339
5340         else if (getnext)       /* grab next tokens and tag them */
5341           {
5342             dbp = skip_spaces (dbp);
5343             if (*dbp == '\0')
5344               break;            /* go to next line */
5345             if (*dbp == '<')
5346               {
5347                 intag = TRUE;
5348                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5349                 continue;       /* look on the same line */
5350               }
5351
5352             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5353               continue;
5354             make_tag (token_name.buffer, token_name.len, TRUE,
5355                       dbp, end - dbp, lineno, linecharno);
5356             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5357             getnext = FALSE;
5358             break;              /* go to next line */
5359           }
5360
5361         else                    /* look for an interesting HTML tag */
5362           {
5363             while (*dbp != '\0' && *dbp != '<')
5364               dbp++;
5365             if (*dbp == '\0')
5366               break;            /* go to next line */
5367             intag = TRUE;
5368             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5369               {
5370                 inanchor = TRUE;
5371                 continue;       /* look on the same line */
5372               }
5373             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5374                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5375                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5376                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5377               {
5378                 intag = FALSE;
5379                 getnext = TRUE;
5380                 continue;       /* look on the same line */
5381               }
5382             dbp += 1;
5383           }
5384       }
5385 }
5386
5387 \f
5388 /*
5389  * Prolog support
5390  *
5391  * Assumes that the predicate or rule starts at column 0.
5392  * Only the first clause of a predicate or rule is added.
5393  * Original code by Sunichirou Sugou (1989)
5394  * Rewritten by Anders Lindgren (1996)
5395  */
5396 static int prolog_pr __P((char *, char *));
5397 static void prolog_skip_comment __P((linebuffer *, FILE *));
5398 static int prolog_atom __P((char *, int));
5399
5400 static void
5401 Prolog_functions (inf)
5402      FILE *inf;
5403 {
5404   char *cp, *last;
5405   int len;
5406   int allocated;
5407
5408   allocated = 0;
5409   len = 0;
5410   last = NULL;
5411
5412   LOOP_ON_INPUT_LINES (inf, lb, cp)
5413     {
5414       if (cp[0] == '\0')        /* Empty line */
5415         continue;
5416       else if (iswhite (cp[0])) /* Not a predicate */
5417         continue;
5418       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5419         prolog_skip_comment (&lb, inf);
5420       else if ((len = prolog_pr (cp, last)) > 0)
5421         {
5422           /* Predicate or rule.  Store the function name so that we
5423              only generate a tag for the first clause.  */
5424           if (last == NULL)
5425             last = xnew(len + 1, char);
5426           else if (len + 1 > allocated)
5427             xrnew (last, len + 1, char);
5428           allocated = len + 1;
5429           strncpy (last, cp, len);
5430           last[len] = '\0';
5431         }
5432     }
5433   if (last != NULL)
5434     free (last);
5435 }
5436
5437
5438 static void
5439 prolog_skip_comment (plb, inf)
5440      linebuffer *plb;
5441      FILE *inf;
5442 {
5443   char *cp;
5444
5445   do
5446     {
5447       for (cp = plb->buffer; *cp != '\0'; cp++)
5448         if (cp[0] == '*' && cp[1] == '/')
5449           return;
5450       readline (plb, inf);
5451     }
5452   while (!feof(inf));
5453 }
5454
5455 /*
5456  * A predicate or rule definition is added if it matches:
5457  *     <beginning of line><Prolog Atom><whitespace>(
5458  * or  <beginning of line><Prolog Atom><whitespace>:-
5459  *
5460  * It is added to the tags database if it doesn't match the
5461  * name of the previous clause header.
5462  *
5463  * Return the size of the name of the predicate or rule, or 0 if no
5464  * header was found.
5465  */
5466 static int
5467 prolog_pr (s, last)
5468      char *s;
5469      char *last;                /* Name of last clause. */
5470 {
5471   int pos;
5472   int len;
5473
5474   pos = prolog_atom (s, 0);
5475   if (pos < 1)
5476     return 0;
5477
5478   len = pos;
5479   pos = skip_spaces (s + pos) - s;
5480
5481   if ((s[pos] == '.'
5482        || (s[pos] == '(' && (pos += 1))
5483        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5484       && (last == NULL          /* save only the first clause */
5485           || len != (int)strlen (last)
5486           || !strneq (s, last, len)))
5487         {
5488           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5489           return len;
5490         }
5491   else
5492     return 0;
5493 }
5494
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there is no atom or
 * the atom is not terminated on this line.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: go on until a quote that is not doubled.  */
  p++;
  while (!(p[0] == '\'' && p[1] != '\''))
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\'':
        p += 2;                 /* A double quote */
        break;
      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;
      default:
        p++;
        break;
      }

  return (int) (p + 1 - start); /* include the closing quote */
}
5553
5554 \f
5555 /*
5556  * Support for Erlang
5557  *
5558  * Generates tags for functions, defines, and records.
5559  * Assumes that Erlang functions start at column 0.
5560  * Original code by Anders Lindgren (1996)
5561  */
5562 static int erlang_func __P((char *, char *));
5563 static void erlang_attribute __P((char *));
5564 static int erlang_atom __P((char *));
5565
5566 static void
5567 Erlang_functions (inf)
5568      FILE *inf;
5569 {
5570   char *cp, *last;
5571   int len;
5572   int allocated;
5573
5574   allocated = 0;
5575   len = 0;
5576   last = NULL;
5577
5578   LOOP_ON_INPUT_LINES (inf, lb, cp)
5579     {
5580       if (cp[0] == '\0')        /* Empty line */
5581         continue;
5582       else if (iswhite (cp[0])) /* Not function nor attribute */
5583         continue;
5584       else if (cp[0] == '%')    /* comment */
5585         continue;
5586       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5587         continue;
5588       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5589         {
5590           erlang_attribute (cp);
5591           if (last != NULL)
5592             {
5593               free (last);
5594               last = NULL;
5595             }
5596         }
5597       else if ((len = erlang_func (cp, last)) > 0)
5598         {
5599           /*
5600            * Function.  Store the function name so that we only
5601            * generates a tag for the first clause.
5602            */
5603           if (last == NULL)
5604             last = xnew (len + 1, char);
5605           else if (len + 1 > allocated)
5606             xrnew (last, len + 1, char);
5607           allocated = len + 1;
5608           strncpy (last, cp, len);
5609           last[len] = '\0';
5610         }
5611     }
5612   if (last != NULL)
5613     free (last);
5614 }
5615
5616
5617 /*
5618  * A function definition is added if it matches:
5619  *     <beginning of line><Erlang Atom><whitespace>(
5620  *
5621  * It is added to the tags database if it doesn't match the
5622  * name of the previous clause header.
5623  *
5624  * Return the size of the name of the function, or 0 if no function
5625  * was found.
5626  */
5627 static int
5628 erlang_func (s, last)
5629      char *s;
5630      char *last;                /* Name of last clause. */
5631 {
5632   int pos;
5633   int len;
5634
5635   pos = erlang_atom (s);
5636   if (pos < 1)
5637     return 0;
5638
5639   len = pos;
5640   pos = skip_spaces (s + pos) - s;
5641
5642   /* Save only the first clause. */
5643   if (s[pos++] == '('
5644       && (last == NULL
5645           || len != (int)strlen (last)
5646           || !strneq (s, last, len)))
5647         {
5648           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5649           return len;
5650         }
5651
5652   return 0;
5653 }
5654
5655
5656 /*
5657  * Handle attributes.  Currently, tags are generated for defines
5658  * and records.
5659  *
5660  * They are on the form:
5661  * -define(foo, bar).
5662  * -define(Foo(M, N), M+N).
5663  * -record(graph, {vtab = notable, cyclic = true}).
5664  */
5665 static void
5666 erlang_attribute (s)
5667      char *s;
5668 {
5669   char *cp = s;
5670
5671   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5672       && *cp++ == '(')
5673     {
5674       int len = erlang_atom (skip_spaces (cp));
5675       if (len > 0)
5676         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5677     }
5678   return;
5679 }
5680
5681
/*
 * Consume the Erlang atom (or variable) that starts at S.
 * An unquoted atom is a letter or underscore followed by any number
 * of alphanumeric characters and underscores.  A quoted atom runs
 * from a single quote to the next single quote that is not preceded
 * by a backslash.
 * Return the number of bytes consumed, or 0 if S does not start an
 * atom or if a quoted atom is not terminated on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return (int) (p - s);
    }

  if (*p != '\'')
    return 0;                   /* not an atom at all */

  /* The atom is quoted: look for the closing quote.  */
  p++;
  while (*p != '\'')
    {
      if (*p == '\0')           /* multiline quoted atoms are ignored */
        return 0;
      if (*p == '\\')
        {
          p++;                  /* the next character is escaped */
          if (*p == '\0')
            return 0;
        }
      p++;
    }

  return (int) (p + 1 - s);     /* include the closing quote */
}
5710
5711 \f
5712 static char *scan_separators __P((char *));
5713 static void add_regex __P((char *, language *));
5714 static char *substitute __P((char *, char *, struct re_registers *));
5715
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; the text that follows it, up
 * to the next unquoted separator, is copied to the beginning of NAME
 * and null terminated.  Works in place.
 *
 * While copying, a backslash followed by one of the letters
 * a b d e f n r t v is replaced by the corresponding control
 * character, a backslash followed by the separator is replaced by
 * the separator alone, and any other backslash sequence is copied
 * unchanged.
 *
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where to write; always behind SRC */
  char *src;                    /* where to read */
  char *end;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Quoted character: look at what follows the backslash.  */
          src++;
          if (*src == '\0')
            break;              /* lone backslash at end of string */
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)      */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)         */
            case 'e': *dst++ = 033; break;    /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator loses its quote; when something
                 else is quoted, the quote is preserved.  */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator: we are done */
      else
        *dst++ = *src;
    }

  /* Anything but a separator here signals an unterminated regexp.  */
  end = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return end;
}
5775
5776 /* Look at the argument of --regex or --no-regex and do the right
5777    thing.  Same for each line of a regexp file. */
5778 static void
5779 analyse_regex (regex_arg)
5780      char *regex_arg;
5781 {
5782   if (regex_arg == NULL)
5783     {
5784       free_regexps ();          /* --no-regex: remove existing regexps */
5785       return;
5786     }
5787
5788   /* A real --regexp option or a line in a regexp file. */
5789   switch (regex_arg[0])
5790     {
5791       /* Comments in regexp file or null arg to --regex. */
5792     case '\0':
5793     case ' ':
5794     case '\t':
5795       break;
5796
5797       /* Read a regex file.  This is recursive and may result in a
5798          loop, which will stop when the file descriptors are exhausted. */
5799     case '@':
5800       {
5801         FILE *regexfp;
5802         linebuffer regexbuf;
5803         char *regexfile = regex_arg + 1;
5804
5805         /* regexfile is a file containing regexps, one per line. */
5806         regexfp = fopen (regexfile, "r");
5807         if (regexfp == NULL)
5808           {
5809             pfatal (regexfile);
5810             return;
5811           }
5812         linebuffer_init (&regexbuf);
5813         while (readline_internal (&regexbuf, regexfp) > 0)
5814           analyse_regex (regexbuf.buffer);
5815         free (regexbuf.buffer);
5816         fclose (regexfp);
5817       }
5818       break;
5819
5820       /* Regexp to be used for a specific language only. */
5821     case '{':
5822       {
5823         language *lang;
5824         char *lang_name = regex_arg + 1;
5825         char *cp;
5826
5827         for (cp = lang_name; *cp != '}'; cp++)
5828           if (*cp == '\0')
5829             {
5830               error ("unterminated language name in regex: %s", regex_arg);
5831               return;
5832             }
5833         *cp++ = '\0';
5834         lang = get_language_from_langname (lang_name);
5835         if (lang == NULL)
5836           return;
5837         add_regex (cp, lang);
5838       }
5839       break;
5840
5841       /* Regexp to be used for any language. */
5842     default:
5843       add_regex (regex_arg, NULL);
5844       break;
5845     }
5846 }
5847
5848 /* Separate the regexp pattern, compile it,
5849    and care for optional name and modifiers. */
5850 static void
5851 add_regex (regexp_pattern, lang)
5852      char *regexp_pattern;
5853      language *lang;
5854 {
5855   static struct re_pattern_buffer zeropattern;
5856   char sep, *pat, *name, *modifiers;
5857   const char *err;
5858   struct re_pattern_buffer *patbuf;
5859   regexp *rp;
5860   bool
5861     force_explicit_name = TRUE, /* do not use implicit tag names */
5862     ignore_case = FALSE,        /* case is significant */
5863     multi_line = FALSE,         /* matches are done one line at a time */
5864     single_line = FALSE;        /* dot does not match newline */
5865
5866
5867   if (strlen(regexp_pattern) < 3)
5868     {
5869       error ("null regexp", (char *)NULL);
5870       return;
5871     }
5872   sep = regexp_pattern[0];
5873   name = scan_separators (regexp_pattern);
5874   if (name == NULL)
5875     {
5876       error ("%s: unterminated regexp", regexp_pattern);
5877       return;
5878     }
5879   if (name[1] == sep)
5880     {
5881       error ("null name for regexp \"%s\"", regexp_pattern);
5882       return;
5883     }
5884   modifiers = scan_separators (name);
5885   if (modifiers == NULL)        /* no terminating separator --> no name */
5886     {
5887       modifiers = name;
5888       name = "";
5889     }
5890   else
5891     modifiers += 1;             /* skip separator */
5892
5893   /* Parse regex modifiers. */
5894   for (; modifiers[0] != '\0'; modifiers++)
5895     switch (modifiers[0])
5896       {
5897       case 'N':
5898         if (modifiers == name)
5899           error ("forcing explicit tag name but no name, ignoring", NULL);
5900         force_explicit_name = TRUE;
5901         break;
5902       case 'i':
5903         ignore_case = TRUE;
5904         break;
5905       case 's':
5906         single_line = TRUE;
5907         /* FALLTHRU */
5908       case 'm':
5909         multi_line = TRUE;
5910         need_filebuf = TRUE;
5911         break;
5912       default:
5913         {
5914           char wrongmod [2];
5915           wrongmod[0] = modifiers[0];
5916           wrongmod[1] = '\0';
5917           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5918         }
5919         break;
5920       }
5921
5922   patbuf = xnew (1, struct re_pattern_buffer);
5923   *patbuf = zeropattern;
5924   if (ignore_case)
5925     {
5926       static char lc_trans[CHARS];
5927       int i;
5928       for (i = 0; i < CHARS; i++)
5929         lc_trans[i] = lowcase (i);
5930       patbuf->translate = lc_trans;     /* translation table to fold case  */
5931     }
5932
5933   if (multi_line)
5934     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5935   else
5936     pat = regexp_pattern;
5937
5938   if (single_line)
5939     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5940   else
5941     re_set_syntax (RE_SYNTAX_EMACS);
5942
5943   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5944   if (multi_line)
5945     free (pat);
5946   if (err != NULL)
5947     {
5948       error ("%s while compiling pattern", err);
5949       return;
5950     }
5951
5952   rp = p_head;
5953   p_head = xnew (1, regexp);
5954   p_head->pattern = savestr (regexp_pattern);
5955   p_head->p_next = rp;
5956   p_head->lang = lang;
5957   p_head->pat = patbuf;
5958   p_head->name = savestr (name);
5959   p_head->error_signaled = FALSE;
5960   p_head->force_explicit_name = force_explicit_name;
5961   p_head->ignore_case = ignore_case;
5962   p_head->multi_line = multi_line;
5963 }
5964
5965 /*
5966  * Do the substitutions indicated by the regular expression and
5967  * arguments.
5968  */
5969 static char *
5970 substitute (in, out, regs)
5971      char *in, *out;
5972      struct re_registers *regs;
5973 {
5974   char *result, *t;
5975   int size, dig, diglen;
5976
5977   result = NULL;
5978   size = strlen (out);
5979
5980   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5981   if (out[size - 1] == '\\')
5982     fatal ("pattern error in \"%s\"", out);
5983   for (t = etags_strchr (out, '\\');
5984        t != NULL;
5985        t = etags_strchr (t + 2, '\\'))
5986     if (ISDIGIT (t[1]))
5987       {
5988         dig = t[1] - '0';
5989         diglen = regs->end[dig] - regs->start[dig];
5990         size += diglen - 2;
5991       }
5992     else
5993       size -= 1;
5994
5995   /* Allocate space and do the substitutions. */
5996   assert (size >= 0);
5997   result = xnew (size + 1, char);
5998
5999   for (t = result; *out != '\0'; out++)
6000     if (*out == '\\' && ISDIGIT (*++out))
6001       {
6002         dig = *out - '0';
6003         diglen = regs->end[dig] - regs->start[dig];
6004         strncpy (t, in + regs->start[dig], diglen);
6005         t += diglen;
6006       }
6007     else
6008       *t++ = *out;
6009   *t = '\0';
6010
6011   assert (t <= result + size);
6012   assert (t - result == (int)strlen (result));
6013
6014   return result;
6015 }
6016
6017 /* Deallocate all regexps. */
6018 static void
6019 free_regexps ()
6020 {
6021   regexp *rp;
6022   while (p_head != NULL)
6023     {
6024       rp = p_head->p_next;
6025       free (p_head->pattern);
6026       free (p_head->name);
6027       free (p_head);
6028       p_head = rp;
6029     }
6030   return;
6031 }
6032
6033 /*
6034  * Reads the whole file as a single string from `filebuf' and looks for
6035  * multi-line regular expressions, creating tags on matches.
6036  * readline already dealt with normal regexps.
6037  *
6038  * Idea by Ben Wing <ben@666.com> (2002).
6039  */
6040 static void
6041 regex_tag_multiline ()
6042 {
6043   char *buffer = filebuf.buffer;
6044   regexp *rp;
6045   char *name;
6046
6047   for (rp = p_head; rp != NULL; rp = rp->p_next)
6048     {
6049       int match = 0;
6050
6051       if (!rp->multi_line)
6052         continue;               /* skip normal regexps */
6053
6054       /* Generic initialisations before parsing file from memory. */
6055       lineno = 1;               /* reset global line number */
6056       charno = 0;               /* reset global char number */
6057       linecharno = 0;           /* reset global char number of line start */
6058
6059       /* Only use generic regexps or those for the current language. */
6060       if (rp->lang != NULL && rp->lang != curfdp->lang)
6061         continue;
6062
6063       while (match >= 0 && match < filebuf.len)
6064         {
6065           match = re_search (rp->pat, buffer, filebuf.len, charno,
6066                              filebuf.len - match, &rp->regs);
6067           switch (match)
6068             {
6069             case -2:
6070               /* Some error. */
6071               if (!rp->error_signaled)
6072                 {
6073                   error ("regexp stack overflow while matching \"%s\"",
6074                          rp->pattern);
6075                   rp->error_signaled = TRUE;
6076                 }
6077               break;
6078             case -1:
6079               /* No match. */
6080               break;
6081             default:
6082               if (match == rp->regs.end[0])
6083                 {
6084                   if (!rp->error_signaled)
6085                     {
6086                       error ("regexp matches the empty string: \"%s\"",
6087                              rp->pattern);
6088                       rp->error_signaled = TRUE;
6089                     }
6090                   match = -3;   /* exit from while loop */
6091                   break;
6092                 }
6093
6094               /* Match occurred.  Construct a tag. */
6095               while (charno < rp->regs.end[0])
6096                 if (buffer[charno++] == '\n')
6097                   lineno++, linecharno = charno;
6098               name = rp->name;
6099               if (name[0] == '\0')
6100                 name = NULL;
6101               else /* make a named tag */
6102                 name = substitute (buffer, rp->name, &rp->regs);
6103               if (rp->force_explicit_name)
6104                 /* Force explicit tag name, if a name is there. */
6105                 pfnote (name, TRUE, buffer + linecharno,
6106                         charno - linecharno + 1, lineno, linecharno);
6107               else
6108                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6109                           charno - linecharno + 1, lineno, linecharno);
6110               break;
6111             }
6112         }
6113     }
6114 }
6115
6116 \f
6117 static bool
6118 nocase_tail (cp)
6119      char *cp;
6120 {
6121   register int len = 0;
6122
6123   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6124     cp++, len++;
6125   if (*cp == '\0' && !intoken (dbp[len]))
6126     {
6127       dbp += len;
6128       return TRUE;
6129     }
6130   return FALSE;
6131 }
6132
6133 static void
6134 get_tag (bp, namepp)
6135      register char *bp;
6136      char **namepp;
6137 {
6138   register char *cp = bp;
6139
6140   if (*bp != '\0')
6141     {
6142       /* Go till you get to white space or a syntactic break */
6143       for (cp = bp + 1; !notinname (*cp); cp++)
6144         continue;
6145       make_tag (bp, cp - bp, TRUE,
6146                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6147     }
6148
6149   if (namepp != NULL)
6150     *namepp = savenstr (bp, cp - bp);
6151 }
6152
6153 /*
6154  * Read a line of text from `stream' into `lbp', excluding the
6155  * newline or CR-NL, if any.  Return the number of characters read from
6156  * `stream', which is the length of the line including the newline.
6157  *
6158  * On DOS or Windows we do not count the CR character, if any before the
6159  * NL, in the returned length; this mirrors the behavior of Emacs on those
6160  * platforms (for text files, it translates CR-NL to NL as it reads in the
6161  * file).
6162  *
6163  * If multi-line regular expressions are requested, each line read is
6164  * appended to `filebuf'.
6165  */
6166 static long
6167 readline_internal (lbp, stream)
6168      linebuffer *lbp;
6169      register FILE *stream;
6170 {
6171   char *buffer = lbp->buffer;
6172   register char *p = lbp->buffer;
6173   register char *pend;
6174   int chars_deleted;
6175
6176   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6177
6178   for (;;)
6179     {
6180       register int c = getc (stream);
6181       if (p == pend)
6182         {
6183           /* We're at the end of linebuffer: expand it. */
6184           lbp->size *= 2;
6185           xrnew (buffer, lbp->size, char);
6186           p += buffer - lbp->buffer;
6187           pend = buffer + lbp->size;
6188           lbp->buffer = buffer;
6189         }
6190       if (c == EOF)
6191         {
6192           *p = '\0';
6193           chars_deleted = 0;
6194           break;
6195         }
6196       if (c == '\n')
6197         {
6198           if (p > buffer && p[-1] == '\r')
6199             {
6200               p -= 1;
6201 #ifdef DOS_NT
6202              /* Assume CRLF->LF translation will be performed by Emacs
6203                 when loading this file, so CRs won't appear in the buffer.
6204                 It would be cleaner to compensate within Emacs;
6205                 however, Emacs does not know how many CRs were deleted
6206                 before any given point in the file.  */
6207               chars_deleted = 1;
6208 #else
6209               chars_deleted = 2;
6210 #endif
6211             }
6212           else
6213             {
6214               chars_deleted = 1;
6215             }
6216           *p = '\0';
6217           break;
6218         }
6219       *p++ = c;
6220     }
6221   lbp->len = p - buffer;
6222
6223   if (need_filebuf              /* we need filebuf for multi-line regexps */
6224       && chars_deleted > 0)     /* not at EOF */
6225     {
6226       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6227         {
6228           /* Expand filebuf. */
6229           filebuf.size *= 2;
6230           xrnew (filebuf.buffer, filebuf.size, char);
6231         }
6232       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6233       filebuf.len += lbp->len;
6234       filebuf.buffer[filebuf.len++] = '\n';
6235       filebuf.buffer[filebuf.len] = '\0';
6236     }
6237
6238   return lbp->len + chars_deleted;
6239 }
6240
6241 /*
6242  * Like readline_internal, above, but in addition try to match the
6243  * input line against relevant regular expressions and manage #line
6244  * directives.
6245  */
6246 static void
6247 readline (lbp, stream)
6248      linebuffer *lbp;
6249      FILE *stream;
6250 {
6251   long result;
6252
6253   linecharno = charno;          /* update global char number of line start */
6254   result = readline_internal (lbp, stream); /* read line */
6255   lineno += 1;                  /* increment global line number */
6256   charno += result;             /* increment global char number */
6257
6258   /* Honour #line directives. */
6259   if (!no_line_directive)
6260     {
6261       static bool discard_until_line_directive;
6262
6263       /* Check whether this is a #line directive. */
6264       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6265         {
6266           unsigned int lno;
6267           int start = 0;
6268
6269           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6270               && start > 0)     /* double quote character found */
6271             {
6272               char *endp = lbp->buffer + start;
6273
6274               while ((endp = etags_strchr (endp, '"')) != NULL
6275                      && endp[-1] == '\\')
6276                 endp++;
6277               if (endp != NULL)
6278                 /* Ok, this is a real #line directive.  Let's deal with it. */
6279                 {
6280                   char *taggedabsname;  /* absolute name of original file */
6281                   char *taggedfname;    /* name of original file as given */
6282                   char *name;           /* temp var */
6283
6284                   discard_until_line_directive = FALSE; /* found it */
6285                   name = lbp->buffer + start;
6286                   *endp = '\0';
6287                   canonicalize_filename (name); /* for DOS */
6288                   taggedabsname = absolute_filename (name, tagfiledir);
6289                   if (filename_is_absolute (name)
6290                       || filename_is_absolute (curfdp->infname))
6291                     taggedfname = savestr (taggedabsname);
6292                   else
6293                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6294
6295                   if (streq (curfdp->taggedfname, taggedfname))
6296                     /* The #line directive is only a line number change.  We
6297                        deal with this afterwards. */
6298                     free (taggedfname);
6299                   else
6300                     /* The tags following this #line directive should be
6301                        attributed to taggedfname.  In order to do this, set
6302                        curfdp accordingly. */
6303                     {
6304                       fdesc *fdp; /* file description pointer */
6305
6306                       /* Go look for a file description already set up for the
6307                          file indicated in the #line directive.  If there is
6308                          one, use it from now until the next #line
6309                          directive. */
6310                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6311                         if (streq (fdp->infname, curfdp->infname)
6312                             && streq (fdp->taggedfname, taggedfname))
6313                           /* If we remove the second test above (after the &&)
6314                              then all entries pertaining to the same file are
6315                              coalesced in the tags file.  If we use it, then
6316                              entries pertaining to the same file but generated
6317                              from different files (via #line directives) will
6318                              go into separate sections in the tags file.  These
6319                              alternatives look equivalent.  The first one
6320                              destroys some apparently useless information. */
6321                           {
6322                             curfdp = fdp;
6323                             free (taggedfname);
6324                             break;
6325                           }
6326                       /* Else, if we already tagged the real file, skip all
6327                          input lines until the next #line directive. */
6328                       if (fdp == NULL) /* not found */
6329                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6330                           if (streq (fdp->infabsname, taggedabsname))
6331                             {
6332                               discard_until_line_directive = TRUE;
6333                               free (taggedfname);
6334                               break;
6335                             }
6336                       /* Else create a new file description and use that from
6337                          now on, until the next #line directive. */
6338                       if (fdp == NULL) /* not found */
6339                         {
6340                           fdp = fdhead;
6341                           fdhead = xnew (1, fdesc);
6342                           *fdhead = *curfdp; /* copy curr. file description */
6343                           fdhead->next = fdp;
6344                           fdhead->infname = savestr (curfdp->infname);
6345                           fdhead->infabsname = savestr (curfdp->infabsname);
6346                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6347                           fdhead->taggedfname = taggedfname;
6348                           fdhead->usecharno = FALSE;
6349                           fdhead->prop = NULL;
6350                           fdhead->written = FALSE;
6351                           curfdp = fdhead;
6352                         }
6353                     }
6354                   free (taggedabsname);
6355                   lineno = lno - 1;
6356                   readline (lbp, stream);
6357                   return;
6358                 } /* if a real #line directive */
6359             } /* if #line is followed by a a number */
6360         } /* if line begins with "#line " */
6361
6362       /* If we are here, no #line directive was found. */
6363       if (discard_until_line_directive)
6364         {
6365           if (result > 0)
6366             {
6367               /* Do a tail recursion on ourselves, thus discarding the contents
6368                  of the line buffer. */
6369               readline (lbp, stream);
6370               return;
6371             }
6372           /* End of file. */
6373           discard_until_line_directive = FALSE;
6374           return;
6375         }
6376     } /* if #line directives should be considered */
6377
6378   {
6379     int match;
6380     regexp *rp;
6381     char *name;
6382
6383     /* Match against relevant regexps. */
6384     if (lbp->len > 0)
6385       for (rp = p_head; rp != NULL; rp = rp->p_next)
6386         {
6387           /* Only use generic regexps or those for the current language.
6388              Also do not use multiline regexps, which is the job of
6389              regex_tag_multiline. */
6390           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6391               || rp->multi_line)
6392             continue;
6393
6394           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6395           switch (match)
6396             {
6397             case -2:
6398               /* Some error. */
6399               if (!rp->error_signaled)
6400                 {
6401                   error ("regexp stack overflow while matching \"%s\"",
6402                          rp->pattern);
6403                   rp->error_signaled = TRUE;
6404                 }
6405               break;
6406             case -1:
6407               /* No match. */
6408               break;
6409             case 0:
6410               /* Empty string matched. */
6411               if (!rp->error_signaled)
6412                 {
6413                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6414                   rp->error_signaled = TRUE;
6415                 }
6416               break;
6417             default:
6418               /* Match occurred.  Construct a tag. */
6419               name = rp->name;
6420               if (name[0] == '\0')
6421                 name = NULL;
6422               else /* make a named tag */
6423                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6424               if (rp->force_explicit_name)
6425                 /* Force explicit tag name, if a name is there. */
6426                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6427               else
6428                 make_tag (name, strlen (name), TRUE,
6429                           lbp->buffer, match, lineno, linecharno);
6430               break;
6431             }
6432         }
6433   }
6434 }
6435
6436 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is obtained from
 * savenstr, asked for exactly strlen (CP) characters, and is returned
 * to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6447
6448 /*
6449  * Return a pointer to a space of size LEN+1 allocated with xnew where
6450  * the string CP has been copied for at most the first LEN characters.
6451  */
6452 static char *
6453 savenstr (cp, len)
6454      char *cp;
6455      int len;
6456 {
6457   register char *dp;
6458
6459   dp = xnew (len + 1, char);
6460   strncpy (dp, cp, len);
6461   dp[len] = '\0';
6462   return dp;
6463 }
6464
/*
 * Return a pointer to the last place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating NUL is part
 * of the search, so C == '\0' yields a pointer to the terminator.
 *
 * Stand-in for the standard strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6486
/*
 * Return a pointer to the first place in SP where the character C
 * appears, or NULL if it does not appear.  The terminating NUL is part
 * of the search, so C == '\0' yields a pointer to the terminator.
 *
 * Stand-in for the standard strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6505
/*
 * Compare S1 and S2, treating upper and lower case letters as equal.
 * Two characters are compared through lowcase only when both are
 * alphabetic; otherwise they are compared as they are.  The value
 * returned is the difference at the first position where the strings
 * disagree, or at the end of S1.
 *
 * Stand-in for BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6526
/*
 * Compare at most the first N characters of S1 and S2, treating upper
 * and lower case letters as equal.  Two characters are compared through
 * lowcase only when both are alphabetic; otherwise they are compared as
 * they are.
 *
 * Return 0 if the strings agree over the first N characters (or up to
 * and including a common terminator), and otherwise the difference at
 * the first position where they disagree.  N <= 0 always yields 0.
 *
 * Stand-in for BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Position N itself is never examined.  This matters when S1 ends
     exactly after N matching characters: the result must be 0 even if
     S2 goes on. */
  for (; n > 0; n--, s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop on the first difference, or when both strings end here. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  return 0;
}
6552
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  (The original comment notes that end of string is
   not space, so the scan stops there at the latest.)  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6562
/* Return a pointer to the first character at or after CP that is
   either the terminating NUL or a character for which iswhite is
   true.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6572
/* Report an error and terminate.  S1 and S2 are handed unchanged to
   error (S1 being the printf control string and S2 its argument); the
   process then exits with status EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6581
/* Report the current errno condition through perror, using S1 as the
   message prefix, then exit with status EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6589
6590 static void
6591 suggest_asking_for_help ()
6592 {
6593   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6594            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6595   exit (EXIT_FAILURE);
6596 }
6597
6598 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6599 static void
6600 error (s1, s2)
6601      const char *s1, *s2;
6602 {
6603   fprintf (stderr, "%s: ", progname);
6604   fprintf (stderr, s1, s2);
6605   fprintf (stderr, "\n");
6606 }
6607
6608 /* Return a newly-allocated string whose contents
6609    concatenate those of s1, s2, s3.  */
6610 static char *
6611 concat (s1, s2, s3)
6612      char *s1, *s2, *s3;
6613 {
6614   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6615   char *result = xnew (len1 + len2 + len3 + 1, char);
6616
6617   strcpy (result, s1);
6618   strcpy (result + len1, s2);
6619   strcpy (result + len1 + len2, s3);
6620   result[len1 + len2 + len3] = '\0';
6621
6622   return result;
6623 }
6624
6625 \f
6626 /* Does the same work as the system V getcwd, but does not need to
6627    guess the buffer size in advance. */
6628 static char *
6629 etags_getcwd ()
6630 {
6631 #ifdef HAVE_GETCWD
6632   int bufsize = 200;
6633   char *path = xnew (bufsize, char);
6634
6635   while (getcwd (path, bufsize) == NULL)
6636     {
6637       if (errno != ERANGE)
6638         pfatal ("getcwd");
6639       bufsize *= 2;
6640       free (path);
6641       path = xnew (bufsize, char);
6642     }
6643
6644   canonicalize_filename (path);
6645   return path;
6646
6647 #else /* not HAVE_GETCWD */
6648 #if MSDOS
6649
6650   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6651
6652   getwd (path);
6653
6654   for (p = path; *p != '\0'; p++)
6655     if (*p == '\\')
6656       *p = '/';
6657     else
6658       *p = lowcase (*p);
6659
6660   return strdup (path);
6661 #else /* not MSDOS */
6662   linebuffer path;
6663   FILE *pipe;
6664
6665   linebuffer_init (&path);
6666   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6667   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6668     pfatal ("pwd");
6669   pclose (pipe);
6670
6671   return path.buffer;
6672 #endif /* not MSDOS */
6673 #endif /* not HAVE_GETCWD */
6674 }
6675
6676 /* Return a newly allocated string containing the file name of FILE
6677    relative to the absolute directory DIR (which should end with a slash). */
6678 static char *
6679 relative_filename (file, dir)
6680      char *file, *dir;
6681 {
6682   char *fp, *dp, *afn, *res;
6683   int i;
6684
6685   /* Find the common root of file and dir (with a trailing slash). */
6686   afn = absolute_filename (file, cwd);
6687   fp = afn;
6688   dp = dir;
6689   while (*fp++ == *dp++)
6690     continue;
6691   fp--, dp--;                   /* back to the first differing char */
6692 #ifdef DOS_NT
6693   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6694     return afn;
6695 #endif
6696   do                            /* look at the equal chars until '/' */
6697     fp--, dp--;
6698   while (*fp != '/');
6699
6700   /* Build a sequence of "../" strings for the resulting relative file name. */
6701   i = 0;
6702   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6703     i += 1;
6704   res = xnew (3*i + strlen (fp + 1) + 1, char);
6705   res[0] = '\0';
6706   while (i-- > 0)
6707     strcat (res, "../");
6708
6709   /* Add the file name relative to the common root of file and dir. */
6710   strcat (res, fp + 1);
6711   free (afn);
6712
6713   return res;
6714 }
6715
/* Copy the NUL-terminated string at SRC down to DST, where DST is not
   after SRC in the same buffer.  Copying forward one character at a
   time is well defined for this overlap, which strcpy is not.  */
static void
shift_string_down (dst, src)
     register char *dst;
     register char *src;
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  Every "/dirname/.." and "/." substring is then removed in
   place.  Should nothing be left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the slash that starts the previous
                 component, if there is one. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap: do not use strcpy. */
              shift_string_down (cp, slashp + 3);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Source and destination overlap: do not use strcpy. */
              shift_string_down (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6779
/* Return a newly allocated string containing the absolute file name of
   the directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place.  When FILE contains no
   slash, the result is a copy of DIR.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the duration of the
         call, then put the overwritten character back. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    dirname = savestr (dir);

  return dirname;
}
6801
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  FN
   must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6814
/* Put the file name FN into canonical form, in place.  Under DOS_NT
   this makes a lower case drive letter upper case and turns every
   backslash into a slash; on other systems FN is left alone. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6833
6834 \f
6835 /* Initialize a linebuffer for use */
6836 static void
6837 linebuffer_init (lbp)
6838      linebuffer *lbp;
6839 {
6840   lbp->size = (DEBUG) ? 3 : 200;
6841   lbp->buffer = xnew (lbp->size, char);
6842   lbp->buffer[0] = '\0';
6843   lbp->len = 0;
6844 }
6845
6846 /* Set the minimum size of a string contained in a linebuffer. */
6847 static void
6848 linebuffer_setlen (lbp, toksize)
6849      linebuffer *lbp;
6850      int toksize;
6851 {
6852   while (lbp->size <= toksize)
6853     {
6854       lbp->size *= 2;
6855       xrnew (lbp->buffer, lbp->size, char);
6856     }
6857   lbp->len = toksize;
6858 }
6859
6860 /* Like malloc but get fatal error if memory is exhausted. */
6861 static PTR
6862 xmalloc (size)
6863      unsigned int size;
6864 {
6865   PTR result = (PTR) malloc (size);
6866   if (result == NULL)
6867     fatal ("virtual memory exhausted", (char *)NULL);
6868   return result;
6869 }
6870
6871 static PTR
6872 xrealloc (ptr, size)
6873      char *ptr;
6874      unsigned int size;
6875 {
6876   PTR result = (PTR) realloc (ptr, size);
6877   if (result == NULL)
6878     fatal ("virtual memory exhausted", (char *)NULL);
6879   return result;
6880 }
6881
6882 /*
6883  * Local Variables:
6884  * indent-tabs-mode: t
6885  * tab-width: 8
6886  * fill-column: 79
6887  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6888  * End:
6889  */
6890
6891 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6892    (do not change this comment) */
6893
6894 /* etags.c ends here */