lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005, 2006 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.26";
  45
  46 #define TRUE    1
  47 #define FALSE   0
  48 /* Make DEBUG always defined (TRUE or FALSE) so it can be tested with #if. */
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # ifndef PTR                    /* for XEmacs */
  63 #   define PTR void *
  64 # endif
  65 # ifndef __P                    /* for XEmacs */
  66 #   define __P(args) args
  67 # endif
  68 #else  /* no config.h */
  69 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  70 #   define __P(args) args       /* use prototypes */
  71 #   define PTR void *           /* for generic pointers */
  72 # else /* not standard C */
  73 #   define __P(args) ()         /* no prototypes */
  74 #   define const                /* remove const for old compilers' sake */
  75 #   define PTR long *           /* don't use void* */
  76 # endif
  77 #endif /* !HAVE_CONFIG_H */
  78
  79 #ifndef _GNU_SOURCE
  80 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  81 #endif
  82
  83 /* WIN32_NATIVE is for XEmacs.
  84    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  85 #ifdef WIN32_NATIVE
  86 # undef MSDOS
  87 # undef  WINDOWSNT
  88 # define WINDOWSNT
  89 #endif /* WIN32_NATIVE */
  90
  91 #ifdef MSDOS
  92 # undef MSDOS
  93 # define MSDOS TRUE
  94 # include <fcntl.h>
  95 # include <sys/param.h>
  96 # include <io.h>
  97 # ifndef HAVE_CONFIG_H
  98 #   define DOS_NT
  99 #   include <sys/config.h>
 100 # endif
 101 #else
 102 # define MSDOS FALSE
 103 #endif /* MSDOS */
 104
 105 #ifdef WINDOWSNT
 106 # include <stdlib.h>
 107 # include <fcntl.h>
 108 # include <string.h>
 109 # include <direct.h>
 110 # include <io.h>
 111 # define MAXPATHLEN _MAX_PATH
 112 # undef HAVE_NTGUI
 113 # undef  DOS_NT
 114 # define DOS_NT
 115 # ifndef HAVE_GETCWD
 116 #   define HAVE_GETCWD
 117 # endif /* undef HAVE_GETCWD */
 118 #else /* not WINDOWSNT */
 119 # ifdef STDC_HEADERS
 120 #  include <stdlib.h>
 121 #  include <string.h>
 122 # else /* no standard C headers */
 123     extern char *getenv ();
 124 #  ifdef VMS
 125 #   define EXIT_SUCCESS 1
 126 #   define EXIT_FAILURE 0
 127 #  else /* no VMS */
 128 #   define EXIT_SUCCESS 0
 129 #   define EXIT_FAILURE 1
 130 #  endif
 131 # endif
 132 #endif /* !WINDOWSNT */
 133
 134 #ifdef HAVE_UNISTD_H
 135 # include <unistd.h>
 136 #else
 137 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 138     extern char *getcwd (char *buf, size_t size);
 139 # endif
 140 #endif /* HAVE_UNISTD_H */
 141
 142 #include <stdio.h>
 143 #include <ctype.h>
 144 #include <errno.h>
 145 #ifndef errno
 146   extern int errno;
 147 #endif
 148 #include <sys/types.h>
 149 #include <sys/stat.h>
 150
 151 #include <assert.h>
 152 #ifdef NDEBUG
 153 # undef  assert                 /* some systems have a buggy assert.h */
 154 # define assert(x) ((void) 0)
 155 #endif
 156
 157 #if !defined (S_ISREG) && defined (S_IFREG)
 158 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 159 #endif
 160
 161 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 162 # undef  NO_LONG_OPTIONS        /* avoid redefining with a different value */
 163 # define NO_LONG_OPTIONS TRUE
 164 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 165   extern char *optarg; extern int optind, opterr;
 166 #else
 167 # define NO_LONG_OPTIONS FALSE
 168 # include <getopt.h>
 169 #endif /* NO_LONG_OPTIONS */
 170
 171 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 172 # ifdef __CYGWIN__              /* compiling on Cygwin */
 173                              !!! NOTICE !!!
 174  the regex.h distributed with Cygwin is not compatible with etags, alas!
 175 If you want regular expression support, you should delete this notice and
 176               arrange to use the GNU regex.h and regex.c.
 177 # endif
 178 #endif
 179 #include <regex.h>
 180
 181 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 182  Leave it undefined to make the program "etags", which makes emacs-style
 183  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 184 #ifdef CTAGS
 185 # undef  CTAGS
 186 # define CTAGS TRUE
 187 #else
 188 # define CTAGS FALSE
 189 #endif
 190 /* String comparison predicates; both arguments must be non-NULL. */
 191 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 192 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 193 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 194 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 195
 196 #define CHARS 256               /* size of the per-character tables (2^8) */
 197 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* table index of x */
 198 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 199 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 200 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 201 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 202 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 203
 204 #define ISALNUM(c)      isalnum (CHAR(c))
 205 #define ISALPHA(c)      isalpha (CHAR(c))
 206 #define ISDIGIT(c)      isdigit (CHAR(c))
 207 #define ISLOWER(c)      islower (CHAR(c))
 208
 209 #define lowcase(c)      tolower (CHAR(c))
 210 #define upcase(c)       toupper (CHAR(c))
 211
 212
 213 /*
 214  *      xnew, xrnew -- allocate, reallocate storage
 215  *
 216  * SYNOPSIS:    Type *xnew (int n, Type);
 217  *              void xrnew (OldPointer, int n, Type);
 218  */
 219 #if DEBUG
 220 # include "chkmalloc.h"
 221 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 222                                                   (n) * sizeof (Type)))
 223 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 224                                         (char *) (op), (n) * sizeof (Type)))
 225 #else
 226 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 227 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 228                                         (char *) (op), (n) * sizeof (Type)))
 229 #endif
 230
 231 #define bool int
 232
 233 typedef void Lang_function __P((FILE *));
 234 /* A decompression command, associated with a file name suffix. */
 235 typedef struct
 236 {
 237   char *suffix;                 /* file name suffix for this compressor */
 238   char *command;                /* takes one arg and decompresses to stdout */
 239 } compressor;
 240 /* A language: its name, help text, parser and how to recognize its files. */
 241 typedef struct
 242 {
 243   char *name;                   /* language name */
 244   char *help;                   /* detailed help for the language */
 245   Lang_function *function;      /* parse function */
 246   char **suffixes;              /* name suffixes of this language's files */
 247   char **filenames;             /* names of this language's files */
 248   char **interpreters;          /* interpreters for this language */
 249   bool metasource;              /* source used to generate other sources */
 250 } language;
 251 /* Description of one input file; descriptions are chained through `next'. */
 252 typedef struct fdesc
 253 {
 254   struct fdesc *next;           /* for the linked list */
 255   char *infname;                /* uncompressed input file name */
 256   char *infabsname;             /* absolute uncompressed input file name */
 257   char *infabsdir;              /* absolute dir of input file */
 258   char *taggedfname;            /* file name to write in tagfile */
 259   language *lang;               /* language of file */
 260   char *prop;                   /* file properties to write in tagfile */
 261   bool usecharno;               /* etags tags shall contain char number */
 262   bool written;                 /* entry written in the tags file */
 263 } fdesc;
 264 /* One tag, stored as a node of a binary tree. */
 265 typedef struct node_st
 266 {                               /* sorting structure */
 267   struct node_st *left, *right; /* left and right sons */
 268   fdesc *fdp;                   /* description of the file the tag belongs to */
 269   char *name;                   /* tag name */
 270   char *regex;                  /* search regexp */
 271   bool valid;                   /* write this tag on the tag file */
 272   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 273   bool been_warned;             /* warning already given for duplicated tag */
 274   int lno;                      /* line number tag is on */
 275   long cno;                     /* character number line starts on */
 276 } node;
 277
 278 /*
 279  * A `linebuffer' is a structure which holds a line of text.
 280  * `readline_internal' reads a line from a stream into a linebuffer
 281  * and works regardless of the length of the line.
 282  * SIZE is the size of BUFFER, LEN is the length of the string in
 283  * BUFFER after readline reads it.
 284  */
 285 typedef struct
 286 {
 287   long size;                    /* size of BUFFER */
 288   int len;                      /* length of the string held in BUFFER */
 289   char *buffer;                 /* the text of the line */
 290 } linebuffer;
 291
 292 /* Used to support mixing of --lang and file names. */
 293 typedef struct
 294 {
 295   enum {
 296     at_language,                /* a language specification */
 297     at_regexp,                  /* a regular expression */
 298     at_filename,                /* a file name */
 299     at_stdin,                   /* read from stdin here */
 300     at_end                      /* stop parsing the list */
 301   } arg_type;                   /* argument type */
 302   language *lang;               /* language associated with the argument */
 303   char *what;                   /* the argument itself */
 304 } argument;
 305
 306 /* Structure defining a regular expression; entries are chained via p_next. */
 307 typedef struct regexp
 308 {
 309   struct regexp *p_next;        /* pointer to next in list */
 310   language *lang;               /* if set, use only for this language */
 311   char *pattern;                /* the regexp pattern */
 312   char *name;                   /* tag name */
 313   struct re_pattern_buffer *pat; /* the compiled pattern */
 314   struct re_registers regs;     /* re registers */
 315   bool error_signaled;          /* already signaled for this regexp */
 316   bool force_explicit_name;     /* do not allow implicit tag name */
 317   bool ignore_case;             /* ignore case when matching */
 318   bool multi_line;              /* do a multi-line match on the whole file */
 319 } regexp;
 320
 321
 322 /* Many compilers barf on this:
 323         Lang_function Ada_funcs;
 324    so let's write it this way */
 325 static void Ada_funcs __P((FILE *));
 326 static void Asm_labels __P((FILE *));
 327 static void C_entries __P((int c_ext, FILE *));
 328 static void default_C_entries __P((FILE *));
 329 static void plain_C_entries __P((FILE *));
 330 static void Cjava_entries __P((FILE *));
 331 static void Cobol_paragraphs __P((FILE *));
 332 static void Cplusplus_entries __P((FILE *));
 333 static void Cstar_entries __P((FILE *));
 334 static void Erlang_functions __P((FILE *));
 335 static void Forth_words __P((FILE *));
 336 static void Fortran_functions __P((FILE *));
 337 static void HTML_labels __P((FILE *));
 338 static void Lisp_functions __P((FILE *));
 339 static void Lua_functions __P((FILE *));
 340 static void Makefile_targets __P((FILE *));
 341 static void Pascal_functions __P((FILE *));
 342 static void Perl_functions __P((FILE *));
 343 static void PHP_functions __P((FILE *));
 344 static void PS_functions __P((FILE *));
 345 static void Prolog_functions __P((FILE *));
 346 static void Python_functions __P((FILE *));
 347 static void Scheme_functions __P((FILE *));
 348 static void TeX_commands __P((FILE *));
 349 static void Texinfo_nodes __P((FILE *));
 350 static void Yacc_entries __P((FILE *));
 351 static void just_read_file __P((FILE *));
 352
 353 static void print_language_names __P((void));
 354 static void print_version __P((void));
 355 static void print_help __P((argument *));
 356 int main __P((int, char **));
 357
 358 static compressor *get_compressor_from_suffix __P((char *, char **));
 359 static language *get_language_from_langname __P((const char *));
 360 static language *get_language_from_interpreter __P((char *));
 361 static language *get_language_from_filename __P((char *, bool));
 362 static void readline __P((linebuffer *, FILE *));
 363 static long readline_internal __P((linebuffer *, FILE *));
 364 static bool nocase_tail __P((char *));
 365 static void get_tag __P((char *, char **));
 366
 367 static void analyse_regex __P((char *));
 368 static void free_regexps __P((void));
 369 static void regex_tag_multiline __P((void));
 370 static void error __P((const char *, const char *));
 371 static void suggest_asking_for_help __P((void));
 372 void fatal __P((char *, char *));
 373 static void pfatal __P((char *));
 374 static void add_node __P((node *, node **));
 375
 376 static void init __P((void));
 377 static void process_file_name __P((char *, language *));
 378 static void process_file __P((FILE *, char *, language *));
 379 static void find_entries __P((FILE *));
 380 static void free_tree __P((node *));
 381 static void free_fdesc __P((fdesc *));
 382 static void pfnote __P((char *, bool, char *, int, int, long));
 383 static void make_tag __P((char *, int, bool, char *, int, int, long));
 384 static void invalidate_nodes __P((fdesc *, node **));
 385 static void put_entries __P((node *));
 386
 387 static char *concat __P((char *, char *, char *));
 388 static char *skip_spaces __P((char *));
 389 static char *skip_non_spaces __P((char *));
 390 static char *savenstr __P((char *, int));
 391 static char *savestr __P((char *));
 392 static char *etags_strchr __P((const char *, int));
 393 static char *etags_strrchr __P((const char *, int));
 394 static int etags_strcasecmp __P((const char *, const char *));
 395 static int etags_strncasecmp __P((const char *, const char *, int));
 396 static char *etags_getcwd __P((void));
 397 static char *relative_filename __P((char *, char *));
 398 static char *absolute_filename __P((char *, char *));
 399 static char *absolute_dirname __P((char *, char *));
 400 static bool filename_is_absolute __P((char *f));
 401 static void canonicalize_filename __P((char *));
 402 static void linebuffer_init __P((linebuffer *));
 403 static void linebuffer_setlen __P((linebuffer *, int));
 404 static PTR xmalloc __P((unsigned int));
 405 static PTR xrealloc __P((char *, unsigned int));
 406
 407 \f
 408 static char searchar = '/';     /* use /.../ searches */
 409
 410 static char *tagfile;           /* output file */
 411 static char *progname;          /* name this program was invoked with */
 412 static char *cwd;               /* current working directory */
 413 static char *tagfiledir;        /* directory of tagfile */
 414 static FILE *tagf;              /* ioptr for tags file */
 415
 416 static fdesc *fdhead;           /* head of file description list */
 417 static fdesc *curfdp;           /* current file description */
 418 static int lineno;              /* line number of current line */
 419 static long charno;             /* current character number */
 420 static long linecharno;         /* charno of start of current line */
 421 static char *dbp;               /* pointer to start of current tag */
 422
 423 static const int invalidcharno = -1;
 424
 425 static node *nodehead;          /* the head of the binary tree of tags */
 426 static node *last_node;         /* the last node created */
 427
 428 static linebuffer lb;           /* the current line */
 429 static linebuffer filebuf;      /* a buffer containing the whole file */
 430 static linebuffer token_name;   /* a buffer containing a tag name */
 431
 432 /* boolean "functions" (see init)       */
 433 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 434 static char
 435   /* white chars */
 436   *white = " \f\t\n\r\v",
 437   /* not in a name */
 438   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 439   /* token ending chars */
 440   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 441   /* token starting chars */
 442   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 443   /* valid in-token chars */
 444   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 445
 446 static bool append_to_tagfile;  /* -a: append to tags */
 447 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 448 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 449 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 450                                 /* 0 struct/enum/union decls, and C++ */
 451                                 /* member functions. */
 452 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 453                                 /* constants and variables. */
 454                                 /* -D: opposite of -d.  Default under ctags. */
 455 static bool globals;            /* create tags for global variables */
 456 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 457 static bool members;            /* create tags for C member variables */
 458 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 459 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 460 static bool update;             /* -u: update tags */
 461 static bool vgrind_style;       /* -v: create vgrind style index output */
 462 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 463 static bool cxref_style;        /* -x: create cxref style output */
 464 static bool cplusplus;          /* .[hc] means C++, not C */
 465 static bool ignoreindent;       /* -I: ignore indentation in C */
 466 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 467
 468 /* STDIN is defined in LynxOS system headers */
 469 #ifdef STDIN
 470 # undef STDIN
 471 #endif
 472
 473 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 474 static bool parsing_stdin;      /* --parse-stdin used */
 475
 476 static regexp *p_head;          /* list of all regexps */
 477 static bool need_filebuf;       /* some regexes are multi-line */
 478
 479 static struct option longopts[] =
 480 {
 481   { "append",             no_argument,       NULL,               'a'   },
 482   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 483   { "c++",                no_argument,       NULL,               'C'   },
 484   { "declarations",       no_argument,       &declarations,      TRUE  },
 485   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 486   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 487   { "help",               no_argument,       NULL,               'h'   },
 488   { "help",               no_argument,       NULL,               'H'   },
 489   { "ignore-indentation", no_argument,       NULL,               'I'   },
 490   { "language",           required_argument, NULL,               'l'   },
 491   { "members",            no_argument,       &members,           TRUE  },
 492   { "no-members",         no_argument,       &members,           FALSE },
 493   { "output",             required_argument, NULL,               'o'   },
 494   { "regex",              required_argument, NULL,               'r'   },
 495   { "no-regex",           no_argument,       NULL,               'R'   },
 496   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 497   { "parse-stdin",        required_argument, NULL,               STDIN },
 498   { "version",            no_argument,       NULL,               'V'   },
 499
 500 #if CTAGS /* Ctags options */
 501   { "backward-search",    no_argument,       NULL,               'B'   },
 502   { "cxref",              no_argument,       NULL,               'x'   },
 503   { "defines",            no_argument,       NULL,               'd'   },
 504   { "globals",            no_argument,       &globals,           TRUE  },
 505   { "typedefs",           no_argument,       NULL,               't'   },
 506   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 507   { "update",             no_argument,       NULL,               'u'   },
 508   { "vgrind",             no_argument,       NULL,               'v'   },
 509   { "no-warn",            no_argument,       NULL,               'w'   },
 510
 511 #else /* Etags options */
 512   { "no-defines",         no_argument,       NULL,               'D'   },
 513   { "no-globals",         no_argument,       &globals,           FALSE },
 514   { "include",            required_argument, NULL,               'i'   },
 515 #endif
 516   { NULL }
 517 };
 518
 519 static compressor compressors[] =   /* suffix, command; { NULL } ends it */
 520 {
 521   { "z",   "gzip -d -c"  },
 522   { "Z",   "gzip -d -c"  },
 523   { "gz",  "gzip -d -c"  },
 524   { "GZ",  "gzip -d -c"  },
 525   { "bz2", "bzip2 -d -c" },
 526   { NULL }
 527 };
 528
 529 /*
 530  * Language stuff.
 531  */
 532
 533 /* Ada code */
 534 static char *Ada_suffixes [] =
 535   { "ads", "adb", "ada", NULL };
 536 static char Ada_help [] =
 537 "In Ada code, functions, procedures, packages, tasks and types are\n\
 538 tags.  Use the `--packages-only' option to create tags for\n\
 539 packages only.\n\
 540 Ada tag names have suffixes indicating the type of entity:\n\
 541         Entity type:    Qualifier:\n\
 542         ------------    ----------\n\
 543         function        /f\n\
 544         procedure       /p\n\
 545         package spec    /s\n\
 546         package body    /b\n\
 547         type            /t\n\
 548         task            /k\n\
 549 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 550 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 551 will just search for any tag `bidule'.";
 552
 553 /* Assembly code */
 554 static char *Asm_suffixes [] =
 555   { "a",        /* Unix assembler */
 556     "asm", /* Microcontroller assembly */
 557     "def", /* BSO/Tasking definition includes  */
 558     "inc", /* Microcontroller include files */
 559     "ins", /* Microcontroller include files */
 560     "s", "sa", /* Unix assembler */
 561     "S",   /* cpp-processed Unix assembler */
 562     "src", /* BSO/Tasking C compiler output */
 563     NULL
 564   };
 565 static char Asm_help [] =
 566 "In assembler code, labels appearing at the beginning of a line,\n\
 567 followed by a colon, are tags.";
 568
 569
 570 /* Note that .c and .h can be considered C++, if the --c++ flag was
 571    given, or if the `class' or `template' keyowrds are met inside the file.
 572    That is why default_C_entries is called for these. */
 573 static char *default_C_suffixes [] =
 574   { "c", "h", NULL };
 575 static char default_C_help [] =
 576 "In C code, any C function or typedef is a tag, and so are\n\
 577 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 578 definitions and `enum' constants are tags unless you specify\n\
 579 `--no-defines'.  Global variables are tags unless you specify\n\
 580 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 581 can make the tags table file much smaller.\n\
 582 You can tag function declarations and external variables by\n\
 583 using `--declarations', and struct members by using `--members'.";
 584
 585 static char *Cplusplus_suffixes [] =
 586   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 587     "M",                        /* Objective C++ */
 588     "pdb",                      /* Postscript with C syntax */
 589     NULL };
 590 static char Cplusplus_help [] =
 591 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 592 --help --lang=c --lang=c++ for full help.)\n\
 593 In addition to C tags, member functions are also recognized, and\n\
 594 optionally member variables if you use the `--members' option.\n\
 595 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 596 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 597 `operator+'.";
 598
 599 static char *Cjava_suffixes [] =
 600   { "java", NULL };
 601 static char Cjava_help [] =
 602 "In Java code, all the tags constructs of C and C++ code are\n\
 603 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 604
 605
 606 static char *Cobol_suffixes [] =
 607   { "COB", "cob", NULL };
 608 static char Cobol_help [] =
 609 "In Cobol code, tags are paragraph names; that is, any word\n\
 610 starting in column 8 and followed by a period.";
 611
 612 static char *Cstar_suffixes [] =
 613   { "cs", "hs", NULL };
 614
 615 static char *Erlang_suffixes [] =
 616   { "erl", "hrl", NULL };
 617 static char Erlang_help [] =
 618 "In Erlang code, the tags are the functions, records and macros\n\
 619 defined in the file.";
 620
 621 static char *Forth_suffixes [] =   /* file-local: referenced only by name here */
 622   { "fth", "tok", NULL };          /* NULL-terminated suffix list */
 623 static char Forth_help [] =
 624 "In Forth code, tags are words defined by `:',\n\
 625 constant, code, create, defer, value, variable, buffer:, field.";
 626
 627 static char *Fortran_suffixes [] =
 628   { "F", "f", "f90", "for", NULL };
 629 static char Fortran_help [] =
 630 "In Fortran code, functions, subroutines and block data are tags.";
 631
 632 static char *HTML_suffixes [] =
 633   { "htm", "html", "shtml", NULL };
 634 static char HTML_help [] =
 635 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 636 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 637 occurrences of `id='.";
 638
 639 static char *Lisp_suffixes [] =
 640   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 641 static char Lisp_help [] =
 642 "In Lisp code, any function defined with `defun', any variable\n\
 643 defined with `defvar' or `defconst', and in general the first\n\
 644 argument of any expression that starts with `(def' in column zero\n\
 645 is a tag.";
 646
 647 static char *Lua_suffixes [] =
 648   { "lua", "LUA", NULL };
 649 static char Lua_help [] =
 650 "In Lua scripts, all functions are tags.";
 651
 652 static char *Makefile_filenames [] =
 653   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 654 static char Makefile_help [] =
 655 "In makefiles, targets are tags; additionally, variables are tags\n\
 656 unless you specify `--no-globals'.";
 657
 658 static char *Objc_suffixes [] =
 659   { "lm",                       /* Objective lex file */
 660     "m",                        /* Objective C file */
 661      NULL };
 662 static char Objc_help [] =
 663 "In Objective C code, tags include Objective C definitions for classes,\n\
 664 class categories, methods and protocols.  Tags for variables and\n\
 665 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 666 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 667
 668 static char *Pascal_suffixes [] =
 669   { "p", "pas", NULL };
 670 static char Pascal_help [] =
 671 "In Pascal code, the tags are the functions and procedures defined\n\
 672 in the file.";
 673 /* " // this is for working around an Emacs highlighting bug... */
 674
 675 static char *Perl_suffixes [] =
 676   { "pl", "pm", NULL };
 677 static char *Perl_interpreters [] =
 678   { "perl", "@PERL@", NULL };
 679 static char Perl_help [] =
 680 "In Perl code, the tags are the packages, subroutines and variables\n\
 681 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 682 `--globals' if you want to tag global variables.  Tags for\n\
 683 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 684 defined in the default package is `main::SUB'.";
 685
 686 static char *PHP_suffixes [] =
 687   { "php", "php3", "php4", NULL };
 688 static char PHP_help [] =
 689 "In PHP code, tags are functions, classes and defines.  When using\n\
 690 the `--members' option, vars are tags too.";
 691
 692 static char *plain_C_suffixes [] =
 693   { "pc",                       /* Pro*C file */
 694      NULL };
 695
 696 static char *PS_suffixes [] =
 697   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 698 static char PS_help [] =
 699 "In PostScript code, the tags are the functions.";
 700
 701 static char *Prolog_suffixes [] =
 702   { "prolog", NULL };
 703 static char Prolog_help [] =
 704 "In Prolog code, tags are predicates and rules at the beginning of\n\
 705 line.";
 706
 707 static char *Python_suffixes [] =
 708   { "py", NULL };
 709 static char Python_help [] =
 710 "In Python code, `def' or `class' at the beginning of a line\n\
 711 generate a tag.";
 712
 713 /* Can't do the `SCM' or `scm' prefix with a version number. */
 714 static char *Scheme_suffixes [] =
 715   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 716 static char Scheme_help [] =
 717 "In Scheme code, tags include anything defined with `def' or with a\n\
 718 construct whose name starts with `def'.  They also include\n\
 719 variables set with `set!' at top level in the file.";
 720
 721 static char *TeX_suffixes [] =
 722   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 723 static char TeX_help [] =
 724 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 725 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 726 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 727 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 728 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 729 \n\
 730 Other commands can be specified by setting the environment variable\n\
 731 `TEXTAGS' to a colon-separated list like, for example,\n\
 732      TEXTAGS=\"mycommand:myothercommand\".";
 733
 734
 735 static char *Texinfo_suffixes [] =
 736   { "texi", "texinfo", "txi", NULL };
 737 static char Texinfo_help [] =
 738 "for texinfo files, lines starting with @node are tagged.";
 739
 740 static char *Yacc_suffixes [] =
 741   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 742 static char Yacc_help [] =
 743 "In Bison or Yacc input files, each rule defines as a tag the\n\
 744 nonterminal it constructs.  The portions of the file that contain\n\
 745 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 746 for full help).";
 747
 748 static char auto_help [] =
 749 "`auto' is not a real language, it indicates to use\n\
 750 a default language for files base on file name suffix and file contents.";
 751
 752 static char none_help [] =
 753 "`none' is not a real language, it indicates to only do\n\
 754 regexp processing on files.";
 755
 756 static char no_lang_help [] =
 757 "No detailed help available for this language.";
 758
 759
 760 /*
 761  * Table of languages.
 762  *
 763  * It is ok for a given function to be listed under more than one
 764  * name.  I just didn't.
 765  */
 766
 767 static language lang_names [] =
 768 {
 769   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 770   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 771   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 772   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 773   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 774   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 775   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 776   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 777   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 778   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 779   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 780   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 781   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 782   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 783   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 784   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 785   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 786   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 787   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 788   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 789   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 790   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 791   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 792   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 793   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 794   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 795   { "auto",      auto_help },                      /* default guessing scheme */
 796   { "none",      none_help,      just_read_file }, /* regexp matching only */
 797   { NULL }                /* end of list */
 798 };
 799
 800 \f
 801 static void
 802 print_language_names ()
 803 {
 804   language *lang;
 805   char **name, **ext;
 806
 807   puts ("\nThese are the currently supported languages, along with the\n\
 808 default file names and dot suffixes:");
 809   for (lang = lang_names; lang->name != NULL; lang++)
 810     {
 811       printf ("  %-*s", 10, lang->name);
 812       if (lang->filenames != NULL)
 813         for (name = lang->filenames; *name != NULL; name++)
 814           printf (" %s", *name);
 815       if (lang->suffixes != NULL)
 816         for (ext = lang->suffixes; *ext != NULL; ext++)
 817           printf (" .%s", *ext);
 818       puts ("");
 819     }
 820   puts ("where `auto' means use default language for files based on file\n\
 821 name suffix, and `none' means only do regexp processing on files.\n\
 822 If no language is specified and no matching suffix is found,\n\
 823 the first line of the file is read for a sharp-bang (#!) sequence\n\
 824 followed by the name of an interpreter.  If no such sequence is found,\n\
 825 Fortran is tried first; if no tags are found, C is tried next.\n\
 826 When parsing any C file, a \"class\" or \"template\" keyword\n\
 827 switches to C++.");
 828   puts ("Compressed files are supported using gzip and bzip2.\n\
 829 \n\
 830 For detailed help on a given language use, for example,\n\
 831 etags --help --lang=ada.");
 832 }
 833
 834 #ifndef EMACS_NAME
 835 # define EMACS_NAME "standalone"
 836 #endif
 837 #ifndef VERSION
 838 # define VERSION "version"
 839 #endif
 840 static void
 841 print_version ()
 842 {
 843   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 844   puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
 845   puts ("This program is distributed under the same terms as Emacs");
 846
 847   exit (EXIT_SUCCESS);
 848 }
 849
/*
 * Print help on standard output and exit with EXIT_SUCCESS; this
 * function does not return.
 *
 * ARGBUFFER is an array of arguments ending with an entry whose
 * arg_type is at_end.  If it contains at_language entries, only the
 * help text of each of those languages is printed, with a blank line
 * between consecutive texts.  Otherwise the usage line, the option
 * summary and the list of supported languages are printed.  The
 * options shown depend on CTAGS: some are described only for ctags,
 * some only for etags.
 */
static void
print_help (argbuffer)
     argument *argbuffer;
{
  bool help_for_lang = FALSE;

  /* Language-specific help: one text per --language option seen. */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* If any language help was printed, the general help is skipped. */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  if (NO_LONG_OPTIONS)
    puts ("WARNING: long option names do not work with this executable,\n\
as it is not linked with GNU getopt.");
  else
    puts ("You may use unambiguous abbreviations for the long option names.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  /* The defaults differ between the two programs, so ctags documents
     the option that enables defines and etags the one that disables
     them.  */
  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");
  puts ("--members\n\
        Create tag entries for members of structures in some languages.");

  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: `i' means to ignore case,\n\
        `m' means to allow multi-line matches, `s' implies `m' and\n\
        causes dot to match any character, including newline.");
  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");
  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");
  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");
  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions, up to -V, are printed for
     ctags only. */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Print on the standard output an index of items intended for\n\
        human consumption, similar to the output of vgrind.  The index\n\
        is sorted, and gives the page number of each item.");
# if PRINT_UNDOCUMENTED_OPTIONS_HELP
      /* NOTE(review): both entries below are labelled -w -- confirm
         which long option the short one really maps to. */
      puts ("-w, --no-duplicates\n\
        Do not create duplicate tag entries, for compatibility with\n\
        traditional ctags.");
      puts ("-w, --no-warn\n\
        Suppress warning messages about duplicate tag entries.");
# endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more `--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1009
1010 \f
1011 #ifdef VMS                      /* VMS specific functions */
1012
/* String terminator. */
#define EOS     '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A file specification: a length word followed by the characters,
   with room for MAX_FILE_SPEC_LEN of them plus a terminating EOS. */
typedef struct
{
  short curlen;                         /* current length of BODY */
  char body[MAX_FILE_SPEC_LEN + 1];     /* the text itself */
} vspec;
1022
1023 /*
1024  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1025  returning in each successive call the next file name matching the input
1026  spec. The function expects that each in_spec passed
1027  to it will be processed to completion; in particular, up to and
1028  including the call following that in which the last matching name
1029  is returned, the function ignores the value of in_spec, and will
1030  only start processing a new spec with the following call.
1031  If an error occurs, on return out_spec contains the value
1032  of in_spec when the error occurred.
1033
1034  With each successive file name returned in out_spec, the
1035  function's return value is one. When there are no more matching
1036  names the function returns zero. If on the first call no file
1037  matches in_spec, or there is any other error, -1 is returned.
1038 */
1039
1040 #include        <rmsdef.h>
1041 #include        <descrip.h>
1042 #define         OUTSIZE MAX_FILE_SPEC_LEN
1043 static short
1044 fn_exp (out, in)
1045      vspec *out;
1046      char *in;
1047 {
1048   static long context = 0;
1049   static struct dsc$descriptor_s o;
1050   static struct dsc$descriptor_s i;
1051   static bool pass1 = TRUE;
1052   long status;
1053   short retval;
1054
1055   if (pass1)
1056     {
1057       pass1 = FALSE;
1058       o.dsc$a_pointer = (char *) out;
1059       o.dsc$w_length = (short)OUTSIZE;
1060       i.dsc$a_pointer = in;
1061       i.dsc$w_length = (short)strlen(in);
1062       i.dsc$b_dtype = DSC$K_DTYPE_T;
1063       i.dsc$b_class = DSC$K_CLASS_S;
1064       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1065       o.dsc$b_class = DSC$K_CLASS_VS;
1066     }
1067   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1068     {
1069       out->body[out->curlen] = EOS;
1070       return 1;
1071     }
1072   else if (status == RMS$_NMF)
1073     retval = 0;
1074   else
1075     {
1076       strcpy(out->body, in);
1077       retval = -1;
1078     }
1079   lib$find_file_end(&context);
1080   pass1 = TRUE;
1081   return retval;
1082 }
1083
1084 /*
1085   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1086   name of each file specified by the provided arg expanding wildcards.
1087 */
1088 static char *
1089 gfnames (arg, p_error)
1090      char *arg;
1091      bool *p_error;
1092 {
1093   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1094
1095   switch (fn_exp (&filename, arg))
1096     {
1097     case 1:
1098       *p_error = FALSE;
1099       return filename.body;
1100     case 0:
1101       *p_error = FALSE;
1102       return NULL;
1103     default:
1104       *p_error = TRUE;
1105       return filename.body;
1106     }
1107 }
1108
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/* NOTE(review): the comment above suggests this stub is only needed
   on old VMS versions, yet it is compiled when OLD is NOT defined --
   confirm that the condition is the intended one.  */

/* Stand-in for system(): report that running commands is not
   supported, and return -1 so that callers comparing the result with
   EXIT_SUCCESS see a failure.  The function used to fall off its end
   without returning a value, although its callers use the result.  */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1116
1117 #define VERSION_DELIM   ';'
1118 char *massage_name (s)
1119      char *s;
1120 {
1121   char *start = s;
1122
1123   for ( ; *s; s++)
1124     if (*s == VERSION_DELIM)
1125       {
1126         *s = EOS;
1127         break;
1128       }
1129     else
1130       *s = lowcase (*s);
1131   return start;
1132 }
1133 #endif /* VMS */
1134
1135 \f
1136 int
1137 main (argc, argv)
1138      int argc;
1139      char *argv[];
1140 {
1141   int i;
1142   unsigned int nincluded_files;
1143   char **included_files;
1144   argument *argbuffer;
1145   int current_arg, file_count;
1146   linebuffer filename_lb;
1147   bool help_asked = FALSE;
1148 #ifdef VMS
1149   bool got_err;
1150 #endif
1151  char *optstring;
1152  int opt;
1153
1154
1155 #ifdef DOS_NT
1156   _fmode = O_BINARY;   /* all of files are treated as binary files */
1157 #endif /* DOS_NT */
1158
1159   progname = argv[0];
1160   nincluded_files = 0;
1161   included_files = xnew (argc, char *);
1162   current_arg = 0;
1163   file_count = 0;
1164
1165   /* Allocate enough no matter what happens.  Overkill, but each one
1166      is small. */
1167   argbuffer = xnew (argc, argument);
1168
1169   /*
1170    * If etags, always find typedefs and structure tags.  Why not?
1171    * Also default to find macro constants, enum constants and
1172    * global variables.
1173    */
1174   if (!CTAGS)
1175     {
1176       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1177       globals = TRUE;
1178     }
1179
1180   /* When the optstring begins with a '-' getopt_long does not rearrange the
1181      non-options arguments to be at the end, but leaves them alone. */
1182   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1183                       "ac:Cf:Il:o:r:RSVhH",
1184                       (CTAGS) ? "BxdtTuvw" : "Di:");
1185
1186   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1187     switch (opt)
1188       {
1189       case 0:
1190         /* If getopt returns 0, then it has already processed a
1191            long-named option.  We should do nothing.  */
1192         break;
1193
1194       case 1:
1195         /* This means that a file name has been seen.  Record it. */
1196         argbuffer[current_arg].arg_type = at_filename;
1197         argbuffer[current_arg].what     = optarg;
1198         ++current_arg;
1199         ++file_count;
1200         break;
1201
1202       case STDIN:
1203         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1204         argbuffer[current_arg].arg_type = at_stdin;
1205         argbuffer[current_arg].what     = optarg;
1206         ++current_arg;
1207         ++file_count;
1208         if (parsing_stdin)
1209           fatal ("cannot parse standard input more than once", (char *)NULL);
1210         parsing_stdin = TRUE;
1211         break;
1212
1213         /* Common options. */
1214       case 'a': append_to_tagfile = TRUE;       break;
1215       case 'C': cplusplus = TRUE;               break;
1216       case 'f':         /* for compatibility with old makefiles */
1217       case 'o':
1218         if (tagfile)
1219           {
1220             error ("-o option may only be given once.", (char *)NULL);
1221             suggest_asking_for_help ();
1222             /* NOTREACHED */
1223           }
1224         tagfile = optarg;
1225         break;
1226       case 'I':
1227       case 'S':         /* for backward compatibility */
1228         ignoreindent = TRUE;
1229         break;
1230       case 'l':
1231         {
1232           language *lang = get_language_from_langname (optarg);
1233           if (lang != NULL)
1234             {
1235               argbuffer[current_arg].lang = lang;
1236               argbuffer[current_arg].arg_type = at_language;
1237               ++current_arg;
1238             }
1239         }
1240         break;
1241       case 'c':
1242         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1243         optarg = concat (optarg, "i", ""); /* memory leak here */
1244         /* FALLTHRU */
1245       case 'r':
1246         argbuffer[current_arg].arg_type = at_regexp;
1247         argbuffer[current_arg].what = optarg;
1248         ++current_arg;
1249         break;
1250       case 'R':
1251         argbuffer[current_arg].arg_type = at_regexp;
1252         argbuffer[current_arg].what = NULL;
1253         ++current_arg;
1254         break;
1255       case 'V':
1256         print_version ();
1257         break;
1258       case 'h':
1259       case 'H':
1260         help_asked = TRUE;
1261         break;
1262
1263         /* Etags options */
1264       case 'D': constantypedefs = FALSE;                        break;
1265       case 'i': included_files[nincluded_files++] = optarg;     break;
1266
1267         /* Ctags options. */
1268       case 'B': searchar = '?';                                 break;
1269       case 'd': constantypedefs = TRUE;                         break;
1270       case 't': typedefs = TRUE;                                break;
1271       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1272       case 'u': update = TRUE;                                  break;
1273       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1274       case 'x': cxref_style = TRUE;                             break;
1275       case 'w': no_warnings = TRUE;                             break;
1276       default:
1277         suggest_asking_for_help ();
1278         /* NOTREACHED */
1279       }
1280
1281   /* No more options.  Store the rest of arguments. */
1282   for (; optind < argc; optind++)
1283     {
1284       argbuffer[current_arg].arg_type = at_filename;
1285       argbuffer[current_arg].what = argv[optind];
1286       ++current_arg;
1287       ++file_count;
1288     }
1289
1290   argbuffer[current_arg].arg_type = at_end;
1291
1292   if (help_asked)
1293     print_help (argbuffer);
1294     /* NOTREACHED */
1295
1296   if (nincluded_files == 0 && file_count == 0)
1297     {
1298       error ("no input files specified.", (char *)NULL);
1299       suggest_asking_for_help ();
1300       /* NOTREACHED */
1301     }
1302
1303   if (tagfile == NULL)
1304     tagfile = CTAGS ? "tags" : "TAGS";
1305   cwd = etags_getcwd ();        /* the current working directory */
1306   if (cwd[strlen (cwd) - 1] != '/')
1307     {
1308       char *oldcwd = cwd;
1309       cwd = concat (oldcwd, "/", "");
1310       free (oldcwd);
1311     }
1312   /* Relative file names are made relative to the current directory. */
1313   if (streq (tagfile, "-")
1314       || strneq (tagfile, "/dev/", 5))
1315     tagfiledir = cwd;
1316   else
1317     tagfiledir = absolute_dirname (tagfile, cwd);
1318
1319   init ();                      /* set up boolean "functions" */
1320
1321   linebuffer_init (&lb);
1322   linebuffer_init (&filename_lb);
1323   linebuffer_init (&filebuf);
1324   linebuffer_init (&token_name);
1325
1326   if (!CTAGS)
1327     {
1328       if (streq (tagfile, "-"))
1329         {
1330           tagf = stdout;
1331 #ifdef DOS_NT
1332           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1333              doesn't take effect until after `stdout' is already open). */
1334           if (!isatty (fileno (stdout)))
1335             setmode (fileno (stdout), O_BINARY);
1336 #endif /* DOS_NT */
1337         }
1338       else
1339         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1340       if (tagf == NULL)
1341         pfatal (tagfile);
1342     }
1343
1344   /*
1345    * Loop through files finding functions.
1346    */
1347   for (i = 0; i < current_arg; i++)
1348     {
1349       static language *lang;    /* non-NULL if language is forced */
1350       char *this_file;
1351
1352       switch (argbuffer[i].arg_type)
1353         {
1354         case at_language:
1355           lang = argbuffer[i].lang;
1356           break;
1357         case at_regexp:
1358           analyse_regex (argbuffer[i].what);
1359           break;
1360         case at_filename:
1361 #ifdef VMS
1362           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1363             {
1364               if (got_err)
1365                 {
1366                   error ("can't find file %s\n", this_file);
1367                   argc--, argv++;
1368                 }
1369               else
1370                 {
1371                   this_file = massage_name (this_file);
1372                 }
1373 #else
1374               this_file = argbuffer[i].what;
1375 #endif
1376               /* Input file named "-" means read file names from stdin
1377                  (one per line) and use them. */
1378               if (streq (this_file, "-"))
1379                 {
1380                   if (parsing_stdin)
1381                     fatal ("cannot parse standard input AND read file names from it",
1382                            (char *)NULL);
1383                   while (readline_internal (&filename_lb, stdin) > 0)
1384                     process_file_name (filename_lb.buffer, lang);
1385                 }
1386               else
1387                 process_file_name (this_file, lang);
1388 #ifdef VMS
1389             }
1390 #endif
1391           break;
1392         case at_stdin:
1393           this_file = argbuffer[i].what;
1394           process_file (stdin, this_file, lang);
1395           break;
1396         }
1397     }
1398
1399   free_regexps ();
1400   free (lb.buffer);
1401   free (filebuf.buffer);
1402   free (token_name.buffer);
1403
1404   if (!CTAGS || cxref_style)
1405     {
1406       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1407       put_entries (nodehead);
1408       free_tree (nodehead);
1409       nodehead = NULL;
1410       if (!CTAGS)
1411         {
1412           fdesc *fdp;
1413
1414           /* Output file entries that have no tags. */
1415           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1416             if (!fdp->written)
1417               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1418
1419           while (nincluded_files-- > 0)
1420             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1421
1422           if (fclose (tagf) == EOF)
1423             pfatal (tagfile);
1424         }
1425
1426       exit (EXIT_SUCCESS);
1427     }
1428
1429   if (update)
1430     {
1431       char cmd[BUFSIZ];
1432       for (i = 0; i < current_arg; ++i)
1433         {
1434           switch (argbuffer[i].arg_type)
1435             {
1436             case at_filename:
1437             case at_stdin:
1438               break;
1439             default:
1440               continue;         /* the for loop */
1441             }
1442           sprintf (cmd,
1443                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1444                    tagfile, argbuffer[i].what, tagfile);
1445           if (system (cmd) != EXIT_SUCCESS)
1446             fatal ("failed to execute shell command", (char *)NULL);
1447         }
1448       append_to_tagfile = TRUE;
1449     }
1450
1451   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1452   if (tagf == NULL)
1453     pfatal (tagfile);
1454   put_entries (nodehead);       /* write all the tags (CTAGS) */
1455   free_tree (nodehead);
1456   nodehead = NULL;
1457   if (fclose (tagf) == EOF)
1458     pfatal (tagfile);
1459
1460   if (CTAGS)
1461     if (append_to_tagfile || update)
1462       {
1463         char cmd[2*BUFSIZ+20];
1464         /* Maybe these should be used:
1465            setenv ("LC_COLLATE", "C", 1);
1466            setenv ("LC_ALL", "C", 1); */
1467         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1468         exit (system (cmd));
1469       }
1470   return EXIT_SUCCESS;
1471 }
1472
1473
1474 /*
1475  * Return a compressor given the file name.  If EXTPTR is non-zero,
1476  * return a pointer into FILE where the compressor-specific
1477  * extension begins.  If no compressor is found, NULL is returned
1478  * and EXTPTR is not significant.
1479  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1480  */
1481 static compressor *
1482 get_compressor_from_suffix (file, extptr)
1483      char *file;
1484      char **extptr;
1485 {
1486   compressor *compr;
1487   char *slash, *suffix;
1488
1489   /* This relies on FN to be after canonicalize_filename,
1490      so we don't need to consider backslashes on DOS_NT.  */
1491   slash = etags_strrchr (file, '/');
1492   suffix = etags_strrchr (file, '.');
1493   if (suffix == NULL || suffix < slash)
1494     return NULL;
1495   if (extptr != NULL)
1496     *extptr = suffix;
1497   suffix += 1;
1498   /* Let those poor souls who live with DOS 8+3 file name limits get
1499      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1500      Only the first do loop is run if not MSDOS */
1501   do
1502     {
1503       for (compr = compressors; compr->suffix != NULL; compr++)
1504         if (streq (compr->suffix, suffix))
1505           return compr;
1506       if (!MSDOS)
1507         break;                  /* do it only once: not really a loop */
1508       if (extptr != NULL)
1509         *extptr = ++suffix;
1510     } while (*suffix != '\0');
1511   return NULL;
1512 }
1513
1514
1515
1516 /*
1517  * Return a language given the name.
1518  */
1519 static language *
1520 get_language_from_langname (name)
1521      const char *name;
1522 {
1523   language *lang;
1524
1525   if (name == NULL)
1526     error ("empty language name", (char *)NULL);
1527   else
1528     {
1529       for (lang = lang_names; lang->name != NULL; lang++)
1530         if (streq (name, lang->name))
1531           return lang;
1532       error ("unknown language \"%s\"", name);
1533     }
1534
1535   return NULL;
1536 }
1537
1538
1539 /*
1540  * Return a language given the interpreter name.
1541  */
1542 static language *
1543 get_language_from_interpreter (interpreter)
1544      char *interpreter;
1545 {
1546   language *lang;
1547   char **iname;
1548
1549   if (interpreter == NULL)
1550     return NULL;
1551   for (lang = lang_names; lang->name != NULL; lang++)
1552     if (lang->interpreters != NULL)
1553       for (iname = lang->interpreters; *iname != NULL; iname++)
1554         if (streq (*iname, interpreter))
1555             return lang;
1556
1557   return NULL;
1558 }
1559
1560
1561
1562 /*
1563  * Return a language given the file name.
1564  */
1565 static language *
1566 get_language_from_filename (file, case_sensitive)
1567      char *file;
1568      bool case_sensitive;
1569 {
1570   language *lang;
1571   char **name, **ext, *suffix;
1572
1573   /* Try whole file name first. */
1574   for (lang = lang_names; lang->name != NULL; lang++)
1575     if (lang->filenames != NULL)
1576       for (name = lang->filenames; *name != NULL; name++)
1577         if ((case_sensitive)
1578             ? streq (*name, file)
1579             : strcaseeq (*name, file))
1580           return lang;
1581
1582   /* If not found, try suffix after last dot. */
1583   suffix = etags_strrchr (file, '.');
1584   if (suffix == NULL)
1585     return NULL;
1586   suffix += 1;
1587   for (lang = lang_names; lang->name != NULL; lang++)
1588     if (lang->suffixes != NULL)
1589       for (ext = lang->suffixes; *ext != NULL; ext++)
1590         if ((case_sensitive)
1591             ? streq (*ext, suffix)
1592             : strcaseeq (*ext, suffix))
1593           return lang;
1594   return NULL;
1595 }
1596
1597 \f
/*
 * This routine is called on each file argument.
 *
 * FILE is the name as given by the user; it is canonicalized in place.
 * LANG is handed unchanged to process_file.
 *
 * The routine works out which file on disk to read (the name as given,
 * the name without its compression suffix, or the name with one of the
 * known compression suffixes appended), opens it either directly or
 * through the matching decompression command, and calls process_file
 * with the uncompressed name.  Files already present in the fdhead list
 * under the same uncompressed name are skipped silently.
 *
 * Pointer identity matters throughout: real_name always aliases either
 * compressed_name or uncompressed_name, and the test
 * `real_name == compressed_name' is what selects popen/pclose over
 * fopen/fclose.
 */
static void
process_file_name (file, lang)
     char *file;
     language *lang;
{
  struct stat stat_buf;
  FILE *inf;
  fdesc *fdp;
  compressor *compr;
  char *compressed_name, *uncompressed_name;
  char *ext, *real_name;
  int retval;

  canonicalize_filename (file);
  /* Never tag the tags file itself, unless output goes to "-". */
  if (streq (file, tagfile) && !streq (tagfile, "-"))
    {
      error ("skipping inclusion of %s in self.", file);
      return;
    }
  /* Split FILE into a compressed and an uncompressed name.  When a
     compressor is recognised, EXT is used below as the end of the
     uncompressed part of FILE. */
  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
    {
      compressed_name = NULL;
      real_name = uncompressed_name = savestr (file);
    }
  else
    {
      real_name = compressed_name = savestr (file);
      uncompressed_name = savenstr (file, ext - file);
    }

  /* If the canonicalized uncompressed name
     has already been dealt with, skip it silently. */
  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
    {
      assert (fdp->infname != NULL);
      if (streq (uncompressed_name, fdp->infname))
        goto cleanup;
    }

  if (stat (real_name, &stat_buf) != 0)
    {
      /* Reset real_name and try with a different name. */
      real_name = NULL;
      if (compressed_name != NULL) /* try with the given suffix */
        {
          /* The compressed file is missing: fall back to the name
             with the compression suffix stripped. */
          if (stat (uncompressed_name, &stat_buf) == 0)
            real_name = uncompressed_name;
        }
      else                      /* try all possible suffixes */
        {
          /* On success the loop is left with COMPR pointing at the
             compressor whose suffix matched; it is used below to build
             the decompression command. */
          for (compr = compressors; compr->suffix != NULL; compr++)
            {
              compressed_name = concat (file, ".", compr->suffix);
              if (stat (compressed_name, &stat_buf) != 0)
                {
                  if (MSDOS)
                    {
                      /* SUF starts at the dot that was just inserted.
                         Each pass deletes the character at SUF by
                         shifting the rest of the string (including its
                         terminating NUL) one place left, then retries
                         stat with the shortened name. */
                      char *suf = compressed_name + strlen (file);
                      size_t suflen = strlen (compr->suffix) + 1;
                      for ( ; suf[1]; suf++, suflen--)
                        {
                          memmove (suf, suf + 1, suflen);
                          if (stat (compressed_name, &stat_buf) == 0)
                            {
                              real_name = compressed_name;
                              break;
                            }
                        }
                      if (real_name != NULL)
                        break;
                    } /* MSDOS */
                  /* No luck with this suffix: discard the candidate. */
                  free (compressed_name);
                  compressed_name = NULL;
                }
              else
                {
                  real_name = compressed_name;
                  break;
                }
            }
        }
      if (real_name == NULL)
        {
          /* Nothing could be stat'ed; report using the name as given. */
          perror (file);
          goto cleanup;
        }
    } /* try with a different name */

  if (!S_ISREG (stat_buf.st_mode))
    {
      error ("skipping %s: it is not a regular file.", real_name);
      goto cleanup;
    }
  if (real_name == compressed_name)
    {
      /* Read the file through its decompression command.
         NOTE(review): real_name is pasted unquoted into the command
         line given to popen, so a file name containing shell
         metacharacters is interpreted by the shell.  A fix needs
         platform-specific quoting. */
      char *cmd = concat (compr->command, " ", real_name);
      inf = (FILE *) popen (cmd, "r");
      free (cmd);
    }
  else
    inf = fopen (real_name, "r");
  if (inf == NULL)
    {
      perror (real_name);
      goto cleanup;
    }

  process_file (inf, uncompressed_name, lang);

  /* Close the stream the same way it was opened. */
  if (real_name == compressed_name)
    retval = pclose (inf);
  else
    retval = fclose (inf);
  if (retval < 0)
    pfatal (file);

 cleanup:
  /* real_name is only an alias, so freeing these two is enough. */
  if (compressed_name) free (compressed_name);
  if (uncompressed_name) free (uncompressed_name);
  /* Forget the per-file state. */
  last_node = NULL;
  curfdp = NULL;
  return;
}
1724
1725 static void
1726 process_file (fh, fn, lang)
1727      FILE *fh;
1728      char *fn;
1729      language *lang;
1730 {
1731   static const fdesc emptyfdesc;
1732   fdesc *fdp;
1733
1734   /* Create a new input file description entry. */
1735   fdp = xnew (1, fdesc);
1736   *fdp = emptyfdesc;
1737   fdp->next = fdhead;
1738   fdp->infname = savestr (fn);
1739   fdp->lang = lang;
1740   fdp->infabsname = absolute_filename (fn, cwd);
1741   fdp->infabsdir = absolute_dirname (fn, cwd);
1742   if (filename_is_absolute (fn))
1743     {
1744       /* An absolute file name.  Canonicalize it. */
1745       fdp->taggedfname = absolute_filename (fn, NULL);
1746     }
1747   else
1748     {
1749       /* A file name relative to cwd.  Make it relative
1750          to the directory of the tags file. */
1751       fdp->taggedfname = relative_filename (fn, tagfiledir);
1752     }
1753   fdp->usecharno = TRUE;        /* use char position when making tags */
1754   fdp->prop = NULL;
1755   fdp->written = FALSE;         /* not written on tags file yet */
1756
1757   fdhead = fdp;
1758   curfdp = fdhead;              /* the current file description */
1759
1760   find_entries (fh);
1761
1762   /* If not Ctags, and if this is not metasource and if it contained no #line
1763      directives, we can write the tags and free all nodes pointing to
1764      curfdp. */
1765   if (!CTAGS
1766       && curfdp->usecharno      /* no #line directives in this file */
1767       && !curfdp->lang->metasource)
1768     {
1769       node *np, *prev;
1770
1771       /* Look for the head of the sublist relative to this file.  See add_node
1772          for the structure of the node tree. */
1773       prev = NULL;
1774       for (np = nodehead; np != NULL; prev = np, np = np->left)
1775         if (np->fdp == curfdp)
1776           break;
1777
1778       /* If we generated tags for this file, write and delete them. */
1779       if (np != NULL)
1780         {
1781           /* This is the head of the last sublist, if any.  The following
1782              instructions depend on this being true. */
1783           assert (np->left == NULL);
1784
1785           assert (fdhead == curfdp);
1786           assert (last_node->fdp == curfdp);
1787           put_entries (np);     /* write tags for file curfdp->taggedfname */
1788           free_tree (np);       /* remove the written nodes */
1789           if (prev == NULL)
1790             nodehead = NULL;    /* no nodes left */
1791           else
1792             prev->left = NULL;  /* delete the pointer to the sublist */
1793         }
1794     }
1795 }
1796
1797 /*
1798  * This routine sets up the boolean pseudo-functions which work
1799  * by setting boolean flags dependent upon the corresponding character.
1800  * Every char which is NOT in that string is not a white char.  Therefore,
1801  * all of the array "_wht" is set to FALSE, and then the elements
1802  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1803  * of a char is TRUE if it is the string "white", else FALSE.
1804  */
1805 static void
1806 init ()
1807 {
1808   register char *sp;
1809   register int i;
1810
1811   for (i = 0; i < CHARS; i++)
1812     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1813   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1814   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1815   notinname('\0') = notinname('\n');
1816   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1817   begtoken('\0') = begtoken('\n');
1818   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1819   intoken('\0') = intoken('\n');
1820   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1821   endtoken('\0') = endtoken('\n');
1822 }
1823
/*
 * Choose a parser for the current file (curfdp) and run it on the
 * already opened stream INF to find the function and type definitions.
 *
 * The parser is chosen by trying, in order:
 *   1. the language already recorded in curfdp->lang, if it has a
 *      parsing function;
 *   2. the file name, compared case-sensitively;
 *   3. the interpreter named on a leading "#!" line;
 *   4. the file name, compared ignoring case;
 *   5. Fortran and, if that produced no new tags, C or C++ (both by
 *      calling this function recursively with curfdp->lang forced).
 * Whenever a guess succeeds, curfdp->lang is updated to match.
 *
 * The order of the reads and rewinds on INF below is significant.
 */
static void
find_entries (inf)
     FILE *inf;
{
  char *cp;
  language *lang = curfdp->lang;
  Lang_function *parser = NULL;

  /* If user specified a language, use it. */
  if (lang != NULL && lang->function != NULL)
    {
      parser = lang->function;
    }

  /* Else try to guess the language given the file name. */
  if (parser == NULL)
    {
      lang = get_language_from_filename (curfdp->infname, TRUE);
      if (lang != NULL && lang->function != NULL)
        {
          curfdp->lang = lang;
          parser = lang->function;
        }
    }

  /* Else look for sharp-bang as the first two characters. */
  if (parser == NULL
      && readline_internal (&lb, inf) > 0
      && lb.len >= 2
      && lb.buffer[0] == '#'
      && lb.buffer[1] == '!')
    {
      char *lp;

      /* Set lp to point at the first char after the last slash in the
         line or, if no slashes, at the first nonblank.  Then set cp to
         the first successive blank and terminate the string. */
      lp = etags_strrchr (lb.buffer+2, '/');
      if (lp != NULL)
        lp += 1;
      else
        lp = skip_spaces (lb.buffer + 2);
      cp = skip_non_spaces (lp);
      *cp = '\0';

      /* lp now names the interpreter; ignore an empty name. */
      if (strlen (lp) > 0)
        {
          lang = get_language_from_interpreter (lp);
          if (lang != NULL && lang->function != NULL)
            {
              curfdp->lang = lang;
              parser = lang->function;
            }
        }
    }

  /* We rewind here, even if inf may be a pipe.  We fail if the
     length of the first line is longer than the pipe block size,
     which is unlikely. */
  rewind (inf);

  /* Else try to guess the language given the case insensitive file name. */
  if (parser == NULL)
    {
      lang = get_language_from_filename (curfdp->infname, FALSE);
      if (lang != NULL && lang->function != NULL)
        {
          curfdp->lang = lang;
          parser = lang->function;
        }
    }

  /* Else try Fortran or C. */
  if (parser == NULL)
    {
      /* Remember last_node so that we can tell whether the Fortran
         attempt added any node. */
      node *old_last_node = last_node;

      curfdp->lang = get_language_from_langname ("fortran");
      find_entries (inf);

      if (old_last_node == last_node)
        /* No Fortran entries found.  Try C. */
        {
          /* We do not tag if rewind fails.
             Only the file name will be recorded in the tags file. */
          rewind (inf);
          curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
          find_entries (inf);
        }
      /* The recursive calls did all the parsing. */
      return;
    }

  if (!no_line_directive
      && curfdp->lang != NULL && curfdp->lang->metasource)
    /* It may be that this is a bingo.y file, and we already parsed a bingo.c
       file, or anyway we parsed a file that is automatically generated from
       this one.  If this is the case, the bingo.c file contained #line
       directives that generated tags pointing to this file.  Let's delete
       them all before parsing this file, which is the real source. */
    {
      /* fdpp always points at the link that leads to the description
         being examined, so that an entry can be unlinked in place. */
      fdesc **fdpp = &fdhead;
      while (*fdpp != NULL)
        if (*fdpp != curfdp
            && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
          /* We found one of those!  We must delete both the file description
             and all tags referring to it. */
          {
            fdesc *badfdp = *fdpp;

            /* Delete the tags referring to badfdp->taggedfname
               that were obtained from badfdp->infname. */
            invalidate_nodes (badfdp, &nodehead);

            *fdpp = badfdp->next; /* remove the bad description from the list */
            free_fdesc (badfdp);
          }
        else
          fdpp = &(*fdpp)->next; /* advance the list pointer */
    }

  assert (parser != NULL);

  /* Generic initialisations before reading from file. */
  linebuffer_setlen (&filebuf, 0); /* reset the file buffer */

  /* Generic initialisations before parsing file with readline. */
  lineno = 0;                  /* reset global line number */
  charno = 0;                  /* reset global char number */
  linecharno = 0;              /* reset global char number of line start */

  parser (inf);

  /* Called once the parser has consumed the whole stream. */
  regex_tag_multiline ();
}
1962
1963 \f
1964 /*
1965  * Check whether an implicitly named tag should be created,
1966  * then call `pfnote'.
1967  * NAME is a string that is internally copied by this function.
1968  *
1969  * TAGS format specification
1970  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1971  * The following is explained in some more detail in etc/ETAGS.EBNF.
1972  *
1973  * make_tag creates tags with "implicit tag names" (unnamed tags)
1974  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1975  *  1. NAME does not contain any of the characters in NONAM;
1976  *  2. LINESTART contains name as either a rightmost, or rightmost but
1977  *     one character, substring;
1978  *  3. the character, if any, immediately before NAME in LINESTART must
1979  *     be a character in NONAM;
1980  *  4. the character, if any, immediately after NAME in LINESTART must
1981  *     also be a character in NONAM.
1982  *
1983  * The implementation uses the notinname() macro, which recognises the
1984  * characters stored in the string `nonam'.
1985  * etags.el needs to use the same characters that are in NONAM.
1986  */
1987 static void
1988 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1989      char *name;                /* tag name, or NULL if unnamed */
1990      int namelen;               /* tag length */
1991      bool is_func;              /* tag is a function */
1992      char *linestart;           /* start of the line where tag is */
1993      int linelen;               /* length of the line where tag is */
1994      int lno;                   /* line number */
1995      long cno;                  /* character number */
1996 {
1997   bool named = (name != NULL && namelen > 0);
1998
1999   if (!CTAGS && named)          /* maybe set named to false */
2000     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2001        such that etags.el can guess a name from it. */
2002     {
2003       int i;
2004       register char *cp = name;
2005
2006       for (i = 0; i < namelen; i++)
2007         if (notinname (*cp++))
2008           break;
2009       if (i == namelen)                         /* rule #1 */
2010         {
2011           cp = linestart + linelen - namelen;
2012           if (notinname (linestart[linelen-1]))
2013             cp -= 1;                            /* rule #4 */
2014           if (cp >= linestart                   /* rule #2 */
2015               && (cp == linestart
2016                   || notinname (cp[-1]))        /* rule #3 */
2017               && strneq (name, cp, namelen))    /* rule #2 */
2018             named = FALSE;      /* use implicit tag name */
2019         }
2020     }
2021
2022   if (named)
2023     name = savenstr (name, namelen);
2024   else
2025     name = NULL;
2026   pfnote (name, is_func, linestart, linelen, lno, cno);
2027 }
2028
2029 /* Record a tag. */
2030 static void
2031 pfnote (name, is_func, linestart, linelen, lno, cno)
2032      char *name;                /* tag name, or NULL if unnamed */
2033      bool is_func;              /* tag is a function */
2034      char *linestart;           /* start of the line where tag is */
2035      int linelen;               /* length of the line where tag is */
2036      int lno;                   /* line number */
2037      long cno;                  /* character number */
2038 {
2039   register node *np;
2040
2041   assert (name == NULL || name[0] != '\0');
2042   if (CTAGS && name == NULL)
2043     return;
2044
2045   np = xnew (1, node);
2046
2047   /* If ctags mode, change name "main" to M<thisfilename>. */
2048   if (CTAGS && !cxref_style && streq (name, "main"))
2049     {
2050       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2051       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2052       fp = etags_strrchr (np->name, '.');
2053       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2054         fp[0] = '\0';
2055     }
2056   else
2057     np->name = name;
2058   np->valid = TRUE;
2059   np->been_warned = FALSE;
2060   np->fdp = curfdp;
2061   np->is_func = is_func;
2062   np->lno = lno;
2063   if (np->fdp->usecharno)
2064     /* Our char numbers are 0-base, because of C language tradition?
2065        ctags compatibility?  old versions compatibility?   I don't know.
2066        Anyway, since emacs's are 1-base we expect etags.el to take care
2067        of the difference.  If we wanted to have 1-based numbers, we would
2068        uncomment the +1 below. */
2069     np->cno = cno /* + 1 */ ;
2070   else
2071     np->cno = invalidcharno;
2072   np->left = np->right = NULL;
2073   if (CTAGS && !cxref_style)
2074     {
2075       if (strlen (linestart) < 50)
2076         np->regex = concat (linestart, "$", "");
2077       else
2078         np->regex = savenstr (linestart, 50);
2079     }
2080   else
2081     np->regex = savenstr (linestart, linelen);
2082
2083   add_node (np, &nodehead);
2084 }
2085
2086 /*
2087  * free_tree ()
2088  *      recurse on left children, iterate on right children.
2089  */
2090 static void
2091 free_tree (np)
2092      register node *np;
2093 {
2094   while (np)
2095     {
2096       register node *node_right = np->right;
2097       free_tree (np->left);
2098       if (np->name != NULL)
2099         free (np->name);
2100       free (np->regex);
2101       free (np);
2102       np = node_right;
2103     }
2104 }
2105
2106 /*
2107  * free_fdesc ()
2108  *      delete a file description
2109  */
2110 static void
2111 free_fdesc (fdp)
2112      register fdesc *fdp;
2113 {
2114   if (fdp->infname != NULL) free (fdp->infname);
2115   if (fdp->infabsname != NULL) free (fdp->infabsname);
2116   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2117   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2118   if (fdp->prop != NULL) free (fdp->prop);
2119   free (fdp);
2120 }
2121
2122 /*
2123  * add_node ()
2124  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2125  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2126  *      balancing.
2127  *
2128  *      add_node is the only function allowed to add nodes, so it can
2129  *      maintain state.
2130  */
2131 static void
2132 add_node (np, cur_node_p)
2133      node *np, **cur_node_p;
2134 {
2135   register int dif;
2136   register node *cur_node = *cur_node_p;
2137
2138   if (cur_node == NULL)
2139     {
2140       *cur_node_p = np;
2141       last_node = np;
2142       return;
2143     }
2144
2145   if (!CTAGS)
2146     /* Etags Mode */
2147     {
2148       /* For each file name, tags are in a linked sublist on the right
2149          pointer.  The first tags of different files are a linked list
2150          on the left pointer.  last_node points to the end of the last
2151          used sublist. */
2152       if (last_node != NULL && last_node->fdp == np->fdp)
2153         {
2154           /* Let's use the same sublist as the last added node. */
2155           assert (last_node->right == NULL);
2156           last_node->right = np;
2157           last_node = np;
2158         }
2159       else if (cur_node->fdp == np->fdp)
2160         {
2161           /* Scanning the list we found the head of a sublist which is
2162              good for us.  Let's scan this sublist. */
2163           add_node (np, &cur_node->right);
2164         }
2165       else
2166         /* The head of this sublist is not good for us.  Let's try the
2167            next one. */
2168         add_node (np, &cur_node->left);
2169     } /* if ETAGS mode */
2170
2171   else
2172     {
2173       /* Ctags Mode */
2174       dif = strcmp (np->name, cur_node->name);
2175
2176       /*
2177        * If this tag name matches an existing one, then
2178        * do not add the node, but maybe print a warning.
2179        */
2180       if (no_duplicates && !dif)
2181         {
2182           if (np->fdp == cur_node->fdp)
2183             {
2184               if (!no_warnings)
2185                 {
2186                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2187                            np->fdp->infname, lineno, np->name);
2188                   fprintf (stderr, "Second entry ignored\n");
2189                 }
2190             }
2191           else if (!cur_node->been_warned && !no_warnings)
2192             {
2193               fprintf
2194                 (stderr,
2195                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2196                  np->fdp->infname, cur_node->fdp->infname, np->name);
2197               cur_node->been_warned = TRUE;
2198             }
2199           return;
2200         }
2201
2202       /* Actually add the node */
2203       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2204     } /* if CTAGS mode */
2205 }
2206
2207 /*
2208  * invalidate_nodes ()
2209  *      Scan the node tree and invalidate all nodes pointing to the
2210  *      given file description (CTAGS case) or free them (ETAGS case).
2211  */
2212 static void
2213 invalidate_nodes (badfdp, npp)
2214      fdesc *badfdp;
2215      node **npp;
2216 {
2217   node *np = *npp;
2218
2219   if (np == NULL)
2220     return;
2221
2222   if (CTAGS)
2223     {
2224       if (np->left != NULL)
2225         invalidate_nodes (badfdp, &np->left);
2226       if (np->fdp == badfdp)
2227         np->valid = FALSE;
2228       if (np->right != NULL)
2229         invalidate_nodes (badfdp, &np->right);
2230     }
2231   else
2232     {
2233       assert (np->fdp != NULL);
2234       if (np->fdp == badfdp)
2235         {
2236           *npp = np->left;      /* detach the sublist from the list */
2237           np->left = NULL;      /* isolate it */
2238           free_tree (np);       /* free it */
2239           invalidate_nodes (badfdp, npp);
2240         }
2241       else
2242         invalidate_nodes (badfdp, &np->left);
2243     }
2244 }
2245
2246 \f
2247 static int total_size_of_entries __P((node *));
2248 static int number_len __P((long));
2249
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, plus one for each time NUM can still
     be divided by ten. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2260
2261 /*
2262  * Return total number of characters that put_entries will output for
2263  * the nodes in the linked list at the right of the specified node.
2264  * This count is irrelevant with etags.el since emacs 19.34 at least,
2265  * but is still supplied for backward compatibility.
2266  */
2267 static int
2268 total_size_of_entries (np)
2269      register node *np;
2270 {
2271   register int total = 0;
2272
2273   for (; np != NULL; np = np->right)
2274     if (np->valid)
2275       {
2276         total += strlen (np->regex) + 1;                /* pat\177 */
2277         if (np->name != NULL)
2278           total += strlen (np->name) + 1;               /* name\001 */
2279         total += number_len ((long) np->lno) + 1;       /* lno, */
2280         if (np->cno != invalidcharno)                   /* cno */
2281           total += number_len (np->cno);
2282         total += 1;                                     /* newline */
2283       }
2284
2285   return total;
2286 }
2287
2288 static void
2289 put_entries (np)
2290      register node *np;
2291 {
2292   register char *sp;
2293   static fdesc *fdp = NULL;
2294
2295   if (np == NULL)
2296     return;
2297
2298   /* Output subentries that precede this one */
2299   if (CTAGS)
2300     put_entries (np->left);
2301
2302   /* Output this entry */
2303   if (np->valid)
2304     {
2305       if (!CTAGS)
2306         {
2307           /* Etags mode */
2308           if (fdp != np->fdp)
2309             {
2310               fdp = np->fdp;
2311               fprintf (tagf, "\f\n%s,%d\n",
2312                        fdp->taggedfname, total_size_of_entries (np));
2313               fdp->written = TRUE;
2314             }
2315           fputs (np->regex, tagf);
2316           fputc ('\177', tagf);
2317           if (np->name != NULL)
2318             {
2319               fputs (np->name, tagf);
2320               fputc ('\001', tagf);
2321             }
2322           fprintf (tagf, "%d,", np->lno);
2323           if (np->cno != invalidcharno)
2324             fprintf (tagf, "%ld", np->cno);
2325           fputs ("\n", tagf);
2326         }
2327       else
2328         {
2329           /* Ctags mode */
2330           if (np->name == NULL)
2331             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2332
2333           if (cxref_style)
2334             {
2335               if (vgrind_style)
2336                 fprintf (stdout, "%s %s %d\n",
2337                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2338               else
2339                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2340                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2341             }
2342           else
2343             {
2344               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2345
2346               if (np->is_func)
2347                 {               /* function or #define macro with args */
2348                   putc (searchar, tagf);
2349                   putc ('^', tagf);
2350
2351                   for (sp = np->regex; *sp; sp++)
2352                     {
2353                       if (*sp == '\\' || *sp == searchar)
2354                         putc ('\\', tagf);
2355                       putc (*sp, tagf);
2356                     }
2357                   putc (searchar, tagf);
2358                 }
2359               else
2360                 {               /* anything else; text pattern inadequate */
2361                   fprintf (tagf, "%d", np->lno);
2362                 }
2363               putc ('\n', tagf);
2364             }
2365         }
2366     } /* if this node contains a valid tag */
2367
2368   /* Output subentries that follow this one */
2369   put_entries (np->right);
2370   if (!CTAGS)
2371     put_entries (np->left);
2372 }
2373
2374 \f
/* C extensions.
   Flags describing the dialect of a C-like source file.  The low bits
   selected by C_EXT identify the dialect; note that C_STAR and C_JAVA
   both include the C_PLPL bit.  C_AUTO and YACC lie outside C_EXT.
   NOTE(review): the keyword table below uses `(C_JAVA & !C_PLPL)';
   since C_PLPL is 1, `!C_PLPL' is 0 and the whole expression is 0.
   Confirm whether `~C_PLPL' was intended. */
#define C_EXT   0x00fff         /* mask covering the C extension bits */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2383
/*
 * The C symbol tables.
 *
 * sym_type classifies the words listed in the keyword table that
 * follows (the input to gperf); each keyword there is given one of
 * these values.
 */
enum sym_type
{
  /* Not used by any keyword in the table; presumably the value for a
     word that is not a keyword -- TODO confirm in C_symtype. */
  st_none,
  /* Objective C: @interface and @protocol, @implementation, @end. */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* Macros used in GNU sources: DEFUN, SYSCALL, ENTRY, PSEUDO. */
  st_C_gnumacro,
  /* Keywords such as if, for, while, switch, return; __attribute__. */
  st_C_ignore, st_C_attribute,
  /* Java: extends, implements. */
  st_C_javastruct,
  /* C++: operator. */
  st_C_operator,
  /* class; template. */
  st_C_class, st_C_template,
  /* struct, union and similar; extern; enum; define and undef; typedef. */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2398
2399 static unsigned int hash __P((const char *, unsigned int));
2400 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2401 static enum sym_type C_symtype __P((char *, int, int));
2402
2403 /* Feed stuff between (but not including) %[ and %] lines to:
2404      gperf -m 5
2405 %[
2406 %compare-strncmp
2407 %enum
2408 %struct-type
2409 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2410 %%
2411 if,             0,                      st_C_ignore
2412 for,            0,                      st_C_ignore
2413 while,          0,                      st_C_ignore
2414 switch,         0,                      st_C_ignore
2415 return,         0,                      st_C_ignore
2416 __attribute__,  0,                      st_C_attribute
2417 @interface,     0,                      st_C_objprot
2418 @protocol,      0,                      st_C_objprot
2419 @implementation,0,                      st_C_objimpl
2420 @end,           0,                      st_C_objend
2421 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2422 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2423 friend,         C_PLPL,                 st_C_ignore
2424 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2425 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2426 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2427 class,          0,                      st_C_class
2428 namespace,      C_PLPL,                 st_C_struct
2429 domain,         C_STAR,                 st_C_struct
2430 union,          0,                      st_C_struct
2431 struct,         0,                      st_C_struct
2432 extern,         0,                      st_C_extern
2433 enum,           0,                      st_C_enum
2434 typedef,        0,                      st_C_typedef
2435 define,         0,                      st_C_define
2436 undef,          0,                      st_C_define
2437 operator,       C_PLPL,                 st_C_operator
2438 template,       0,                      st_C_template
2439 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2440 DEFUN,          0,                      st_C_gnumacro
2441 SYSCALL,        0,                      st_C_gnumacro
2442 ENTRY,          0,                      st_C_gnumacro
2443 PSEUDO,         0,                      st_C_gnumacro
2444 # These are defined inside C functions, so currently they are not met.
2445 # EXFUN used in glibc, DEFVAR_* in emacs.
2446 #EXFUN,         0,                      st_C_gnumacro
2447 #DEFVAR_,       0,                      st_C_gnumacro
2448 %]
2449 and replace lines between %< and %> with its output, then:
2450  - remove the #if characterset check
2451  - make in_word_set static and not inline. */
2452 /*%<*/
2453 /* C code produced by gperf version 3.0.1 */
2454 /* Command-line: gperf -m 5  */
2455 /* Computed positions: -k'2-3' */
2456
/* One keyword of the C symbol table: its spelling, the C dialect
   flags given for it in the gperf input, and its classification. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2458 /* maximum key range = 33, duplicates = 0 */
2459
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function for the keyword table (originally produced by gperf).
   The hash of STR, a word of LEN characters, is LEN plus the
   association value of its second character plus, unless LEN is
   exactly 2, the association value of its third character.  STR must
   therefore be readable at index 1, and at index 2 when LEN is not 2. */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int key = len;

  /* The third character takes part for every length but 2. */
  if (key != 2)
    key += asso_values[(unsigned char)str[2]];
  key += asso_values[(unsigned char)str[1]];
  return key;
}
2514
2515 static struct C_stab_entry *
2516 in_word_set (str, len)
2517      register const char *str;
2518      register unsigned int len;
2519 {
2520   enum
2521     {
2522       TOTAL_KEYWORDS = 32,
2523       MIN_WORD_LENGTH = 2,
2524       MAX_WORD_LENGTH = 15,
2525       MIN_HASH_VALUE = 2,
2526       MAX_HASH_VALUE = 34
2527     };
2528
2529   static struct C_stab_entry wordlist[] =
2530     {
2531       {""}, {""},
2532       {"if",            0,                      st_C_ignore},
2533       {""},
2534       {"@end",          0,                      st_C_objend},
2535       {"union",         0,                      st_C_struct},
2536       {"define",                0,                      st_C_define},
2537       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2538       {"template",      0,                      st_C_template},
2539       {"operator",      C_PLPL,                 st_C_operator},
2540       {"@interface",    0,                      st_C_objprot},
2541       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2542       {"friend",                C_PLPL,                 st_C_ignore},
2543       {"typedef",       0,                      st_C_typedef},
2544       {"return",                0,                      st_C_ignore},
2545       {"@implementation",0,                     st_C_objimpl},
2546       {"@protocol",     0,                      st_C_objprot},
2547       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2548       {"extern",                0,                      st_C_extern},
2549       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2550       {"struct",                0,                      st_C_struct},
2551       {"domain",                C_STAR,                 st_C_struct},
2552       {"switch",                0,                      st_C_ignore},
2553       {"enum",          0,                      st_C_enum},
2554       {"for",           0,                      st_C_ignore},
2555       {"namespace",     C_PLPL,                 st_C_struct},
2556       {"class",         0,                      st_C_class},
2557       {"while",         0,                      st_C_ignore},
2558       {"undef",         0,                      st_C_define},
2559       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2560       {"__attribute__", 0,                      st_C_attribute},
2561       {"SYSCALL",       0,                      st_C_gnumacro},
2562       {"ENTRY",         0,                      st_C_gnumacro},
2563       {"PSEUDO",                0,                      st_C_gnumacro},
2564       {"DEFUN",         0,                      st_C_gnumacro}
2565     };
2566
2567   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2568     {
2569       register int key = hash (str, len);
2570
2571       if (key <= MAX_HASH_VALUE && key >= 0)
2572         {
2573           register const char *s = wordlist[key].name;
2574
2575           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2576             return &wordlist[key];
2577         }
2578     }
2579   return 0;
2580 }
2581 /*%>*/
2582
2583 static enum sym_type
2584 C_symtype (str, len, c_ext)
2585      char *str;
2586      int len;
2587      int c_ext;
2588 {
2589   register struct C_stab_entry *se = in_word_set (str, len);
2590
2591   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2592     return st_none;
2593   return se->type;
2594 }
2595
2596 \f
2597 /*
2598  * Ignoring __attribute__ ((list))
2599  */
/* Set to TRUE by consider_token when it meets the __attribute__ keyword.
   While it is TRUE, C_entries does not hand tokens to consider_token.  */
2600 static bool inattribute;        /* looking at an __attribute__ construct */
2601
2602 /*
2603  * C functions and variables are recognized using a simple
2604  * finite automaton.  fvdef is its state variable.
2605  */
2606 static enum
2607 {
2608   fvnone,                       /* nothing seen */
2609   fdefunkey,                    /* GNU macro keyword seen (DEFUN, ENTRY,
                                   PSEUDO or SYSCALL) */
2610   fdefunname,                   /* name following the GNU macro keyword seen */
2611   foperator,                    /* func: operator keyword seen (cplpl) */
2612   fvnameseen,                   /* function or variable name seen */
2613   fstartlist,                   /* func: just after open parenthesis */
2614   finlist,                      /* func: in parameter list */
2615   flistseen,                    /* func: after parameter list */
2616   fignore,                      /* func: before open brace */
2617   vignore                       /* var-like: ignore until ';' */
2618 } fvdef;
2619
2620 static bool fvextern;           /* func or var: extern keyword seen */
2621
2622 /*
2623  * typedefs are recognized using a simple finite automaton.
2624  * typdef is its state variable.
 *
 * consider_token leaves tnone for tkeyseen only when the `typedefs'
 * option is set; otherwise the typedef keyword just resets fvdef.
2625  */
2626 static enum
2627 {
2628   tnone,                        /* nothing seen */
2629   tkeyseen,                     /* typedef keyword seen */
2630   ttypeseen,                    /* defined type seen */
2631   tinbody,                      /* inside typedef body */
2632   tend,                         /* just before typedef tag */
2633   tignore                       /* junk after typedef tag */
2634 } typdef;
2635
2636 /*
2637  * struct-like structures (enum, struct and union) are recognized
2638  * using another simple finite automaton.  `structdef' is its state
2639  * variable.
 *
 * The keyword table gives other keywords (for instance `class' and
 * `namespace') struct-like types too, so they drive this automaton
 * in the same way.
2640  */
2641 static enum
2642 {
2643   snone,                        /* nothing seen yet,
2644                                    or in struct body if bracelev > 0 */
2645   skeyseen,                     /* struct-like keyword seen */
2646   stagseen,                     /* struct-like tag seen */
2647   scolonseen                    /* colon seen after struct-like tag; also
                                   entered when a Java `extends' or
                                   `implements' follows the tag */
2648 } structdef;
2649
2650 /*
2651  * When objdef is different from onone, objtag is the name of the class.
 *
 * It starts out pointing at a string literal; consider_token later
 * replaces it with copies made by savenstr, which are deliberately
 * never freed (see the oignore case there).
2652  */
2653 static char *objtag = "<uninited>";
2654
2655 /*
2656  * Yet another little state machine to deal with preprocessor lines.
 *
 * The CNL macro resets it to dnone at each new line, whereas
 * CNL_SAVE_DEFINEDEF reads a new line and leaves it untouched.
2657  */
2658 static enum
2659 {
2660   dnone,                        /* nothing seen */
2661   dsharpseen,                   /* '#' seen as first char on line */
2662   ddefineseen,                  /* '#' and 'define' seen */
2663   dignorerest                   /* ignore rest of line */
2664 } definedef;
2665
2666 /*
2667  * State machine for Objective C protocols and implementations.
2668  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2669  */
2670 static enum
2671 {
2672   onone,                        /* nothing seen */
2673   oprotocol,                    /* @interface or @protocol seen */
2674   oimplementation,              /* @implementation seen */
2675   otagseen,                     /* class name seen */
2676   oparenseen,                   /* parenthesis before category seen */
2677   ocatseen,                     /* category name seen */
2678   oinbody,                      /* in @implementation body */
2679   omethodsign,                  /* in @implementation body, after +/- */
2680   omethodtag,                   /* after method name */
2681   omethodcolon,                 /* after method colon */
2682   omethodparm,                  /* after method parameter */
2683   oignore                       /* wait for @end */
2684 } objdef;
2685
2686
2687 /*
2688  * Use this structure to keep info about the token read, and how it
2689  * should be tagged.  Used by the make_C_tag function to build a tag.
2690  */
2691 static struct tok
2692 {
2693   char *line;                   /* string containing the token */
2694   int offset;                   /* where the token starts in LINE */
2695   int length;                   /* token length */
2696   /*
2697     The previous members can be used to pass strings around for generic
2698     purposes.  The following ones specifically refer to creating tags.  In this
2699     case the token contained here is the pattern that will be used to create a
2700     tag.
2701   */
2702   bool valid;                   /* if FALSE, do not create a tag; the token
2703                                    should be invalidated whenever a state
2704                                    machine is reset prematurely */
2705   bool named;                   /* create a named tag */
2706   int lineno;                   /* source line number of tag */
2707   long linepos;                 /* source char number of tag */
2708 } token;                        /* latest token read */
2709
2710 /*
2711  * Variables and functions for dealing with nested structures.
2712  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2713  */
2714 static void pushclass_above __P((int, char *, int));
2715 static void popclass_above __P((int));
2716 static void write_classname __P((linebuffer *, char *qualifier));
2717
/* cname and bracelev are parallel arrays of SIZE elements, of which the
   first NL are in use.  A cname element may be NULL: pushclass_above
   stores NULL when it is given no name.  */
2718 static struct {
2719   char **cname;                 /* nested class names */
2720   int *bracelev;                /* nested class brace level */
2721   int nl;                       /* class nesting level (elements used) */
2722   int size;                     /* length of the array */
2723 } cstack;                       /* stack for nested declaration tags */
2724 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2725 #define nestlev         (cstack.nl)
2726 /* After struct keyword or in struct body, not inside a nested function. */
/* This macro uses a variable named `bracelev' that must be in scope
   wherever the macro is expanded.  */
2727 #define instruct        (structdef == snone && nestlev > 0                      \
2728                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2729
2730 static void
2731 pushclass_above (bracelev, str, len)
2732      int bracelev;
2733      char *str;
2734      int len;
2735 {
2736   int nl;
2737
2738   popclass_above (bracelev);
2739   nl = cstack.nl;
2740   if (nl >= cstack.size)
2741     {
2742       int size = cstack.size *= 2;
2743       xrnew (cstack.cname, size, char *);
2744       xrnew (cstack.bracelev, size, int);
2745     }
2746   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2747   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2748   cstack.bracelev[nl] = bracelev;
2749   cstack.nl = nl + 1;
2750 }
2751
2752 static void
2753 popclass_above (bracelev)
2754      int bracelev;
2755 {
2756   int nl;
2757
2758   for (nl = cstack.nl - 1;
2759        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2760        nl--)
2761     {
2762       if (cstack.cname[nl] != NULL)
2763         free (cstack.cname[nl]);
2764       cstack.nl = nl;
2765     }
2766 }
2767
2768 static void
2769 write_classname (cn, qualifier)
2770      linebuffer *cn;
2771      char *qualifier;
2772 {
2773   int i, len;
2774   int qlen = strlen (qualifier);
2775
2776   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2777     {
2778       len = 0;
2779       cn->len = 0;
2780       cn->buffer[0] = '\0';
2781     }
2782   else
2783     {
2784       len = strlen (cstack.cname[0]);
2785       linebuffer_setlen (cn, len);
2786       strcpy (cn->buffer, cstack.cname[0]);
2787     }
2788   for (i = 1; i < cstack.nl; i++)
2789     {
2790       char *s;
2791       int slen;
2792
2793       s = cstack.cname[i];
2794       if (s == NULL)
2795         continue;
2796       slen = strlen (s);
2797       len += slen + qlen;
2798       linebuffer_setlen (cn, len);
2799       strncat (cn->buffer, qualifier, qlen);
2800       strncat (cn->buffer, s, slen);
2801     }
2802 }
2803
2804 \f
2805 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2806 static void make_C_tag __P((bool));
2807
2808 /*
2809  * consider_token ()
2810  *      checks to see if the current token is at the start of a
2811  *      function or variable, or corresponds to a typedef, or
2812  *      is a struct/union/enum tag, or #define, or an enum constant.
2813  *
 *      Returns TRUE when the caller should go on and treat the token
 *      as a tag candidate, FALSE when the token only advanced one of
 *      the state machines or must be ignored.
 *
2814  *      *IS_FUNC_OR_VAR is set to TRUE when the token is taken as a
2815  *      function or variable name, a GNU macro name, a C++ operator, or
 *      an Objective C class or category name; for a #define it is set
 *      to whether the macro has args.  On every other path it is left
 *      untouched, so the caller must initialize it.  C_EXTP points to
 *      which language we are looking at, and may be updated when C++
 *      is detected automatically.
2816  *
2817  * Globals
2818  *      fvdef                   IN OUT
2819  *      structdef               IN OUT
2820  *      definedef               IN OUT
2821  *      typdef                  IN OUT
2822  *      objdef                  IN OUT
 *      inattribute             OUT
 *      fvextern                OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names)
2823  */
2824
2825 static bool
2826 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2827      register char *str;        /* IN: token pointer */
2828      register int len;          /* IN: token length */
2829      register int c;            /* IN: first char after the token */
2830      int *c_extp;               /* IN, OUT: C extensions mask */
2831      int bracelev;              /* IN: brace level */
2832      int parlev;                /* IN: parenthesis level */
2833      bool *is_func_or_var;      /* OUT: function or variable found */
2834 {
2835   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2836      structtype is the type of the preceding struct-like keyword, and
2837      structbracelev is the brace level where it has been seen. */
  /* These are static, so their values persist from one call to the next. */
2838   static enum sym_type structtype;
2839   static int structbracelev;
2840   static enum sym_type toktype;
2841
2842
2843   toktype = C_symtype (str, len, *c_extp);
2844
2845   /*
2846    * Skip __attribute__
2847    */
2848   if (toktype == st_C_attribute)
2849     {
2850       inattribute = TRUE;
2851       return FALSE;
2852      }
2853
2854    /*
2855     * Advance the definedef state machine.
2856     */
2857    switch (definedef)
2858      {
2859      case dnone:
2860        /* We're not on a preprocessor line. */
2861        if (toktype == st_C_gnumacro)
2862          {
2863            fvdef = fdefunkey;
2864            return FALSE;
2865          }
2866        break;
2867      case dsharpseen:
       /* First token after '#': only `define' and `undef' (both typed
          st_C_define in the keyword table) are of interest. */
2868        if (toktype == st_C_define)
2869          {
2870            definedef = ddefineseen;
2871          }
2872        else
2873          {
2874            definedef = dignorerest;
2875          }
2876        return FALSE;
2877      case ddefineseen:
2878        /*
2879         * Make a tag for any macro, unless it is a constant
2880         * and constantypedefs is FALSE.
2881         */
2882        definedef = dignorerest;
2883        *is_func_or_var = (c == '(');
2884        if (!*is_func_or_var && !constantypedefs)
2885          return FALSE;
2886        else
2887          return TRUE;
2888      case dignorerest:
2889        return FALSE;
2890      default:
2891        error ("internal error: definedef value.", (char *)NULL);
2892      }
2893
2894    /*
2895     * Now typedefs
2896     */
2897    switch (typdef)
2898      {
2899      case tnone:
2900        if (toktype == st_C_typedef)
2901          {
2902            if (typedefs)
2903              typdef = tkeyseen;
2904            fvextern = FALSE;
2905            fvdef = fvnone;
2906            return FALSE;
2907          }
2908        break;
2909      case tkeyseen:
       /* Token types not listed below leave typdef at tkeyseen. */
2910        switch (toktype)
2911          {
2912          case st_none:
2913          case st_C_class:
2914          case st_C_struct:
2915          case st_C_enum:
2916            typdef = ttypeseen;
2917          }
2918        break;
2919      case ttypeseen:
2920        if (structdef == snone && fvdef == fvnone)
2921          {
2922            fvdef = fvnameseen;
2923            return TRUE;
2924          }
2925        break;
2926      case tend:
       /* A struct-like keyword here is not the typedef tag itself. */
2927        switch (toktype)
2928          {
2929          case st_C_class:
2930          case st_C_struct:
2931          case st_C_enum:
2932            return FALSE;
2933          }
2934        return TRUE;
2935      }
2936
2937    /*
2938     * This structdef business is NOT invoked when we are ctags and the
2939     * file is plain C.  This is because a struct tag may have the same
2940     * name as another tag, and this loses with ctags.
2941     */
2942    switch (toktype)
2943      {
2944      case st_C_javastruct:
       /* Java `extends' or `implements'. */
2945        if (structdef == stagseen)
2946          structdef = scolonseen;
2947        return FALSE;
2948      case st_C_template:
2949      case st_C_class:
2950        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2951            && bracelev == 0
2952            && definedef == dnone && structdef == snone
2953            && typdef == tnone && fvdef == fvnone)
2954          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2955        if (toktype == st_C_template)
2956          break;
2957        /* FALLTHRU */
2958      case st_C_struct:
2959      case st_C_enum:
2960        if (parlev == 0
2961            && fvdef != vignore
2962            && (typdef == tkeyseen
2963                || (typedefs_or_cplusplus && structdef == snone)))
2964          {
2965            structdef = skeyseen;
2966            structtype = toktype;
2967            structbracelev = bracelev;
2968            if (fvdef == fvnameseen)
2969              fvdef = fvnone;
2970          }
2971        return FALSE;
2972      }
2973
   /* The token right after a struct-like keyword is taken as its tag. */
2974    if (structdef == skeyseen)
2975      {
2976        structdef = stagseen;
2977        return TRUE;
2978      }
2979
2980    if (typdef != tnone)
2981      definedef = dnone;
2982
2983    /* Detect Objective C constructs. */
2984    switch (objdef)
2985      {
2986      case onone:
2987        switch (toktype)
2988          {
2989          case st_C_objprot:
2990            objdef = oprotocol;
2991            return FALSE;
2992          case st_C_objimpl:
2993            objdef = oimplementation;
2994            return FALSE;
2995          }
2996        break;
2997      case oimplementation:
2998        /* Save the class tag for functions or variables defined inside. */
2999        objtag = savenstr (str, len);
3000        objdef = oinbody;
3001        return FALSE;
3002      case oprotocol:
3003        /* Save the class tag for categories. */
3004        objtag = savenstr (str, len);
3005        objdef = otagseen;
3006        *is_func_or_var = TRUE;
3007        return TRUE;
3008      case oparenseen:
3009        objdef = ocatseen;
3010        *is_func_or_var = TRUE;
3011        return TRUE;
3012      case oinbody:
3013        break;
3014      case omethodsign:
       /* First part of a method name: start token_name afresh. */
3015        if (parlev == 0)
3016          {
3017            fvdef = fvnone;
3018            objdef = omethodtag;
3019            linebuffer_setlen (&token_name, len);
3020            strncpy (token_name.buffer, str, len);
3021            token_name.buffer[len] = '\0';
3022            return TRUE;
3023          }
3024        return FALSE;
3025      case omethodcolon:
3026        if (parlev == 0)
3027          objdef = omethodparm;
3028        return FALSE;
3029      case omethodparm:
       /* A further part of the method name: append it to token_name. */
3030        if (parlev == 0)
3031          {
3032            fvdef = fvnone;
3033            objdef = omethodtag;
3034            linebuffer_setlen (&token_name, token_name.len + len);
3035            strncat (token_name.buffer, str, len);
3036            return TRUE;
3037          }
3038        return FALSE;
3039      case oignore:
3040        if (toktype == st_C_objend)
3041          {
3042            /* Memory leakage here: the string pointed by objtag is
3043               never released, because many tests would be needed to
3044               avoid breaking on incorrect input code.  The amount of
3045               memory leaked here is the sum of the lengths of the
3046               class tags.
3047            free (objtag); */
3048            objdef = onone;
3049          }
3050        return FALSE;
3051      }
3052
3053    /* A function, variable or enum constant? */
3054    switch (toktype)
3055      {
3056      case st_C_extern:
3057        fvextern = TRUE;
3058        switch  (fvdef)
3059          {
3060          case finlist:
3061          case flistseen:
3062          case fignore:
3063          case vignore:
3064            break;
3065          default:
3066            fvdef = fvnone;
3067          }
3068        return FALSE;
3069      case st_C_ignore:
       /* Keywords such as `if', `for' or `return'. */
3070        fvextern = FALSE;
3071        fvdef = vignore;
3072        return FALSE;
3073      case st_C_operator:
3074        fvdef = foperator;
3075        *is_func_or_var = TRUE;
3076        return TRUE;
3077      case st_none:
3078        if (constantypedefs
3079            && structdef == snone
3080            && structtype == st_C_enum && bracelev > structbracelev)
3081          return TRUE;           /* enum constant */
3082        switch (fvdef)
3083          {
3084          case fdefunkey:
3085            if (bracelev > 0)
3086              break;
3087            fvdef = fdefunname;  /* GNU macro */
3088            *is_func_or_var = TRUE;
3089            return TRUE;
3090          case fvnone:
3091            switch (typdef)
3092              {
3093              case ttypeseen:
3094                return FALSE;
3095              case tnone:
3096                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3097                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3098                  {
3099                    fvdef = vignore;
3100                    return FALSE;
3101                  }
3102                break;
3103              }
3104           /* FALLTHRU */
3105           case fvnameseen:
3106           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3107             {
3108               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3109                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3110               fvdef = foperator;
3111               *is_func_or_var = TRUE;
3112               return TRUE;
3113             }
3114           if (bracelev > 0 && !instruct)
3115             break;
3116           fvdef = fvnameseen;   /* function or variable */
3117           *is_func_or_var = TRUE;
3118           return TRUE;
3119         }
3120       break;
3121     }
3122
3123   return FALSE;
3124 }
3125
3126 \f
3127 /*
3128  * C_entries often keeps pointers to tokens or lines which are older than
3129  * the line currently read.  By keeping two line buffers, and switching
3130  * them at end of line, it is possible to use those pointers.
 *
 * The macros below are not self-contained: they use variables named
 * curndx, newndx, c_ext, charno, inf, lp, quotednl and savetoken, which
 * must be in scope wherever the macros are expanded.
3131  */
3132 static struct
3133 {
3134   long linepos;
3135   linebuffer lb;
3136 } lbs[2];
3137
/* curndx and newndx each select one of the two buffers in lbs. */
3138 #define current_lb_is_new (newndx == curndx)
3139 #define switch_line_buffers() (curndx = 1 - curndx)
3140
3141 #define curlb (lbs[curndx].lb)
3142 #define newlb (lbs[newndx].lb)
3143 #define curlinepos (lbs[curndx].linepos)
3144 #define newlinepos (lbs[newndx].linepos)
3145
/* Tests on the language mask c_ext. */
3146 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3147 #define cplpl (c_ext & C_PLPL)
3148 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3149
/* Read the next input line into the current buffer, recording charno as
   its position, and restart scanning at its beginning.  The current
   buffer becomes the new one.  definedef is left untouched. */
3150 #define CNL_SAVE_DEFINEDEF()                                            \
3151 do {                                                                    \
3152   curlinepos = charno;                                                  \
3153   readline (&curlb, inf);                                               \
3154   lp = curlb.buffer;                                                    \
3155   quotednl = FALSE;                                                     \
3156   newndx = curndx;                                                      \
3157 } while (0)
3158
/* Like CNL_SAVE_DEFINEDEF, but also restore a valid saved token into
   `token' and reset definedef to dnone. */
3159 #define CNL()                                                           \
3160 do {                                                                    \
3161   CNL_SAVE_DEFINEDEF();                                                 \
3162   if (savetoken.valid)                                                  \
3163     {                                                                   \
3164       token = savetoken;                                                \
3165       savetoken.valid = FALSE;                                          \
3166     }                                                                   \
3167   definedef = dnone;                                                    \
3168 } while (0)
3169
3170
3171 static void
3172 make_C_tag (isfun)
3173      bool isfun;
3174 {
3175   /* This function is never called when token.valid is FALSE, but
3176      we must protect against invalid input or internal errors. */
3177   if (!DEBUG && !token.valid)
3178     return;
3179
3180   if (token.valid)
3181     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3182               token.offset+token.length+1, token.lineno, token.linepos);
3183   else                          /* this case is optimised away if !DEBUG */
3184     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3185               token_name.len + 17, isfun, token.line,
3186               token.offset+token.length+1, token.lineno, token.linepos);
3187
3188   token.valid = FALSE;
3189 }
3190
3191
3192 /*
3193  * C_entries ()
3194  *      This routine finds functions, variables, typedefs,
3195  *      #define's, enum constants and struct/union/enum definitions in
3196  *      C syntax and adds them to the list.
3197  */
3198 static void
3199 C_entries (c_ext, inf)
3200      int c_ext;                 /* extension of C */
3201      FILE *inf;                 /* input file */
3202 {
3203   register char c;              /* latest char read; '\0' for end of line */
3204   register char *lp;            /* pointer one beyond the character `c' */
3205   int curndx, newndx;           /* indices for current and new lb */
3206   register int tokoff;          /* offset in line of start of current token */
3207   register int toklen;          /* length of current token */
3208   char *qualifier;              /* string used to qualify names */
3209   int qlen;                     /* length of qualifier */
3210   int bracelev;                 /* current brace level */
3211   int bracketlev;               /* current bracket level */
3212   int parlev;                   /* current parenthesis level */
3213   int attrparlev;               /* __attribute__ parenthesis level */
3214   int templatelev;              /* current template level */
3215   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3216   bool incomm, inquote, inchar, quotednl, midtoken;
3217   bool yacc_rules;              /* in the rules part of a yacc file */
3218   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3219
3220
3221   linebuffer_init (&lbs[0].lb);
3222   linebuffer_init (&lbs[1].lb);
3223   if (cstack.size == 0)
3224     {
3225       cstack.size = (DEBUG) ? 1 : 4;
3226       cstack.nl = 0;
3227       cstack.cname = xnew (cstack.size, char *);
3228       cstack.bracelev = xnew (cstack.size, int);
3229     }
3230
3231   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3232   curndx = newndx = 0;
3233   lp = curlb.buffer;
3234   *lp = 0;
3235
3236   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3237   structdef = snone; definedef = dnone; objdef = onone;
3238   yacc_rules = FALSE;
3239   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3240   token.valid = savetoken.valid = FALSE;
3241   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3242   if (cjava)
3243     { qualifier = "."; qlen = 1; }
3244   else
3245     { qualifier = "::"; qlen = 2; }
3246
3247
3248   while (!feof (inf))
3249     {
3250       c = *lp++;
3251       if (c == '\\')
3252         {
3253           /* If we are at the end of the line, the next character is a
3254              '\0'; do not skip it, because it is what tells us
3255              to read the next line.  */
3256           if (*lp == '\0')
3257             {
3258               quotednl = TRUE;
3259               continue;
3260             }
3261           lp++;
3262           c = ' ';
3263         }
3264       else if (incomm)
3265         {
3266           switch (c)
3267             {
3268             case '*':
3269               if (*lp == '/')
3270                 {
3271                   c = *lp++;
3272                   incomm = FALSE;
3273                 }
3274               break;
3275             case '\0':
3276               /* Newlines inside comments do not end macro definitions in
3277                  traditional cpp. */
3278               CNL_SAVE_DEFINEDEF ();
3279               break;
3280             }
3281           continue;
3282         }
3283       else if (inquote)
3284         {
3285           switch (c)
3286             {
3287             case '"':
3288               inquote = FALSE;
3289               break;
3290             case '\0':
3291               /* Newlines inside strings do not end macro definitions
3292                  in traditional cpp, even though compilers don't
3293                  usually accept them. */
3294               CNL_SAVE_DEFINEDEF ();
3295               break;
3296             }
3297           continue;
3298         }
3299       else if (inchar)
3300         {
3301           switch (c)
3302             {
3303             case '\0':
3304               /* Hmmm, something went wrong. */
3305               CNL ();
3306               /* FALLTHRU */
3307             case '\'':
3308               inchar = FALSE;
3309               break;
3310             }
3311           continue;
3312         }
3313       else if (bracketlev > 0)
3314         {
3315           switch (c)
3316             {
3317             case ']':
3318               if (--bracketlev > 0)
3319                 continue;
3320               break;
3321             case '\0':
3322               CNL_SAVE_DEFINEDEF ();
3323               break;
3324             }
3325           continue;
3326         }
3327       else switch (c)
3328         {
3329         case '"':
3330           inquote = TRUE;
3331           if (inattribute)
3332             break;
3333           switch (fvdef)
3334             {
3335             case fdefunkey:
3336             case fstartlist:
3337             case finlist:
3338             case fignore:
3339             case vignore:
3340               break;
3341             default:
3342               fvextern = FALSE;
3343               fvdef = fvnone;
3344             }
3345           continue;
3346         case '\'':
3347           inchar = TRUE;
3348           if (inattribute)
3349             break;
3350           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3351             {
3352               fvextern = FALSE;
3353               fvdef = fvnone;
3354             }
3355           continue;
3356         case '/':
3357           if (*lp == '*')
3358             {
3359               lp++;
3360               incomm = TRUE;
3361               continue;
3362             }
3363           else if (/* cplpl && */ *lp == '/')
3364             {
3365               c = '\0';
3366               break;
3367             }
3368           else
3369             break;
3370         case '%':
3371           if ((c_ext & YACC) && *lp == '%')
3372             {
3373               /* Entering or exiting rules section in yacc file. */
3374               lp++;
3375               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3376               typdef = tnone; structdef = snone;
3377               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3378               bracelev = 0;
3379               yacc_rules = !yacc_rules;
3380               continue;
3381             }
3382           else
3383             break;
3384         case '#':
3385           if (definedef == dnone)
3386             {
3387               char *cp;
3388               bool cpptoken = TRUE;
3389
3390               /* Look back on this line.  If all blanks, or nonblanks
3391                  followed by an end of comment, this is a preprocessor
3392                  token. */
3393               for (cp = newlb.buffer; cp < lp-1; cp++)
3394                 if (!iswhite (*cp))
3395                   {
3396                     if (*cp == '*' && *(cp+1) == '/')
3397                       {
3398                         cp++;
3399                         cpptoken = TRUE;
3400                       }
3401                     else
3402                       cpptoken = FALSE;
3403                   }
3404               if (cpptoken)
3405                 definedef = dsharpseen;
3406             } /* if (definedef == dnone) */
3407           continue;
3408         case '[':
3409           bracketlev++;
3410             continue;
3411         } /* switch (c) */
3412
3413
3414       /* Consider token only if some involved conditions are satisfied. */
3415       if (typdef != tignore
3416           && definedef != dignorerest
3417           && fvdef != finlist
3418           && templatelev == 0
3419           && (definedef != dnone
3420               || structdef != scolonseen)
3421           && !inattribute)
3422         {
3423           if (midtoken)
3424             {
3425               if (endtoken (c))
3426                 {
3427                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3428                     /* This handles :: in the middle,
3429                        but not at the beginning of an identifier.
3430                        Also, space-separated :: is not recognised. */
3431                     {
3432                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3433                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3434                       lp += 2;
3435                       toklen += 2;
3436                       c = lp[-1];
3437                       goto still_in_token;
3438                     }
3439                   else
3440                     {
3441                       bool funorvar = FALSE;
3442
3443                       if (yacc_rules
3444                           || consider_token (newlb.buffer + tokoff, toklen, c,
3445                                              &c_ext, bracelev, parlev,
3446                                              &funorvar))
3447                         {
3448                           if (fvdef == foperator)
3449                             {
3450                               char *oldlp = lp;
3451                               lp = skip_spaces (lp-1);
3452                               if (*lp != '\0')
3453                                 lp += 1;
3454                               while (*lp != '\0'
3455                                      && !iswhite (*lp) && *lp != '(')
3456                                 lp += 1;
3457                               c = *lp++;
3458                               toklen += lp - oldlp;
3459                             }
3460                           token.named = FALSE;
3461                           if (!plainc
3462                               && nestlev > 0 && definedef == dnone)
3463                             /* in struct body */
3464                             {
3465                               write_classname (&token_name, qualifier);
3466                               linebuffer_setlen (&token_name,
3467                                                  token_name.len+qlen+toklen);
3468                               strcat (token_name.buffer, qualifier);
3469                               strncat (token_name.buffer,
3470                                        newlb.buffer + tokoff, toklen);
3471                               token.named = TRUE;
3472                             }
3473                           else if (objdef == ocatseen)
3474                             /* Objective C category */
3475                             {
3476                               int len = strlen (objtag) + 2 + toklen;
3477                               linebuffer_setlen (&token_name, len);
3478                               strcpy (token_name.buffer, objtag);
3479                               strcat (token_name.buffer, "(");
3480                               strncat (token_name.buffer,
3481                                        newlb.buffer + tokoff, toklen);
3482                               strcat (token_name.buffer, ")");
3483                               token.named = TRUE;
3484                             }
3485                           else if (objdef == omethodtag
3486                                    || objdef == omethodparm)
3487                             /* Objective C method */
3488                             {
3489                               token.named = TRUE;
3490                             }
3491                           else if (fvdef == fdefunname)
3492                             /* GNU DEFUN and similar macros */
3493                             {
3494                               bool defun = (newlb.buffer[tokoff] == 'F');
3495                               int off = tokoff;
3496                               int len = toklen;
3497
3498                               /* Rewrite the tag so that emacs lisp DEFUNs
3499                                  can be found by their elisp name */
3500                               if (defun)
3501                                 {
3502                                   off += 1;
3503                                   len -= 1;
3504                                 }
3505                               linebuffer_setlen (&token_name, len);
3506                               strncpy (token_name.buffer,
3507                                        newlb.buffer + off, len);
3508                               token_name.buffer[len] = '\0';
3509                               if (defun)
3510                                 while (--len >= 0)
3511                                   if (token_name.buffer[len] == '_')
3512                                     token_name.buffer[len] = '-';
3513                               token.named = defun;
3514                             }
3515                           else
3516                             {
3517                               linebuffer_setlen (&token_name, toklen);
3518                               strncpy (token_name.buffer,
3519                                        newlb.buffer + tokoff, toklen);
3520                               token_name.buffer[toklen] = '\0';
3521                               /* Name macros and members. */
3522                               token.named = (structdef == stagseen
3523                                              || typdef == ttypeseen
3524                                              || typdef == tend
3525                                              || (funorvar
3526                                                  && definedef == dignorerest)
3527                                              || (funorvar
3528                                                  && definedef == dnone
3529                                                  && structdef == snone
3530                                                  && bracelev > 0));
3531                             }
3532                           token.lineno = lineno;
3533                           token.offset = tokoff;
3534                           token.length = toklen;
3535                           token.line = newlb.buffer;
3536                           token.linepos = newlinepos;
3537                           token.valid = TRUE;
3538
3539                           if (definedef == dnone
3540                               && (fvdef == fvnameseen
3541                                   || fvdef == foperator
3542                                   || structdef == stagseen
3543                                   || typdef == tend
3544                                   || typdef == ttypeseen
3545                                   || objdef != onone))
3546                             {
3547                               if (current_lb_is_new)
3548                                 switch_line_buffers ();
3549                             }
3550                           else if (definedef != dnone
3551                                    || fvdef == fdefunname
3552                                    || instruct)
3553                             make_C_tag (funorvar);
3554                         }
3555                       else /* not yacc and consider_token failed */
3556                         {
3557                           if (inattribute && fvdef == fignore)
3558                             {
3559                               /* We have just met __attribute__ after a
3560                                  function parameter list: do not tag the
3561                                  function again. */
3562                               fvdef = fvnone;
3563                             }
3564                         }
3565                       midtoken = FALSE;
3566                     }
3567                 } /* if (endtoken (c)) */
3568               else if (intoken (c))
3569                 still_in_token:
3570                 {
3571                   toklen++;
3572                   continue;
3573                 }
3574             } /* if (midtoken) */
3575           else if (begtoken (c))
3576             {
3577               switch (definedef)
3578                 {
3579                 case dnone:
3580                   switch (fvdef)
3581                     {
3582                     case fstartlist:
3583                       /* This prevents tagging fb in
3584                          void (__attribute__((noreturn)) *fb) (void);
3585                          Fixing this is not easy and not very important. */
3586                       fvdef = finlist;
3587                       continue;
3588                     case flistseen:
3589                       if (plainc || declarations)
3590                         {
3591                           make_C_tag (TRUE); /* a function */
3592                           fvdef = fignore;
3593                         }
3594                       break;
3595                     }
3596                   if (structdef == stagseen && !cjava)
3597                     {
3598                       popclass_above (bracelev);
3599                       structdef = snone;
3600                     }
3601                   break;
3602                 case dsharpseen:
3603                   savetoken = token;
3604                   break;
3605                 }
3606               if (!yacc_rules || lp == newlb.buffer + 1)
3607                 {
3608                   tokoff = lp - 1 - newlb.buffer;
3609                   toklen = 1;
3610                   midtoken = TRUE;
3611                 }
3612               continue;
3613             } /* if (begtoken) */
3614         } /* if must look at token */
3615
3616
3617       /* Detect end of line, colon, comma, semicolon and various braces
3618          after having handled a token.*/
3619       switch (c)
3620         {
3621         case ':':
3622           if (inattribute)
3623             break;
3624           if (yacc_rules && token.offset == 0 && token.valid)
3625             {
3626               make_C_tag (FALSE); /* a yacc function */
3627               break;
3628             }
3629           if (definedef != dnone)
3630             break;
3631           switch (objdef)
3632             {
3633             case  otagseen:
3634               objdef = oignore;
3635               make_C_tag (TRUE); /* an Objective C class */
3636               break;
3637             case omethodtag:
3638             case omethodparm:
3639               objdef = omethodcolon;
3640               linebuffer_setlen (&token_name, token_name.len + 1);
3641               strcat (token_name.buffer, ":");
3642               break;
3643             }
3644           if (structdef == stagseen)
3645             {
3646               structdef = scolonseen;
3647               break;
3648             }
3649           /* Should be useless, but may work as a safety net. */
3650           if (cplpl && fvdef == flistseen)
3651             {
3652               make_C_tag (TRUE); /* a function */
3653               fvdef = fignore;
3654               break;
3655             }
3656           break;
3657         case ';':
3658           if (definedef != dnone || inattribute)
3659             break;
3660           switch (typdef)
3661             {
3662             case tend:
3663             case ttypeseen:
3664               make_C_tag (FALSE); /* a typedef */
3665               typdef = tnone;
3666               fvdef = fvnone;
3667               break;
3668             case tnone:
3669             case tinbody:
3670             case tignore:
3671               switch (fvdef)
3672                 {
3673                 case fignore:
3674                   if (typdef == tignore || cplpl)
3675                     fvdef = fvnone;
3676                   break;
3677                 case fvnameseen:
3678                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3679                       || (members && instruct))
3680                     make_C_tag (FALSE); /* a variable */
3681                   fvextern = FALSE;
3682                   fvdef = fvnone;
3683                   token.valid = FALSE;
3684                   break;
3685                 case flistseen:
3686                   if ((declarations
3687                        && (cplpl || !instruct)
3688                        && (typdef == tnone || (typdef != tignore && instruct)))
3689                       || (members
3690                           && plainc && instruct))
3691                     make_C_tag (TRUE);  /* a function */
3692                   /* FALLTHRU */
3693                 default:
3694                   fvextern = FALSE;
3695                   fvdef = fvnone;
3696                   if (declarations
3697                        && cplpl && structdef == stagseen)
3698                     make_C_tag (FALSE); /* forward declaration */
3699                   else
3700                     token.valid = FALSE;
3701                 } /* switch (fvdef) */
3702               /* FALLTHRU */
3703             default:
3704               if (!instruct)
3705                 typdef = tnone;
3706             }
3707           if (structdef == stagseen)
3708             structdef = snone;
3709           break;
3710         case ',':
3711           if (definedef != dnone || inattribute)
3712             break;
3713           switch (objdef)
3714             {
3715             case omethodtag:
3716             case omethodparm:
3717               make_C_tag (TRUE); /* an Objective C method */
3718               objdef = oinbody;
3719               break;
3720             }
3721           switch (fvdef)
3722             {
3723             case fdefunkey:
3724             case foperator:
3725             case fstartlist:
3726             case finlist:
3727             case fignore:
3728             case vignore:
3729               break;
3730             case fdefunname:
3731               fvdef = fignore;
3732               break;
3733             case fvnameseen:
3734               if (parlev == 0
3735                   && ((globals
3736                        && bracelev == 0
3737                        && templatelev == 0
3738                        && (!fvextern || declarations))
3739                       || (members && instruct)))
3740                   make_C_tag (FALSE); /* a variable */
3741               break;
3742             case flistseen:
3743               if ((declarations && typdef == tnone && !instruct)
3744                   || (members && typdef != tignore && instruct))
3745                 {
3746                   make_C_tag (TRUE); /* a function */
3747                   fvdef = fvnameseen;
3748                 }
3749               else if (!declarations)
3750                 fvdef = fvnone;
3751               token.valid = FALSE;
3752               break;
3753             default:
3754               fvdef = fvnone;
3755             }
3756           if (structdef == stagseen)
3757             structdef = snone;
3758           break;
3759         case ']':
3760           if (definedef != dnone || inattribute)
3761             break;
3762           if (structdef == stagseen)
3763             structdef = snone;
3764           switch (typdef)
3765             {
3766             case ttypeseen:
3767             case tend:
3768               typdef = tignore;
3769               make_C_tag (FALSE);       /* a typedef */
3770               break;
3771             case tnone:
3772             case tinbody:
3773               switch (fvdef)
3774                 {
3775                 case foperator:
3776                 case finlist:
3777                 case fignore:
3778                 case vignore:
3779                   break;
3780                 case fvnameseen:
3781                   if ((members && bracelev == 1)
3782                       || (globals && bracelev == 0
3783                           && (!fvextern || declarations)))
3784                     make_C_tag (FALSE); /* a variable */
3785                   /* FALLTHRU */
3786                 default:
3787                   fvdef = fvnone;
3788                 }
3789               break;
3790             }
3791           break;
3792         case '(':
3793           if (inattribute)
3794             {
3795               attrparlev++;
3796               break;
3797             }
3798           if (definedef != dnone)
3799             break;
3800           if (objdef == otagseen && parlev == 0)
3801             objdef = oparenseen;
3802           switch (fvdef)
3803             {
3804             case fvnameseen:
3805               if (typdef == ttypeseen
3806                   && *lp != '*'
3807                   && !instruct)
3808                 {
3809                   /* This handles constructs like:
3810                      typedef void OperatorFun (int fun); */
3811                   make_C_tag (FALSE);
3812                   typdef = tignore;
3813                   fvdef = fignore;
3814                   break;
3815                 }
3816               /* FALLTHRU */
3817             case foperator:
3818               fvdef = fstartlist;
3819               break;
3820             case flistseen:
3821               fvdef = finlist;
3822               break;
3823             }
3824           parlev++;
3825           break;
3826         case ')':
3827           if (inattribute)
3828             {
3829               if (--attrparlev == 0)
3830                 inattribute = FALSE;
3831               break;
3832             }
3833           if (definedef != dnone)
3834             break;
3835           if (objdef == ocatseen && parlev == 1)
3836             {
3837               make_C_tag (TRUE); /* an Objective C category */
3838               objdef = oignore;
3839             }
3840           if (--parlev == 0)
3841             {
3842               switch (fvdef)
3843                 {
3844                 case fstartlist:
3845                 case finlist:
3846                   fvdef = flistseen;
3847                   break;
3848                 }
3849               if (!instruct
3850                   && (typdef == tend
3851                       || typdef == ttypeseen))
3852                 {
3853                   typdef = tignore;
3854                   make_C_tag (FALSE); /* a typedef */
3855                 }
3856             }
3857           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3858             parlev = 0;
3859           break;
3860         case '{':
3861           if (definedef != dnone)
3862             break;
3863           if (typdef == ttypeseen)
3864             {
3865               /* Whenever typdef is set to tinbody (currently only
3866                  here), typdefbracelev should be set to bracelev. */
3867               typdef = tinbody;
3868               typdefbracelev = bracelev;
3869             }
3870           switch (fvdef)
3871             {
3872             case flistseen:
3873               make_C_tag (TRUE);    /* a function */
3874               /* FALLTHRU */
3875             case fignore:
3876               fvdef = fvnone;
3877               break;
3878             case fvnone:
3879               switch (objdef)
3880                 {
3881                 case otagseen:
3882                   make_C_tag (TRUE); /* an Objective C class */
3883                   objdef = oignore;
3884                   break;
3885                 case omethodtag:
3886                 case omethodparm:
3887                   make_C_tag (TRUE); /* an Objective C method */
3888                   objdef = oinbody;
3889                   break;
3890                 default:
3891                   /* Neutralize `extern "C" {' grot. */
3892                   if (bracelev == 0 && structdef == snone && nestlev == 0
3893                       && typdef == tnone)
3894                     bracelev = -1;
3895                 }
3896               break;
3897             }
3898           switch (structdef)
3899             {
3900             case skeyseen:         /* unnamed struct */
3901               pushclass_above (bracelev, NULL, 0);
3902               structdef = snone;
3903               break;
3904             case stagseen:         /* named struct or enum */
3905             case scolonseen:       /* a class */
3906               pushclass_above (bracelev,token.line+token.offset, token.length);
3907               structdef = snone;
3908               make_C_tag (FALSE);  /* a struct or enum */
3909               break;
3910             }
3911           bracelev++;
3912           break;
3913         case '*':
3914           if (definedef != dnone)
3915             break;
3916           if (fvdef == fstartlist)
3917             {
3918               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3919               token.valid = FALSE;
3920             }
3921           break;
3922         case '}':
3923           if (definedef != dnone)
3924             break;
3925           if (!ignoreindent && lp == newlb.buffer + 1)
3926             {
3927               if (bracelev != 0)
3928                 token.valid = FALSE;
3929               bracelev = 0;     /* reset brace level if first column */
3930               parlev = 0;       /* also reset paren level, just in case... */
3931             }
3932           else if (bracelev > 0)
3933             bracelev--;
3934           else
3935             token.valid = FALSE; /* something gone amiss, token unreliable */
3936           popclass_above (bracelev);
3937           structdef = snone;
3938           /* Only if typdef == tinbody is typdefbracelev significant. */
3939           if (typdef == tinbody && bracelev <= typdefbracelev)
3940             {
3941               assert (bracelev == typdefbracelev);
3942               typdef = tend;
3943             }
3944           break;
3945         case '=':
3946           if (definedef != dnone)
3947             break;
3948           switch (fvdef)
3949             {
3950             case foperator:
3951             case finlist:
3952             case fignore:
3953             case vignore:
3954               break;
3955             case fvnameseen:
3956               if ((members && bracelev == 1)
3957                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3958                 make_C_tag (FALSE); /* a variable */
3959               /* FALLTHRU */
3960             default:
3961               fvdef = vignore;
3962             }
3963           break;
3964         case '<':
3965           if (cplpl
3966               && (structdef == stagseen || fvdef == fvnameseen))
3967             {
3968               templatelev++;
3969               break;
3970             }
3971           goto resetfvdef;
3972         case '>':
3973           if (templatelev > 0)
3974             {
3975               templatelev--;
3976               break;
3977             }
3978           goto resetfvdef;
3979         case '+':
3980         case '-':
3981           if (objdef == oinbody && bracelev == 0)
3982             {
3983               objdef = omethodsign;
3984               break;
3985             }
3986           /* FALLTHRU */
3987         resetfvdef:
3988         case '#': case '~': case '&': case '%': case '/':
3989         case '|': case '^': case '!': case '.': case '?':
3990           if (definedef != dnone)
3991             break;
3992           /* These surely cannot follow a function tag in C. */
3993           switch (fvdef)
3994             {
3995             case foperator:
3996             case finlist:
3997             case fignore:
3998             case vignore:
3999               break;
4000             default:
4001               fvdef = fvnone;
4002             }
4003           break;
4004         case '\0':
4005           if (objdef == otagseen)
4006             {
4007               make_C_tag (TRUE); /* an Objective C class */
4008               objdef = oignore;
4009             }
4010           /* If a macro spans multiple lines don't reset its state. */
4011           if (quotednl)
4012             CNL_SAVE_DEFINEDEF ();
4013           else
4014             CNL ();
4015           break;
4016         } /* switch (c) */
4017
4018     } /* while not eof */
4019
4020   free (lbs[0].lb.buffer);
4021   free (lbs[1].lb.buffer);
4022 }
4023
4024 /*
4025  * Process either a C++ file or a C file depending on the setting
4026  * of a global flag.
      *
      * INF is the input stream, passed straight on to C_entries.  When the
      * global `cplusplus' is set the flag argument is C_PLPL; otherwise it
      * is C_AUTO, which asks C_entries to detect C++ by itself (it turns
      * C_PLPL on when it meets `::' in the middle of an identifier).
4027  */
4028 static void
4029 default_C_entries (inf)
4030      FILE *inf;
4031 {
4032   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4033 }
4034
4035 /* Always do plain C.  INF is handed to C_entries with a flag argument
        of 0, so no language-extension flag is requested. */
4036 static void
4037 plain_C_entries (inf)
4038      FILE *inf;
4039 {
4040   C_entries (0, inf);
4041 }
4042
4043 /* Always do C++.  INF is handed to C_entries with the C_PLPL flag,
        whatever the setting of the global `cplusplus'. */
4044 static void
4045 Cplusplus_entries (inf)
4046      FILE *inf;
4047 {
4048   C_entries (C_PLPL, inf);
4049 }
4050
4051 /* Always do Java.  INF is handed to C_entries with the C_JAVA flag. */
4052 static void
4053 Cjava_entries (inf)
4054      FILE *inf;
4055 {
4056   C_entries (C_JAVA, inf);
4057 }
4058
4059 /* Always do C*.  INF is handed to C_entries with the C_STAR flag. */
4060 static void
4061 Cstar_entries (inf)
4062      FILE *inf;
4063 {
4064   C_entries (C_STAR, inf);
4065 }
4066
4067 /* Always do Yacc.  INF is handed to C_entries with the YACC flag, which
        makes it treat `%%' as entering or leaving the rules section. */
4068 static void
4069 Yacc_entries (inf)
4070      FILE *inf;
4071 {
4072   C_entries (YACC, inf);
4073 }
4074
4075 \f
4076 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to a `for' header; the statement that
        follows the macro call is the loop body.  Before each iteration,
        and as long as FILE_POINTER is not at end of file, readline reads
        one line into LINE_BUFFER and CHAR_POINTER is set to the start of
        that line.  LINE_BUFFER must be the buffer object itself, not a
        pointer to it, because the macro takes its address and accesses
        its `buffer' member. */
4077 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4078   for (;                        /* loop initialization */               \
4079        !feof (file_pointer)     /* loop test */                         \
4080        &&                       /* instructions at start of loop */     \
4081           (readline (&line_buffer, file_pointer),                       \
4082            char_pointer = line_buffer.buffer,                           \
4083            TRUE);                                                       \
4084       )
4085
     /* LOOKING_AT is true when CP points at the keyword KW and the
        character right after it satisfies notinname.  In that case, as a
        side effect, CP is moved past the keyword and past the spaces that
        follow it; otherwise CP is left alone.  CP must be an lvalue and is
        evaluated more than once. */
4086 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
4087   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
4088    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
4089    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
4090    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4091
4092 /* Similar to LOOKING_AT but compares with strncaseeq, does not use
        notinname, and does not skip the spaces after the keyword: on a
        match CP is left pointing just past KW. */
4093 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4094   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
4095    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
4096    && ((cp) += sizeof(kw)-1))                   /* skip the keyword */
4097
4098 /*
4099  * Read a file, but do no processing.  This is used to do regexp
4100  * matching on files that have no language defined.
      *
      * Every line of INF is read into the global buffer `lb' by
      * LOOP_ON_INPUT_LINES and then dropped.  NOTE(review): the regexp
      * matching mentioned above is not visible here; presumably it is
      * done from within readline -- confirm there.
4101  */
4102 static void
4103 just_read_file (inf)
4104      FILE *inf;
4105 {
       /* Only needed because LOOP_ON_INPUT_LINES wants a pointer to set. */
4106   register char *dummy;
4107
4108   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4109     continue;
4110 }
4111
4112 \f
4113 /* Fortran parsing */
4114
4115 static void F_takeprec __P((void));
4116 static void F_getit __P((FILE *));
4117
     /*
      * Step the global cursor `dbp' over an optional length suffix of the
      * form `*(*)' or `*' followed by digits, as written after a type
      * keyword; blanks before and after the `*' are skipped.  When there
      * is no `*', `dbp' is only moved past leading blanks.
      *
      * When `*' is followed by something else, `dbp' is moved back by one
      * character.  NOTE(review): that leaves `dbp' on the `*' only when no
      * blank follows the `*'; with blanks in between it ends up on the
      * last blank -- check that the caller still rejects such a line.
      */
4118 static void
4119 F_takeprec ()
4120 {
4121   dbp = skip_spaces (dbp);
4122   if (*dbp != '*')
4123     return;
4124   dbp++;
4125   dbp = skip_spaces (dbp);
4126   if (strneq (dbp, "(*)", 3))
4127     {
4128       dbp += 3;
4129       return;
4130     }
4131   if (!ISDIGIT (*dbp))
4132     {
4133       --dbp;                    /* force failure */
4134       return;
4135     }
       /* Skip the whole run of digits. */
4136   do
4137     dbp++;
4138   while (ISDIGIT (*dbp));
4139 }
4140
     /*
      * Tag the Fortran name found at the global cursor `dbp'.
      *
      * Leading blanks are skipped first.  If the current line ends there,
      * the next line is read from INF into `lb' and is accepted only when
      * its sixth character is `&' (presumably a continuation marker --
      * confirm against the Fortran dialects etags is meant to support);
      * the name is then looked for after that character.  Without such a
      * line, or when the text at `dbp' does not start with a letter, `_'
      * or `$', nothing is tagged.  Otherwise the name extends over the
      * following characters accepted by intoken, and make_tag is called
      * with it.
      */
4141 static void
4142 F_getit (inf)
4143      FILE *inf;
4144 {
4145   register char *cp;
4146
4147   dbp = skip_spaces (dbp);
4148   if (*dbp == '\0')
4149     {
           /* Nothing left on this line: the name can only be on the next
              one. */
4150       readline (&lb, inf);
4151       dbp = lb.buffer;
           /* Bytes after the terminating NUL are not part of the line
              just read, so make sure the line is at least six
              characters long before testing its sixth one. */
4152       if (strlen (dbp) < 6 || dbp[5] != '&')
4153         return;
4154       dbp += 6;
4155       dbp = skip_spaces (dbp);
4156     }
4157   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4158     return;
       /* Find the end of the name. */
4159   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4160     continue;
4161   make_tag (dbp, cp-dbp, TRUE,
4162             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4163 }
4164
4165
     /*
      * Make tags for the program units found in the Fortran source INF.
      *
      * Each line is read into the global buffer `lb' and scanned through
      * the global cursor `dbp'.  An optional leading `%' and an optional
      * type prefix (integer, real, logical, complex, character or double
      * precision) are stepped over; if one of the keywords function,
      * subroutine, entry, blockdata or block data comes next, the name
      * after it is tagged by F_getit.  A block data without a name is
      * tagged as `blockdata'.
      *
      * NOTE(review): nocase_tail is not defined in this part of the file;
      * the code below presumably relies on it matching the keyword at
      * `dbp' without regard to case and advancing `dbp' past it on
      * success -- confirm at its definition.
      */
4166 static void
4167 Fortran_functions (inf)
4168      FILE *inf;
4169 {
4170   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4171     {
4172       if (*dbp == '%')
4173         dbp++;                  /* Ratfor escape to fortran */
4174       dbp = skip_spaces (dbp);
4175       if (*dbp == '\0')
4176         continue;
           /* First pass over the line: step over an optional type prefix,
              including a length suffix if F_takeprec finds one. */
4177       switch (lowcase (*dbp))
4178         {
4179         case 'i':
4180           if (nocase_tail ("integer"))
4181             F_takeprec ();
4182           break;
4183         case 'r':
4184           if (nocase_tail ("real"))
4185             F_takeprec ();
4186           break;
4187         case 'l':
4188           if (nocase_tail ("logical"))
4189             F_takeprec ();
4190           break;
4191         case 'c':
4192           if (nocase_tail ("complex") || nocase_tail ("character"))
4193             F_takeprec ();
4194           break;
4195         case 'd':
4196           if (nocase_tail ("double"))
4197             {
                   /* `double' only counts when `precision' follows it on
                      the same line; otherwise give up on this line. */
4198               dbp = skip_spaces (dbp);
4199               if (*dbp == '\0')
4200                 continue;
4201               if (nocase_tail ("precision"))
4202                 break;
4203               continue;
4204             }
4205           break;
4206         }
4207       dbp = skip_spaces (dbp);
4208       if (*dbp == '\0')
4209         continue;
           /* Second pass: look for a keyword that introduces a name to
              tag.  Every case ends the handling of this line. */
4210       switch (lowcase (*dbp))
4211         {
4212         case 'f':
4213           if (nocase_tail ("function"))
4214             F_getit (inf);
4215           continue;
4216         case 's':
4217           if (nocase_tail ("subroutine"))
4218             F_getit (inf);
4219           continue;
4220         case 'e':
4221           if (nocase_tail ("entry"))
4222             F_getit (inf);
4223           continue;
4224         case 'b':
4225           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4226             {
4227               dbp = skip_spaces (dbp);
4228               if (*dbp == '\0') /* assume un-named */
4229                 make_tag ("blockdata", 9, TRUE,
4230                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4231               else
4232                 F_getit (inf);  /* look for name */
4233             }
4234           continue;
4235         }
4236     }
4237 }
4238
4239 \f
4240 /*
4241  * Ada parsing
4242  * Original code by
4243  * Philippe Waroquiers (1998)
4244  */
4245
4246 static void Ada_getit __P((FILE *, char *));
4247
4248 /* Once we are positioned after an "interesting" keyword, let's get
4249    the real tag value necessary.

        The name is looked for at the global cursor `dbp'; INF is the
        stream from which a further line is read into `lb' when the
        current line ends, or turns into a `--' comment, before the name.
        NAME_QUALIFIER is the suffix appended to the name to build the
        tag; it is replaced by "/b" when the word `body' is met before
        the name.  The word `type' before the name is skipped as well.

        The name is either the text between two double quotes or a run
        of letters, digits, `_' and `.'.  In the second case an empty run
        makes the function return without tagging anything.

        NOTE(review): the loop is guarded by !feof (inf), so nothing is
        tagged when INF is already at end of file on entry, even if the
        name is on the current line -- confirm this is intended. */
4250 static void
4251 Ada_getit (inf, name_qualifier)
4252      FILE *inf;
4253      char *name_qualifier;
4254 {
4255   register char *cp;
4256   char *name;
4257   char c;
4258
4259   while (!feof (inf))
4260     {
4261       dbp = skip_spaces (dbp);
4262       if (*dbp == '\0'
4263           || (dbp[0] == '-' && dbp[1] == '-'))
4264         {
               /* End of line or start of a comment: go on with the next
                  line. */
4265           readline (&lb, inf);
4266           dbp = lb.buffer;
4267         }
4268       switch (lowcase(*dbp))
4269         {
4270         case 'b':
4271           if (nocase_tail ("body"))
4272             {
4273               /* Skipping body of   procedure body   or   package body or ....
4274                  resetting qualifier to body instead of spec. */
4275               name_qualifier = "/b";
4276               continue;
4277             }
4278           break;
4279         case 't':
4280           /* Skipping type of   task type   or   protected type ... */
4281           if (nocase_tail ("type"))
4282             continue;
4283           break;
4284         }
4285       if (*dbp == '"')
4286         {
               /* Quoted name: it runs up to the closing quote, or to the
                  end of the line if there is none. */
4287           dbp += 1;
4288           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4289             continue;
4290         }
4291       else
4292         {
4293           dbp = skip_spaces (dbp);
4294           for (cp = dbp;
4295                (*cp != '\0'
4296                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4297                cp++)
4298             continue;
4299           if (cp == dbp)
4300             return;
4301         }
           /* Terminate the name in place just long enough to build the
              tag string from it, then put the saved character back. */
4302       c = *cp;
4303       *cp = '\0';
4304       name = concat (dbp, name_qualifier, "");
4305       *cp = c;
4306       make_tag (name, strlen (name), TRUE,
4307                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4308       free (name);
           /* Leave the cursor after the closing quote of a quoted name. */
4309       if (c == '"')
4310         dbp = cp + 1;
4311       return;
4312     }
4313 }
4314
4315 static void
4316 Ada_funcs (inf)
4317      FILE *inf;
4318 {
4319   bool inquote = FALSE;
4320   bool skip_till_semicolumn = FALSE;
4321
4322   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4323     {
4324       while (*dbp != '\0')
4325         {
4326           /* Skip a string i.e. "abcd". */
4327           if (inquote || (*dbp == '"'))
4328             {
4329               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4330               if (dbp != NULL)
4331                 {
4332                   inquote = FALSE;
4333                   dbp += 1;
4334                   continue;     /* advance char */
4335                 }
4336               else
4337                 {
4338                   inquote = TRUE;
4339                   break;        /* advance line */
4340                 }
4341             }
4342
4343           /* Skip comments. */
4344           if (dbp[0] == '-' && dbp[1] == '-')
4345             break;              /* advance line */
4346
4347           /* Skip character enclosed in single quote i.e. 'a'
4348              and skip single quote starting an attribute i.e. 'Image. */
4349           if (*dbp == '\'')
4350             {
4351               dbp++ ;
4352               if (*dbp != '\0')
4353                 dbp++;
4354               continue;
4355             }
4356
4357           if (skip_till_semicolumn)
4358             {
4359               if (*dbp == ';')
4360                 skip_till_semicolumn = FALSE;
4361               dbp++;
4362               continue;         /* advance char */
4363             }
4364
4365           /* Search for beginning of a token.  */
4366           if (!begtoken (*dbp))
4367             {
4368               dbp++;
4369               continue;         /* advance char */
4370             }
4371
4372           /* We are at the beginning of a token. */
4373           switch (lowcase(*dbp))
4374             {
4375             case 'f':
4376               if (!packages_only && nocase_tail ("function"))
4377                 Ada_getit (inf, "/f");
4378               else
4379                 break;          /* from switch */
4380               continue;         /* advance char */
4381             case 'p':
4382               if (!packages_only && nocase_tail ("procedure"))
4383                 Ada_getit (inf, "/p");
4384               else if (nocase_tail ("package"))
4385                 Ada_getit (inf, "/s");
4386               else if (nocase_tail ("protected")) /* protected type */
4387                 Ada_getit (inf, "/t");
4388               else
4389                 break;          /* from switch */
4390               continue;         /* advance char */
4391
4392             case 'u':
4393               if (typedefs && !packages_only && nocase_tail ("use"))
4394                 {
4395                   /* when tagging types, avoid tagging  use type Pack.Typename;
4396                      for this, we will skip everything till a ; */
4397                   skip_till_semicolumn = TRUE;
4398                   continue;     /* advance char */
4399                 }
4400
4401             case 't':
4402               if (!packages_only && nocase_tail ("task"))
4403                 Ada_getit (inf, "/k");
4404               else if (typedefs && !packages_only && nocase_tail ("type"))
4405                 {
4406                   Ada_getit (inf, "/t");
4407                   while (*dbp != '\0')
4408                     dbp += 1;
4409                 }
4410               else
4411                 break;          /* from switch */
4412               continue;         /* advance char */
4413             }
4414
4415           /* Look for the end of the token. */
4416           while (!endtoken (*dbp))
4417             dbp++;
4418
4419         } /* advance char */
4420     } /* advance line */
4421 }
4422
4423 \f
4424 /*
4425  * Unix and microcontroller assembly tag handling
4426  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4427  * Idea by Bob Weiner, Motorola Inc. (1994)
4428  */
4429 static void
4430 Asm_labels (inf)
4431      FILE *inf;
4432 {
4433   register char *cp;
4434
4435   LOOP_ON_INPUT_LINES (inf, lb, cp)
4436     {
4437       /* If first char is alphabetic or one of [_.$], test for colon
4438          following identifier. */
4439       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4440         {
4441           /* Read past label. */
4442           cp++;
4443           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4444             cp++;
4445           if (*cp == ':' || iswhite (*cp))
4446             /* Found end of label, so copy it and add it to the table. */
4447             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4448                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4449         }
4450     }
4451 }
4452
4453 \f
4454 /*
4455  * Perl support
4456  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4457  * Perl variable names: /^(my|local).../
4458  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4459  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4460  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4461  */
4462 static void
4463 Perl_functions (inf)
4464      FILE *inf;
4465 {
4466   char *package = savestr ("main"); /* current package name */
4467   register char *cp;
4468
4469   LOOP_ON_INPUT_LINES (inf, lb, cp)
4470     {
4471       skip_spaces(cp);
4472
4473       if (LOOKING_AT (cp, "package"))
4474         {
4475           free (package);
4476           get_tag (cp, &package);
4477         }
4478       else if (LOOKING_AT (cp, "sub"))
4479         {
4480           char *pos;
4481           char *sp = cp;
4482
4483           while (!notinname (*cp))
4484             cp++;
4485           if (cp == sp)
4486             continue;           /* nothing found */
4487           if ((pos = etags_strchr (sp, ':')) != NULL
4488               && pos < cp && pos[1] == ':')
4489             /* The name is already qualified. */
4490             make_tag (sp, cp - sp, TRUE,
4491                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4492           else
4493             /* Qualify it. */
4494             {
4495               char savechar, *name;
4496
4497               savechar = *cp;
4498               *cp = '\0';
4499               name = concat (package, "::", sp);
4500               *cp = savechar;
4501               make_tag (name, strlen(name), TRUE,
4502                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4503               free (name);
4504             }
4505         }
4506        else if (globals)        /* only if we are tagging global vars */
4507         {
4508           /* Skip a qualifier, if any. */
4509           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4510           /* After "my" or "local", but before any following paren or space. */
4511           char *varstart = cp;
4512
4513           if (qual              /* should this be removed?  If yes, how? */
4514               && (*cp == '$' || *cp == '@' || *cp == '%'))
4515             {
4516               varstart += 1;
4517               do
4518                 cp++;
4519               while (ISALNUM (*cp) || *cp == '_');
4520             }
4521           else if (qual)
4522             {
4523               /* Should be examining a variable list at this point;
4524                  could insist on seeing an open parenthesis. */
4525               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4526                 cp++;
4527             }
4528           else
4529             continue;
4530
4531           make_tag (varstart, cp - varstart, FALSE,
4532                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4533         }
4534     }
4535   free (package);
4536 }
4537
4538
4539 /*
4540  * Python support
4541  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4542  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4543  * More ideas by seb bacon <seb@jamkit.com> (2002)
4544  */
4545 static void
4546 Python_functions (inf)
4547      FILE *inf;
4548 {
4549   register char *cp;
4550
4551   LOOP_ON_INPUT_LINES (inf, lb, cp)
4552     {
4553       cp = skip_spaces (cp);
4554       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4555         {
4556           char *name = cp;
4557           while (!notinname (*cp) && *cp != ':')
4558             cp++;
4559           make_tag (name, cp - name, TRUE,
4560                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4561         }
4562     }
4563 }
4564
4565 \f
4566 /*
4567  * PHP support
4568  * Look for:
4569  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4570  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4571  *  - /^[ \t]*define\(\"[^\"]+/
4572  * Only with --members:
4573  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4574  * Idea by Diez B. Roggisch (2001)
4575  */
4576 static void
4577 PHP_functions (inf)
4578      FILE *inf;
4579 {
4580   register char *cp, *name;
4581   bool search_identifier = FALSE;
4582
4583   LOOP_ON_INPUT_LINES (inf, lb, cp)
4584     {
4585       cp = skip_spaces (cp);
4586       name = cp;
4587       if (search_identifier
4588           && *cp != '\0')
4589         {
4590           while (!notinname (*cp))
4591             cp++;
4592           make_tag (name, cp - name, TRUE,
4593                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4594           search_identifier = FALSE;
4595         }
4596       else if (LOOKING_AT (cp, "function"))
4597         {
4598           if(*cp == '&')
4599             cp = skip_spaces (cp+1);
4600           if(*cp != '\0')
4601             {
4602               name = cp;
4603               while (!notinname (*cp))
4604                 cp++;
4605               make_tag (name, cp - name, TRUE,
4606                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4607             }
4608           else
4609             search_identifier = TRUE;
4610         }
4611       else if (LOOKING_AT (cp, "class"))
4612         {
4613           if (*cp != '\0')
4614             {
4615               name = cp;
4616               while (*cp != '\0' && !iswhite (*cp))
4617                 cp++;
4618               make_tag (name, cp - name, FALSE,
4619                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4620             }
4621           else
4622             search_identifier = TRUE;
4623         }
4624       else if (strneq (cp, "define", 6)
4625                && (cp = skip_spaces (cp+6))
4626                && *cp++ == '('
4627                && (*cp == '"' || *cp == '\''))
4628         {
4629           char quote = *cp++;
4630           name = cp;
4631           while (*cp != quote && *cp != '\0')
4632             cp++;
4633           make_tag (name, cp - name, FALSE,
4634                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4635         }
4636       else if (members
4637                && LOOKING_AT (cp, "var")
4638                && *cp == '$')
4639         {
4640           name = cp;
4641           while (!notinname(*cp))
4642             cp++;
4643           make_tag (name, cp - name, FALSE,
4644                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4645         }
4646     }
4647 }
4648
4649 \f
4650 /*
4651  * Cobol tag functions
4652  * We could look for anything that could be a paragraph name.
4653  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4654  * Idea by Corny de Souza (1993)
4655  */
4656 static void
4657 Cobol_paragraphs (inf)
4658      FILE *inf;
4659 {
4660   register char *bp, *ep;
4661
4662   LOOP_ON_INPUT_LINES (inf, lb, bp)
4663     {
4664       if (lb.len < 9)
4665         continue;
4666       bp += 8;
4667
4668       /* If eoln, compiler option or comment ignore whole line. */
4669       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4670         continue;
4671
4672       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4673         continue;
4674       if (*ep++ == '.')
4675         make_tag (bp, ep - bp, TRUE,
4676                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4677     }
4678 }
4679
4680 \f
4681 /*
4682  * Makefile support
4683  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4684  */
4685 static void
4686 Makefile_targets (inf)
4687      FILE *inf;
4688 {
4689   register char *bp;
4690
4691   LOOP_ON_INPUT_LINES (inf, lb, bp)
4692     {
4693       if (*bp == '\t' || *bp == '#')
4694         continue;
4695       while (*bp != '\0' && *bp != '=' && *bp != ':')
4696         bp++;
4697       if (*bp == ':' || (globals && *bp == '='))
4698         {
4699           /* We should detect if there is more than one tag, but we do not.
4700              We just skip initial and final spaces. */
4701           char * namestart = skip_spaces (lb.buffer);
4702           while (--bp > namestart)
4703             if (!notinname (*bp))
4704               break;
4705           make_tag (namestart, bp - namestart + 1, TRUE,
4706                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4707         }
4708     }
4709 }
4710
4711 \f
4712 /*
4713  * Pascal parsing
4714  * Original code by Mosur K. Mohan (1989)
4715  *
4716  *  Locates tags for procedures & functions.  Doesn't do any type- or
4717  *  var-definitions.  It does look for the keyword "extern" or
4718  *  "forward" immediately following the procedure statement; if found,
4719  *  the tag is skipped.
4720  */
4721 static void
4722 Pascal_functions (inf)
4723      FILE *inf;
4724 {
4725   linebuffer tline;             /* mostly copied from C_entries */
4726   long save_lcno;
4727   int save_lineno, namelen, taglen;
4728   char c, *name;
4729
4730   bool                          /* each of these flags is TRUE iff: */
4731     incomment,                  /* point is inside a comment */
4732     inquote,                    /* point is inside '..' string */
4733     get_tagname,                /* point is after PROCEDURE/FUNCTION
4734                                    keyword, so next item = potential tag */
4735     found_tag,                  /* point is after a potential tag */
4736     inparms,                    /* point is within parameter-list */
4737     verify_tag;                 /* point has passed the parm-list, so the
4738                                    next token will determine whether this
4739                                    is a FORWARD/EXTERN to be ignored, or
4740                                    whether it is a real tag */
4741
4742   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4743   name = NULL;                  /* keep compiler quiet */
4744   dbp = lb.buffer;
4745   *dbp = '\0';
4746   linebuffer_init (&tline);
4747
4748   incomment = inquote = FALSE;
4749   found_tag = FALSE;            /* have a proc name; check if extern */
4750   get_tagname = FALSE;          /* found "procedure" keyword         */
4751   inparms = FALSE;              /* found '(' after "proc"            */
4752   verify_tag = FALSE;           /* check if "extern" is ahead        */
4753
4754
4755   while (!feof (inf))           /* long main loop to get next char */
4756     {
4757       c = *dbp++;
4758       if (c == '\0')            /* if end of line */
4759         {
4760           readline (&lb, inf);
4761           dbp = lb.buffer;
4762           if (*dbp == '\0')
4763             continue;
4764           if (!((found_tag && verify_tag)
4765                 || get_tagname))
4766             c = *dbp++;         /* only if don't need *dbp pointing
4767                                    to the beginning of the name of
4768                                    the procedure or function */
4769         }
4770       if (incomment)
4771         {
4772           if (c == '}')         /* within { } comments */
4773             incomment = FALSE;
4774           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4775             {
4776               dbp++;
4777               incomment = FALSE;
4778             }
4779           continue;
4780         }
4781       else if (inquote)
4782         {
4783           if (c == '\'')
4784             inquote = FALSE;
4785           continue;
4786         }
4787       else
4788         switch (c)
4789           {
4790           case '\'':
4791             inquote = TRUE;     /* found first quote */
4792             continue;
4793           case '{':             /* found open { comment */
4794             incomment = TRUE;
4795             continue;
4796           case '(':
4797             if (*dbp == '*')    /* found open (* comment */
4798               {
4799                 incomment = TRUE;
4800                 dbp++;
4801               }
4802             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4803               inparms = TRUE;
4804             continue;
4805           case ')':             /* end of parms list */
4806             if (inparms)
4807               inparms = FALSE;
4808             continue;
4809           case ';':
4810             if (found_tag && !inparms) /* end of proc or fn stmt */
4811               {
4812                 verify_tag = TRUE;
4813                 break;
4814               }
4815             continue;
4816           }
4817       if (found_tag && verify_tag && (*dbp != ' '))
4818         {
4819           /* Check if this is an "extern" declaration. */
4820           if (*dbp == '\0')
4821             continue;
4822           if (lowcase (*dbp == 'e'))
4823             {
4824               if (nocase_tail ("extern")) /* superfluous, really! */
4825                 {
4826                   found_tag = FALSE;
4827                   verify_tag = FALSE;
4828                 }
4829             }
4830           else if (lowcase (*dbp) == 'f')
4831             {
4832               if (nocase_tail ("forward")) /* check for forward reference */
4833                 {
4834                   found_tag = FALSE;
4835                   verify_tag = FALSE;
4836                 }
4837             }
4838           if (found_tag && verify_tag) /* not external proc, so make tag */
4839             {
4840               found_tag = FALSE;
4841               verify_tag = FALSE;
4842               make_tag (name, namelen, TRUE,
4843                         tline.buffer, taglen, save_lineno, save_lcno);
4844               continue;
4845             }
4846         }
4847       if (get_tagname)          /* grab name of proc or fn */
4848         {
4849           char *cp;
4850
4851           if (*dbp == '\0')
4852             continue;
4853
4854           /* Find block name. */
4855           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4856             continue;
4857
4858           /* Save all values for later tagging. */
4859           linebuffer_setlen (&tline, lb.len);
4860           strcpy (tline.buffer, lb.buffer);
4861           save_lineno = lineno;
4862           save_lcno = linecharno;
4863           name = tline.buffer + (dbp - lb.buffer);
4864           namelen = cp - dbp;
4865           taglen = cp - lb.buffer + 1;
4866
4867           dbp = cp;             /* set dbp to e-o-token */
4868           get_tagname = FALSE;
4869           found_tag = TRUE;
4870           continue;
4871
4872           /* And proceed to check for "extern". */
4873         }
4874       else if (!incomment && !inquote && !found_tag)
4875         {
4876           /* Check for proc/fn keywords. */
4877           switch (lowcase (c))
4878             {
4879             case 'p':
4880               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4881                 get_tagname = TRUE;
4882               continue;
4883             case 'f':
4884               if (nocase_tail ("unction"))
4885                 get_tagname = TRUE;
4886               continue;
4887             }
4888         }
4889     } /* while not eof */
4890
4891   free (tline.buffer);
4892 }
4893
4894 \f
4895 /*
4896  * Lisp tag functions
4897  *  look for (def or (DEF, quote or QUOTE
4898  */
4899
4900 static void L_getit __P((void));
4901
4902 static void
4903 L_getit ()
4904 {
4905   if (*dbp == '\'')             /* Skip prefix quote */
4906     dbp++;
4907   else if (*dbp == '(')
4908   {
4909     dbp++;
4910     /* Try to skip "(quote " */
4911     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4912       /* Ok, then skip "(" before name in (defstruct (foo)) */
4913       dbp = skip_spaces (dbp);
4914   }
4915   get_tag (dbp, NULL);
4916 }
4917
4918 static void
4919 Lisp_functions (inf)
4920      FILE *inf;
4921 {
4922   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4923     {
4924       if (dbp[0] != '(')
4925         continue;
4926
4927       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4928         {
4929           dbp = skip_non_spaces (dbp);
4930           dbp = skip_spaces (dbp);
4931           L_getit ();
4932         }
4933       else
4934         {
4935           /* Check for (foo::defmumble name-defined ... */
4936           do
4937             dbp++;
4938           while (!notinname (*dbp) && *dbp != ':');
4939           if (*dbp == ':')
4940             {
4941               do
4942                 dbp++;
4943               while (*dbp == ':');
4944
4945               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4946                 {
4947                   dbp = skip_non_spaces (dbp);
4948                   dbp = skip_spaces (dbp);
4949                   L_getit ();
4950                 }
4951             }
4952         }
4953     }
4954 }
4955
4956 \f
4957 /*
4958  * Lua script language parsing
4959  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4960  *
4961  *  "function" and "local function" are tags if they start at column 1.
4962  */
4963 static void
4964 Lua_functions (inf)
4965      FILE *inf;
4966 {
4967   register char *bp;
4968
4969   LOOP_ON_INPUT_LINES (inf, lb, bp)
4970     {
4971       if (bp[0] != 'f' && bp[0] != 'l')
4972         continue;
4973
4974       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4975
4976       if (LOOKING_AT (bp, "function"))
4977         get_tag (bp, NULL);
4978     }
4979 }
4980
4981 \f
4982 /*
4983  * Postscript tags
4984  * Just look for lines where the first character is '/'
4985  * Also look at "defineps" for PSWrap
4986  * Ideas by:
4987  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4988  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4989  */
4990 static void
4991 PS_functions (inf)
4992      FILE *inf;
4993 {
4994   register char *bp, *ep;
4995
4996   LOOP_ON_INPUT_LINES (inf, lb, bp)
4997     {
4998       if (bp[0] == '/')
4999         {
5000           for (ep = bp+1;
5001                *ep != '\0' && *ep != ' ' && *ep != '{';
5002                ep++)
5003             continue;
5004           make_tag (bp, ep - bp, TRUE,
5005                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5006         }
5007       else if (LOOKING_AT (bp, "defineps"))
5008         get_tag (bp, NULL);
5009     }
5010 }
5011
5012 \f
5013 /*
5014  * Forth tags
5015  * Ignore anything after \ followed by space or in ( )
5016  * Look for words defined by :
5017  * Look for constant, code, create, defer, value, and variable
5018  * OBP extensions:  Look for buffer:, field,
5019  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5020  */
5021 static void
5022 Forth_words (inf)
5023      FILE *inf;
5024 {
5025   register char *bp;
5026
5027   LOOP_ON_INPUT_LINES (inf, lb, bp)
5028     while ((bp = skip_spaces (bp))[0] != '\0')
5029       if (bp[0] == '\\' && iswhite(bp[1]))
5030         break;                  /* read next line */
5031       else if (bp[0] == '(' && iswhite(bp[1]))
5032         do                      /* skip to ) or eol */
5033           bp++;
5034         while (*bp != ')' && *bp != '\0');
5035       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5036                || LOOKING_AT_NOCASE (bp, "constant")
5037                || LOOKING_AT_NOCASE (bp, "code")
5038                || LOOKING_AT_NOCASE (bp, "create")
5039                || LOOKING_AT_NOCASE (bp, "defer")
5040                || LOOKING_AT_NOCASE (bp, "value")
5041                || LOOKING_AT_NOCASE (bp, "variable")
5042                || LOOKING_AT_NOCASE (bp, "buffer:")
5043                || LOOKING_AT_NOCASE (bp, "field"))
5044         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5045       else
5046         bp = skip_non_spaces (bp);
5047 }
5048
5049 \f
5050 /*
5051  * Scheme tag functions
5052  * look for (def... xyzzy
5053  *          (def... (xyzzy
5054  *          (def ... ((...(xyzzy ....
5055  *          (set! xyzzy
5056  * Original code by Ken Haase (1985?)
5057  */
5058 static void
5059 Scheme_functions (inf)
5060      FILE *inf;
5061 {
5062   register char *bp;
5063
5064   LOOP_ON_INPUT_LINES (inf, lb, bp)
5065     {
5066       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5067         {
5068           bp = skip_non_spaces (bp+4);
5069           /* Skip over open parens and white space */
5070           while (notinname (*bp))
5071             bp++;
5072           get_tag (bp, NULL);
5073         }
5074       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5075         get_tag (bp, NULL);
5076     }
5077 }
5078
5079 \f
5080 /* Find tags in TeX and LaTeX input files.  */
5081
5082 /* TEX_toktab is a table of TeX control sequences that define tags.
5083  * Each entry records one such control sequence.
5084  *
5085  * Original code from who knows whom.
5086  * Ideas by:
5087  *   Stefan Monnier (2002)
5088  */
5089
5090 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5091
5092 /* Default set of control sequences to put into TEX_toktab.
5093    The value of environment var TEXTAGS is prepended to this.  */
5094 static char *TEX_defenv = "\
5095 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5096 :part:appendix:entry:index:def\
5097 :newcommand:renewcommand:newenvironment:renewenvironment";
5098
5099 static void TEX_mode __P((FILE *));
5100 static void TEX_decode_env __P((char *, char *));
5101
5102 static char TEX_esc = '\\';
5103 static char TEX_opgrp = '{';
5104 static char TEX_clgrp = '}';
5105
5106 /*
5107  * TeX/LaTeX scanning loop.
5108  */
5109 static void
5110 TeX_commands (inf)
5111      FILE *inf;
5112 {
5113   char *cp;
5114   linebuffer *key;
5115
5116   /* Select either \ or ! as escape character.  */
5117   TEX_mode (inf);
5118
5119   /* Initialize token table once from environment. */
5120   if (TEX_toktab == NULL)
5121     TEX_decode_env ("TEXTAGS", TEX_defenv);
5122
5123   LOOP_ON_INPUT_LINES (inf, lb, cp)
5124     {
5125       /* Look at each TEX keyword in line. */
5126       for (;;)
5127         {
5128           /* Look for a TEX escape. */
5129           while (*cp++ != TEX_esc)
5130             if (cp[-1] == '\0' || cp[-1] == '%')
5131               goto tex_next_line;
5132
5133           for (key = TEX_toktab; key->buffer != NULL; key++)
5134             if (strneq (cp, key->buffer, key->len))
5135               {
5136                 register char *p;
5137                 int namelen, linelen;
5138                 bool opgrp = FALSE;
5139
5140                 cp = skip_spaces (cp + key->len);
5141                 if (*cp == TEX_opgrp)
5142                   {
5143                     opgrp = TRUE;
5144                     cp++;
5145                   }
5146                 for (p = cp;
5147                      (!iswhite (*p) && *p != '#' &&
5148                       *p != TEX_opgrp && *p != TEX_clgrp);
5149                      p++)
5150                   continue;
5151                 namelen = p - cp;
5152                 linelen = lb.len;
5153                 if (!opgrp || *p == TEX_clgrp)
5154                   {
5155                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5156                       p++;
5157                     linelen = p - lb.buffer + 1;
5158                   }
5159                 make_tag (cp, namelen, TRUE,
5160                           lb.buffer, linelen, lineno, linecharno);
5161                 goto tex_next_line; /* We only tag a line once */
5162               }
5163         }
5164     tex_next_line:
5165       ;
5166     }
5167 }
5168
5169 #define TEX_LESC '\\'
5170 #define TEX_SESC '!'
5171
5172 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5173    chars accordingly. */
5174 static void
5175 TEX_mode (inf)
5176      FILE *inf;
5177 {
5178   int c;
5179
5180   while ((c = getc (inf)) != EOF)
5181     {
5182       /* Skip to next line if we hit the TeX comment char. */
5183       if (c == '%')
5184         while (c != '\n' && c != EOF)
5185           c = getc (inf);
5186       else if (c == TEX_LESC || c == TEX_SESC )
5187         break;
5188     }
5189
5190   if (c == TEX_LESC)
5191     {
5192       TEX_esc = TEX_LESC;
5193       TEX_opgrp = '{';
5194       TEX_clgrp = '}';
5195     }
5196   else
5197     {
5198       TEX_esc = TEX_SESC;
5199       TEX_opgrp = '<';
5200       TEX_clgrp = '>';
5201     }
5202   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5203      No attempt is made to correct the situation. */
5204   rewind (inf);
5205 }
5206
5207 /* Read environment and prepend it to the default string.
5208    Build token table. */
5209 static void
5210 TEX_decode_env (evarname, defenv)
5211      char *evarname;
5212      char *defenv;
5213 {
5214   register char *env, *p;
5215   int i, len;
5216
5217   /* Append default string to environment. */
5218   env = getenv (evarname);
5219   if (!env)
5220     env = defenv;
5221   else
5222     {
5223       char *oldenv = env;
5224       env = concat (oldenv, defenv, "");
5225     }
5226
5227   /* Allocate a token table */
5228   for (len = 1, p = env; p;)
5229     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5230       len++;
5231   TEX_toktab = xnew (len, linebuffer);
5232
5233   /* Unpack environment string into token table. Be careful about */
5234   /* zero-length strings (leading ':', "::" and trailing ':') */
5235   for (i = 0; *env != '\0';)
5236     {
5237       p = etags_strchr (env, ':');
5238       if (!p)                   /* End of environment string. */
5239         p = env + strlen (env);
5240       if (p - env > 0)
5241         {                       /* Only non-zero strings. */
5242           TEX_toktab[i].buffer = savenstr (env, p - env);
5243           TEX_toktab[i].len = p - env;
5244           i++;
5245         }
5246       if (*p)
5247         env = p + 1;
5248       else
5249         {
5250           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5251           TEX_toktab[i].len = 0;
5252           break;
5253         }
5254     }
5255 }
5256
5257 \f
5258 /* Texinfo support.  Dave Love, Mar. 2000.  */
5259 static void
5260 Texinfo_nodes (inf)
5261      FILE * inf;
5262 {
5263   char *cp, *start;
5264   LOOP_ON_INPUT_LINES (inf, lb, cp)
5265     if (LOOKING_AT (cp, "@node"))
5266       {
5267         start = cp;
5268         while (*cp != '\0' && *cp != ',')
5269           cp++;
5270         make_tag (start, cp - start, TRUE,
5271                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5272       }
5273 }
5274
5275 \f
/*
 * HTML support.
 * Contents of <title>, <h1>, <h2>, <h3> are tags.
 * Contents of <a name=xxx> are tags with name xxx.
 *
 * Francesco Potortì, 2002.
 *
 * The scanner is a state machine driven by four flags that persist
 * across input lines, so an HTML tag may span several lines:
 *   skiptag  - skip everything up to and including the next '>';
 *   intag    - inside an HTML tag, searching for "id=" (and for "name="
 *              when INANCHOR is set);
 *   getnext  - the next text found outside of HTML tags becomes the
 *              tagged line; the tag name is whatever token_name holds;
 *   inanchor - the tag being scanned is an <a ...> tag.
 * The flags are tested in that order of priority.  The global DBP is
 * the scan position in the current line.
 */
static void
HTML_labels (inf)
     FILE * inf;
{
  bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
  bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
  bool intag = FALSE;           /* inside an html tag, looking for ID= */
  bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = FALSE;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag) /* look for "name=" or "id=" */
          {
            /* Advance to the next character that could start one of
               the two keywords, or to the end of the tag or line. */
            while (*dbp != '\0' && *dbp != '>'
                   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = FALSE;
                continue;       /* look on the same line */
              }
            /* "name=" counts only inside an anchor, "id=" in any tag.
               Presumably LOOKING_AT_NOCASE leaves DBP just after the
               keyword on a match (macro not shown here) -- the code
               below relies on DBP pointing at the attribute value. */
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                bool quoted = (dbp[0] == '"');

                /* Find the end of the attribute value: the closing
                   double quote if quoted, else the end of the token.
                   A quoted value that does not close on this line is
                   cut at the end of the line. */
                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                /* Remember the value as the name of the coming tag. */
                linebuffer_setlen (&token_name, end - dbp);
                strncpy (token_name.buffer, dbp, end - dbp);
                token_name.buffer[end - dbp] = '\0';

                dbp = end;
                intag = FALSE;  /* we found what we looked for */
                skiptag = TRUE; /* skip to the end of the tag */
                getnext = TRUE; /* then grab the text */
                continue;       /* look on the same line */
              }
            dbp += 1;           /* an 'n' or 'i' that starts no keyword */
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text: scan it
                   for attributes first; GETNEXT stays set. */
                intag = TRUE;
                inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The text up to the next '<' or the end of the line is
               the tagged text; token_name (possibly empty) names it. */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, TRUE,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = FALSE;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = TRUE;
            /* NOTE(review): INANCHOR is set here for anchors but is not
               cleared for other tags, so it keeps the value left by an
               earlier tag -- confirm whether that is intended. */
            if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = TRUE;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                /* Title or heading: the text that follows is a tag. */
                intag = FALSE;
                getnext = TRUE;
                continue;       /* look on the same line */
              }
            dbp += 1;           /* step over '<'; INTAG scans the rest */
          }
      }
}
5395
5396 \f
5397 /*
5398  * Prolog support
5399  *
5400  * Assumes that the predicate or rule starts at column 0.
5401  * Only the first clause of a predicate or rule is added.
5402  * Original code by Sunichirou Sugou (1989)
5403  * Rewritten by Anders Lindgren (1996)
5404  */
5405 static int prolog_pr __P((char *, char *));
5406 static void prolog_skip_comment __P((linebuffer *, FILE *));
5407 static int prolog_atom __P((char *, int));
5408
5409 static void
5410 Prolog_functions (inf)
5411      FILE *inf;
5412 {
5413   char *cp, *last;
5414   int len;
5415   int allocated;
5416
5417   allocated = 0;
5418   len = 0;
5419   last = NULL;
5420
5421   LOOP_ON_INPUT_LINES (inf, lb, cp)
5422     {
5423       if (cp[0] == '\0')        /* Empty line */
5424         continue;
5425       else if (iswhite (cp[0])) /* Not a predicate */
5426         continue;
5427       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5428         prolog_skip_comment (&lb, inf);
5429       else if ((len = prolog_pr (cp, last)) > 0)
5430         {
5431           /* Predicate or rule.  Store the function name so that we
5432              only generate a tag for the first clause.  */
5433           if (last == NULL)
5434             last = xnew(len + 1, char);
5435           else if (len + 1 > allocated)
5436             xrnew (last, len + 1, char);
5437           allocated = len + 1;
5438           strncpy (last, cp, len);
5439           last[len] = '\0';
5440         }
5441     }
5442   if (last != NULL)
5443     free (last);
5444 }
5445
5446
5447 static void
5448 prolog_skip_comment (plb, inf)
5449      linebuffer *plb;
5450      FILE *inf;
5451 {
5452   char *cp;
5453
5454   do
5455     {
5456       for (cp = plb->buffer; *cp != '\0'; cp++)
5457         if (cp[0] == '*' && cp[1] == '/')
5458           return;
5459       readline (plb, inf);
5460     }
5461   while (!feof(inf));
5462 }
5463
5464 /*
5465  * A predicate or rule definition is added if it matches:
5466  *     <beginning of line><Prolog Atom><whitespace>(
5467  * or  <beginning of line><Prolog Atom><whitespace>:-
5468  *
5469  * It is added to the tags database if it doesn't match the
5470  * name of the previous clause header.
5471  *
5472  * Return the size of the name of the predicate or rule, or 0 if no
5473  * header was found.
5474  */
5475 static int
5476 prolog_pr (s, last)
5477      char *s;
5478      char *last;                /* Name of last clause. */
5479 {
5480   int pos;
5481   int len;
5482
5483   pos = prolog_atom (s, 0);
5484   if (pos < 1)
5485     return 0;
5486
5487   len = pos;
5488   pos = skip_spaces (s + pos) - s;
5489
5490   if ((s[pos] == '.'
5491        || (s[pos] == '(' && (pos += 1))
5492        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5493       && (last == NULL          /* save only the first clause */
5494           || len != (int)strlen (last)
5495           || !strneq (s, last, len)))
5496         {
5497           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5498           return len;
5499         }
5500   else
5501     return 0;
5502 }
5503
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if no atom starts there
 * or a quoted atom is not closed on the line.
 *
 * An atom, in this context, is one of:
 * - a lower case letter or underscore followed by alphanumerics and
 *   underscores;
 * - a string in single quotes, where a doubled single quote stands
 *   for a quote and a backslash quotes the following character.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
	continue;
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  p++;
  while (*p != '\0')
    {
      if (*p == '\'')
	{
	  if (p[1] != '\'')
	    return (int) (p + 1 - start);	/* closing quote */
	  p += 2;		/* a doubled quote */
	}
      else if (*p == '\\')
	{
	  if (p[1] == '\0')
	    return -1;
	  p += 2;
	}
      else
	p++;
    }

  /* Multiline quoted atoms are ignored. */
  return -1;
}
5562
5563 \f
5564 /*
5565  * Support for Erlang
5566  *
5567  * Generates tags for functions, defines, and records.
5568  * Assumes that Erlang functions start at column 0.
5569  * Original code by Anders Lindgren (1996)
5570  */
5571 static int erlang_func __P((char *, char *));
5572 static void erlang_attribute __P((char *));
5573 static int erlang_atom __P((char *));
5574
5575 static void
5576 Erlang_functions (inf)
5577      FILE *inf;
5578 {
5579   char *cp, *last;
5580   int len;
5581   int allocated;
5582
5583   allocated = 0;
5584   len = 0;
5585   last = NULL;
5586
5587   LOOP_ON_INPUT_LINES (inf, lb, cp)
5588     {
5589       if (cp[0] == '\0')        /* Empty line */
5590         continue;
5591       else if (iswhite (cp[0])) /* Not function nor attribute */
5592         continue;
5593       else if (cp[0] == '%')    /* comment */
5594         continue;
5595       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5596         continue;
5597       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5598         {
5599           erlang_attribute (cp);
5600           if (last != NULL)
5601             {
5602               free (last);
5603               last = NULL;
5604             }
5605         }
5606       else if ((len = erlang_func (cp, last)) > 0)
5607         {
5608           /*
5609            * Function.  Store the function name so that we only
5610            * generates a tag for the first clause.
5611            */
5612           if (last == NULL)
5613             last = xnew (len + 1, char);
5614           else if (len + 1 > allocated)
5615             xrnew (last, len + 1, char);
5616           allocated = len + 1;
5617           strncpy (last, cp, len);
5618           last[len] = '\0';
5619         }
5620     }
5621   if (last != NULL)
5622     free (last);
5623 }
5624
5625
5626 /*
5627  * A function definition is added if it matches:
5628  *     <beginning of line><Erlang Atom><whitespace>(
5629  *
5630  * It is added to the tags database if it doesn't match the
5631  * name of the previous clause header.
5632  *
5633  * Return the size of the name of the function, or 0 if no function
5634  * was found.
5635  */
5636 static int
5637 erlang_func (s, last)
5638      char *s;
5639      char *last;                /* Name of last clause. */
5640 {
5641   int pos;
5642   int len;
5643
5644   pos = erlang_atom (s);
5645   if (pos < 1)
5646     return 0;
5647
5648   len = pos;
5649   pos = skip_spaces (s + pos) - s;
5650
5651   /* Save only the first clause. */
5652   if (s[pos++] == '('
5653       && (last == NULL
5654           || len != (int)strlen (last)
5655           || !strneq (s, last, len)))
5656         {
5657           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5658           return len;
5659         }
5660
5661   return 0;
5662 }
5663
5664
5665 /*
5666  * Handle attributes.  Currently, tags are generated for defines
5667  * and records.
5668  *
5669  * They are on the form:
5670  * -define(foo, bar).
5671  * -define(Foo(M, N), M+N).
5672  * -record(graph, {vtab = notable, cyclic = true}).
5673  */
5674 static void
5675 erlang_attribute (s)
5676      char *s;
5677 {
5678   char *cp = s;
5679
5680   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5681       && *cp++ == '(')
5682     {
5683       int len = erlang_atom (skip_spaces (cp));
5684       if (len > 0)
5685         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5686     }
5687   return;
5688 }
5689
5690
/*
 * Consume the Erlang atom (or variable) that starts at S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a letter, an underscore or a single quote, and also
 * when a quoted atom is not closed on the line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
	continue;
    }
  else if (*p == '\'')
    {
      /* The atom is quoted: look for the closing quote.  A backslash
	 quotes the character that follows it. */
      p++;
      while (*p != '\'')
	{
	  if (*p == '\0')	/* multiline quoted atoms are ignored */
	    return 0;
	  if (*p == '\\')
	    {
	      p++;
	      if (*p == '\0')
		return 0;
	    }
	  p++;
	}
      p++;			/* step over the closing quote */
    }

  return (int) (p - s);
}
5719
5720 \f
5721 static char *scan_separators __P((char *));
5722 static void add_regex __P((char *, language *));
5723 static char *substitute __P((char *, char *, struct re_registers *));
5724
/*
 * Decode, in place, the string that starts at NAME, whose first
 * character is the separator: "/blah/" becomes "blah".  Scanning
 * stops at the first unquoted occurrence of the separator.  A
 * backslash followed by one of a, b, d, e, f, n, r, t, v is replaced
 * by the corresponding control character, a backslash followed by the
 * separator by the separator alone, and any other backslash sequence
 * is kept unchanged.  The decoded string is null terminated.
 * Return a pointer to the terminating separator, or NULL for an
 * unterminated string.
 */
static char *
scan_separators (name)
     char *name;
{
  const char sep = name[0];
  char *dst = name;		/* where the next decoded char goes */
  char *src;			/* next char to examine */
  char *result;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
	{
	  char decoded;

	  src++;
	  if (*src == '\0')	/* lone backslash at the end: dropped */
	    break;
	  switch (*src)
	    {
	    case 'a': decoded = '\007'; break; /* BEL (bell)           */
	    case 'b': decoded = '\b'; break;   /* BS (back space)      */
	    case 'd': decoded = 0177; break;   /* DEL (delete)         */
	    case 'e': decoded = 033; break;    /* ESC (escape)         */
	    case 'f': decoded = '\f'; break;   /* FF (form feed)       */
	    case 'n': decoded = '\n'; break;   /* NL (new line)        */
	    case 'r': decoded = '\r'; break;   /* CR (carriage return) */
	    case 't': decoded = '\t'; break;   /* TAB (horizontal tab) */
	    case 'v': decoded = '\v'; break;   /* VT (vertical tab)    */
	    default:
	      /* A quoted separator loses its backslash; anything else
		 that is quoted keeps it. */
	      if (*src != sep)
		*dst++ = '\\';
	      decoded = *src;
	      break;
	    }
	  *dst++ = decoded;
	}
      else if (*src == sep)
	break;
      else
	*dst++ = *src;
    }

  /* Signal an unterminated string with NULL. */
  result = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return result;
}
5784
5785 /* Look at the argument of --regex or --no-regex and do the right
5786    thing.  Same for each line of a regexp file. */
5787 static void
5788 analyse_regex (regex_arg)
5789      char *regex_arg;
5790 {
5791   if (regex_arg == NULL)
5792     {
5793       free_regexps ();          /* --no-regex: remove existing regexps */
5794       return;
5795     }
5796
5797   /* A real --regexp option or a line in a regexp file. */
5798   switch (regex_arg[0])
5799     {
5800       /* Comments in regexp file or null arg to --regex. */
5801     case '\0':
5802     case ' ':
5803     case '\t':
5804       break;
5805
5806       /* Read a regex file.  This is recursive and may result in a
5807          loop, which will stop when the file descriptors are exhausted. */
5808     case '@':
5809       {
5810         FILE *regexfp;
5811         linebuffer regexbuf;
5812         char *regexfile = regex_arg + 1;
5813
5814         /* regexfile is a file containing regexps, one per line. */
5815         regexfp = fopen (regexfile, "r");
5816         if (regexfp == NULL)
5817           {
5818             pfatal (regexfile);
5819             return;
5820           }
5821         linebuffer_init (&regexbuf);
5822         while (readline_internal (&regexbuf, regexfp) > 0)
5823           analyse_regex (regexbuf.buffer);
5824         free (regexbuf.buffer);
5825         fclose (regexfp);
5826       }
5827       break;
5828
5829       /* Regexp to be used for a specific language only. */
5830     case '{':
5831       {
5832         language *lang;
5833         char *lang_name = regex_arg + 1;
5834         char *cp;
5835
5836         for (cp = lang_name; *cp != '}'; cp++)
5837           if (*cp == '\0')
5838             {
5839               error ("unterminated language name in regex: %s", regex_arg);
5840               return;
5841             }
5842         *cp++ = '\0';
5843         lang = get_language_from_langname (lang_name);
5844         if (lang == NULL)
5845           return;
5846         add_regex (cp, lang);
5847       }
5848       break;
5849
5850       /* Regexp to be used for any language. */
5851     default:
5852       add_regex (regex_arg, NULL);
5853       break;
5854     }
5855 }
5856
5857 /* Separate the regexp pattern, compile it,
5858    and care for optional name and modifiers. */
5859 static void
5860 add_regex (regexp_pattern, lang)
5861      char *regexp_pattern;
5862      language *lang;
5863 {
5864   static struct re_pattern_buffer zeropattern;
5865   char sep, *pat, *name, *modifiers;
5866   const char *err;
5867   struct re_pattern_buffer *patbuf;
5868   regexp *rp;
5869   bool
5870     force_explicit_name = TRUE, /* do not use implicit tag names */
5871     ignore_case = FALSE,        /* case is significant */
5872     multi_line = FALSE,         /* matches are done one line at a time */
5873     single_line = FALSE;        /* dot does not match newline */
5874
5875
5876   if (strlen(regexp_pattern) < 3)
5877     {
5878       error ("null regexp", (char *)NULL);
5879       return;
5880     }
5881   sep = regexp_pattern[0];
5882   name = scan_separators (regexp_pattern);
5883   if (name == NULL)
5884     {
5885       error ("%s: unterminated regexp", regexp_pattern);
5886       return;
5887     }
5888   if (name[1] == sep)
5889     {
5890       error ("null name for regexp \"%s\"", regexp_pattern);
5891       return;
5892     }
5893   modifiers = scan_separators (name);
5894   if (modifiers == NULL)        /* no terminating separator --> no name */
5895     {
5896       modifiers = name;
5897       name = "";
5898     }
5899   else
5900     modifiers += 1;             /* skip separator */
5901
5902   /* Parse regex modifiers. */
5903   for (; modifiers[0] != '\0'; modifiers++)
5904     switch (modifiers[0])
5905       {
5906       case 'N':
5907         if (modifiers == name)
5908           error ("forcing explicit tag name but no name, ignoring", NULL);
5909         force_explicit_name = TRUE;
5910         break;
5911       case 'i':
5912         ignore_case = TRUE;
5913         break;
5914       case 's':
5915         single_line = TRUE;
5916         /* FALLTHRU */
5917       case 'm':
5918         multi_line = TRUE;
5919         need_filebuf = TRUE;
5920         break;
5921       default:
5922         {
5923           char wrongmod [2];
5924           wrongmod[0] = modifiers[0];
5925           wrongmod[1] = '\0';
5926           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5927         }
5928         break;
5929       }
5930
5931   patbuf = xnew (1, struct re_pattern_buffer);
5932   *patbuf = zeropattern;
5933   if (ignore_case)
5934     {
5935       static char lc_trans[CHARS];
5936       int i;
5937       for (i = 0; i < CHARS; i++)
5938         lc_trans[i] = lowcase (i);
5939       patbuf->translate = lc_trans;     /* translation table to fold case  */
5940     }
5941
5942   if (multi_line)
5943     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5944   else
5945     pat = regexp_pattern;
5946
5947   if (single_line)
5948     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5949   else
5950     re_set_syntax (RE_SYNTAX_EMACS);
5951
5952   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5953   if (multi_line)
5954     free (pat);
5955   if (err != NULL)
5956     {
5957       error ("%s while compiling pattern", err);
5958       return;
5959     }
5960
5961   rp = p_head;
5962   p_head = xnew (1, regexp);
5963   p_head->pattern = savestr (regexp_pattern);
5964   p_head->p_next = rp;
5965   p_head->lang = lang;
5966   p_head->pat = patbuf;
5967   p_head->name = savestr (name);
5968   p_head->error_signaled = FALSE;
5969   p_head->force_explicit_name = force_explicit_name;
5970   p_head->ignore_case = ignore_case;
5971   p_head->multi_line = multi_line;
5972 }
5973
5974 /*
5975  * Do the substitutions indicated by the regular expression and
5976  * arguments.
5977  */
5978 static char *
5979 substitute (in, out, regs)
5980      char *in, *out;
5981      struct re_registers *regs;
5982 {
5983   char *result, *t;
5984   int size, dig, diglen;
5985
5986   result = NULL;
5987   size = strlen (out);
5988
5989   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5990   if (out[size - 1] == '\\')
5991     fatal ("pattern error in \"%s\"", out);
5992   for (t = etags_strchr (out, '\\');
5993        t != NULL;
5994        t = etags_strchr (t + 2, '\\'))
5995     if (ISDIGIT (t[1]))
5996       {
5997         dig = t[1] - '0';
5998         diglen = regs->end[dig] - regs->start[dig];
5999         size += diglen - 2;
6000       }
6001     else
6002       size -= 1;
6003
6004   /* Allocate space and do the substitutions. */
6005   assert (size >= 0);
6006   result = xnew (size + 1, char);
6007
6008   for (t = result; *out != '\0'; out++)
6009     if (*out == '\\' && ISDIGIT (*++out))
6010       {
6011         dig = *out - '0';
6012         diglen = regs->end[dig] - regs->start[dig];
6013         strncpy (t, in + regs->start[dig], diglen);
6014         t += diglen;
6015       }
6016     else
6017       *t++ = *out;
6018   *t = '\0';
6019
6020   assert (t <= result + size);
6021   assert (t - result == (int)strlen (result));
6022
6023   return result;
6024 }
6025
6026 /* Deallocate all regexps. */
6027 static void
6028 free_regexps ()
6029 {
6030   regexp *rp;
6031   while (p_head != NULL)
6032     {
6033       rp = p_head->p_next;
6034       free (p_head->pattern);
6035       free (p_head->name);
6036       free (p_head);
6037       p_head = rp;
6038     }
6039   return;
6040 }
6041
/*
 * Reads the whole file as a single string from `filebuf' and looks for
 * multi-line regular expressions, creating tags on matches.
 * readline already dealt with normal regexps.
 *
 * Idea by Ben Wing <ben@666.com> (2002).
 *
 * Each multi-line regexp in the p_head list that is generic or meant
 * for the language of the current file is searched repeatedly through
 * the buffer.  The globals lineno, charno and linecharno are reset
 * for each multi-line regexp and then advanced to the end of every
 * match, so that each tag records the line where its match ends.
 */
static void
regex_tag_multiline ()
{
  char *buffer = filebuf.buffer;
  regexp *rp;
  char *name;

  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      int match = 0;            /* start of the last match, or < 0 to stop */

      if (!rp->multi_line)
        continue;               /* skip normal regexps */

      /* Generic initialisations before parsing file from memory. */
      lineno = 1;               /* reset global line number */
      charno = 0;               /* reset global char number */
      linecharno = 0;           /* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
        continue;

      while (match >= 0 && match < filebuf.len)
        {
          /* Search forward from CHARNO, the end of the previous match.
             NOTE(review): the range argument is computed from MATCH,
             not from CHARNO, so start + range can exceed the buffer
             length; presumably re_search clips it -- confirm. */
          match = re_search (rp->pat, buffer, filebuf.len, charno,
                             filebuf.len - match, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* An empty match would not advance CHARNO and the
                 search would repeat forever: report it once and give
                 up on this regexp. */
              if (match == rp->regs.end[0])
                {
                  if (!rp->error_signaled)
                    {
                      error ("regexp matches the empty string: \"%s\"",
                             rp->pattern);
                      rp->error_signaled = TRUE;
                    }
                  match = -3;   /* exit from while loop */
                  break;
                }

              /* Match occurred.  Construct a tag. */
              /* Advance to the end of the match, keeping the line
                 number and the offset of the line start up to date. */
              while (charno < rp->regs.end[0])
                if (buffer[charno++] == '\n')
                  lineno++, linecharno = charno;
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (buffer, rp->name, &rp->regs);
              /* NOTE(review): when NAME is NULL the make_tag branch
                 below would call strlen on a null pointer.  add_regex
                 appears to set force_explicit_name for every regexp,
                 which would make that branch unreachable -- confirm. */
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, buffer + linecharno,
                        charno - linecharno + 1, lineno, linecharno);
              else
                make_tag (name, strlen (name), TRUE, buffer + linecharno,
                          charno - linecharno + 1, lineno, linecharno);
              break;
            }
        }
    }
}
6124
6125 \f
6126 static bool
6127 nocase_tail (cp)
6128      char *cp;
6129 {
6130   register int len = 0;
6131
6132   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6133     cp++, len++;
6134   if (*cp == '\0' && !intoken (dbp[len]))
6135     {
6136       dbp += len;
6137       return TRUE;
6138     }
6139   return FALSE;
6140 }
6141
6142 static void
6143 get_tag (bp, namepp)
6144      register char *bp;
6145      char **namepp;
6146 {
6147   register char *cp = bp;
6148
6149   if (*bp != '\0')
6150     {
6151       /* Go till you get to white space or a syntactic break */
6152       for (cp = bp + 1; !notinname (*cp); cp++)
6153         continue;
6154       make_tag (bp, cp - bp, TRUE,
6155                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6156     }
6157
6158   if (namepp != NULL)
6159     *namepp = savenstr (bp, cp - bp);
6160 }
6161
6162 /*
6163  * Read a line of text from `stream' into `lbp', excluding the
6164  * newline or CR-NL, if any.  Return the number of characters read from
6165  * `stream', which is the length of the line including the newline.
6166  *
6167  * On DOS or Windows we do not count the CR character, if any before the
6168  * NL, in the returned length; this mirrors the behavior of Emacs on those
6169  * platforms (for text files, it translates CR-NL to NL as it reads in the
6170  * file).
6171  *
6172  * If multi-line regular expressions are requested, each line read is
6173  * appended to `filebuf'.
6174  */
6175 static long
6176 readline_internal (lbp, stream)
6177      linebuffer *lbp;
6178      register FILE *stream;
6179 {
6180   char *buffer = lbp->buffer;
6181   register char *p = lbp->buffer;
6182   register char *pend;
6183   int chars_deleted;
6184
6185   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6186
6187   for (;;)
6188     {
6189       register int c = getc (stream);
6190       if (p == pend)
6191         {
6192           /* We're at the end of linebuffer: expand it. */
6193           lbp->size *= 2;
6194           xrnew (buffer, lbp->size, char);
6195           p += buffer - lbp->buffer;
6196           pend = buffer + lbp->size;
6197           lbp->buffer = buffer;
6198         }
6199       if (c == EOF)
6200         {
6201           *p = '\0';
6202           chars_deleted = 0;
6203           break;
6204         }
6205       if (c == '\n')
6206         {
6207           if (p > buffer && p[-1] == '\r')
6208             {
6209               p -= 1;
6210 #ifdef DOS_NT
6211              /* Assume CRLF->LF translation will be performed by Emacs
6212                 when loading this file, so CRs won't appear in the buffer.
6213                 It would be cleaner to compensate within Emacs;
6214                 however, Emacs does not know how many CRs were deleted
6215                 before any given point in the file.  */
6216               chars_deleted = 1;
6217 #else
6218               chars_deleted = 2;
6219 #endif
6220             }
6221           else
6222             {
6223               chars_deleted = 1;
6224             }
6225           *p = '\0';
6226           break;
6227         }
6228       *p++ = c;
6229     }
6230   lbp->len = p - buffer;
6231
6232   if (need_filebuf              /* we need filebuf for multi-line regexps */
6233       && chars_deleted > 0)     /* not at EOF */
6234     {
6235       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6236         {
6237           /* Expand filebuf. */
6238           filebuf.size *= 2;
6239           xrnew (filebuf.buffer, filebuf.size, char);
6240         }
6241       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6242       filebuf.len += lbp->len;
6243       filebuf.buffer[filebuf.len++] = '\n';
6244       filebuf.buffer[filebuf.len] = '\0';
6245     }
6246
6247   return lbp->len + chars_deleted;
6248 }
6249
6250 /*
6251  * Like readline_internal, above, but in addition try to match the
6252  * input line against relevant regular expressions and manage #line
6253  * directives.
6254  */
6255 static void
6256 readline (lbp, stream)
6257      linebuffer *lbp;
6258      FILE *stream;
6259 {
6260   long result;
6261
6262   linecharno = charno;          /* update global char number of line start */
6263   result = readline_internal (lbp, stream); /* read line */
6264   lineno += 1;                  /* increment global line number */
6265   charno += result;             /* increment global char number */
6266
6267   /* Honour #line directives. */
6268   if (!no_line_directive)
6269     {
6270       static bool discard_until_line_directive;
6271
6272       /* Check whether this is a #line directive. */
6273       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6274         {
6275           unsigned int lno;
6276           int start = 0;
6277
6278           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6279               && start > 0)     /* double quote character found */
6280             {
6281               char *endp = lbp->buffer + start;
6282
6283               while ((endp = etags_strchr (endp, '"')) != NULL
6284                      && endp[-1] == '\\')
6285                 endp++;
6286               if (endp != NULL)
6287                 /* Ok, this is a real #line directive.  Let's deal with it. */
6288                 {
6289                   char *taggedabsname;  /* absolute name of original file */
6290                   char *taggedfname;    /* name of original file as given */
6291                   char *name;           /* temp var */
6292
6293                   discard_until_line_directive = FALSE; /* found it */
6294                   name = lbp->buffer + start;
6295                   *endp = '\0';
6296                   canonicalize_filename (name); /* for DOS */
6297                   taggedabsname = absolute_filename (name, tagfiledir);
6298                   if (filename_is_absolute (name)
6299                       || filename_is_absolute (curfdp->infname))
6300                     taggedfname = savestr (taggedabsname);
6301                   else
6302                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6303
6304                   if (streq (curfdp->taggedfname, taggedfname))
6305                     /* The #line directive is only a line number change.  We
6306                        deal with this afterwards. */
6307                     free (taggedfname);
6308                   else
6309                     /* The tags following this #line directive should be
6310                        attributed to taggedfname.  In order to do this, set
6311                        curfdp accordingly. */
6312                     {
6313                       fdesc *fdp; /* file description pointer */
6314
6315                       /* Go look for a file description already set up for the
6316                          file indicated in the #line directive.  If there is
6317                          one, use it from now until the next #line
6318                          directive. */
6319                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6320                         if (streq (fdp->infname, curfdp->infname)
6321                             && streq (fdp->taggedfname, taggedfname))
6322                           /* If we remove the second test above (after the &&)
6323                              then all entries pertaining to the same file are
6324                              coalesced in the tags file.  If we use it, then
6325                              entries pertaining to the same file but generated
6326                              from different files (via #line directives) will
6327                              go into separate sections in the tags file.  These
6328                              alternatives look equivalent.  The first one
6329                              destroys some apparently useless information. */
6330                           {
6331                             curfdp = fdp;
6332                             free (taggedfname);
6333                             break;
6334                           }
6335                       /* Else, if we already tagged the real file, skip all
6336                          input lines until the next #line directive. */
6337                       if (fdp == NULL) /* not found */
6338                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6339                           if (streq (fdp->infabsname, taggedabsname))
6340                             {
6341                               discard_until_line_directive = TRUE;
6342                               free (taggedfname);
6343                               break;
6344                             }
6345                       /* Else create a new file description and use that from
6346                          now on, until the next #line directive. */
6347                       if (fdp == NULL) /* not found */
6348                         {
6349                           fdp = fdhead;
6350                           fdhead = xnew (1, fdesc);
6351                           *fdhead = *curfdp; /* copy curr. file description */
6352                           fdhead->next = fdp;
6353                           fdhead->infname = savestr (curfdp->infname);
6354                           fdhead->infabsname = savestr (curfdp->infabsname);
6355                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6356                           fdhead->taggedfname = taggedfname;
6357                           fdhead->usecharno = FALSE;
6358                           fdhead->prop = NULL;
6359                           fdhead->written = FALSE;
6360                           curfdp = fdhead;
6361                         }
6362                     }
6363                   free (taggedabsname);
6364                   lineno = lno - 1;
6365                   readline (lbp, stream);
6366                   return;
6367                 } /* if a real #line directive */
6368             } /* if #line is followed by a a number */
6369         } /* if line begins with "#line " */
6370
6371       /* If we are here, no #line directive was found. */
6372       if (discard_until_line_directive)
6373         {
6374           if (result > 0)
6375             {
6376               /* Do a tail recursion on ourselves, thus discarding the contents
6377                  of the line buffer. */
6378               readline (lbp, stream);
6379               return;
6380             }
6381           /* End of file. */
6382           discard_until_line_directive = FALSE;
6383           return;
6384         }
6385     } /* if #line directives should be considered */
6386
6387   {
6388     int match;
6389     regexp *rp;
6390     char *name;
6391
6392     /* Match against relevant regexps. */
6393     if (lbp->len > 0)
6394       for (rp = p_head; rp != NULL; rp = rp->p_next)
6395         {
6396           /* Only use generic regexps or those for the current language.
6397              Also do not use multiline regexps, which is the job of
6398              regex_tag_multiline. */
6399           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6400               || rp->multi_line)
6401             continue;
6402
6403           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6404           switch (match)
6405             {
6406             case -2:
6407               /* Some error. */
6408               if (!rp->error_signaled)
6409                 {
6410                   error ("regexp stack overflow while matching \"%s\"",
6411                          rp->pattern);
6412                   rp->error_signaled = TRUE;
6413                 }
6414               break;
6415             case -1:
6416               /* No match. */
6417               break;
6418             case 0:
6419               /* Empty string matched. */
6420               if (!rp->error_signaled)
6421                 {
6422                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6423                   rp->error_signaled = TRUE;
6424                 }
6425               break;
6426             default:
6427               /* Match occurred.  Construct a tag. */
6428               name = rp->name;
6429               if (name[0] == '\0')
6430                 name = NULL;
6431               else /* make a named tag */
6432                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6433               if (rp->force_explicit_name)
6434                 /* Force explicit tag name, if a name is there. */
6435                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6436               else
6437                 make_tag (name, strlen (name), TRUE,
6438                           lbp->buffer, match, lineno, linecharno);
6439               break;
6440             }
6441         }
6442   }
6443 }
6444
6445 \f
/*
 * Duplicate the whole string CP.  The copy is made by savenstr, asked
 * to copy strlen(CP) characters, so it occupies strlen(CP)+1 bytes
 * of newly allocated space and is returned to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6456
6457 /*
6458  * Return a pointer to a space of size LEN+1 allocated with xnew where
6459  * the string CP has been copied for at most the first LEN characters.
6460  */
6461 static char *
6462 savenstr (cp, len)
6463      char *cp;
6464      int len;
6465 {
6466   register char *dp;
6467
6468   dp = xnew (len + 1, char);
6469   strncpy (dp, cp, len);
6470   dp[len] = '\0';
6471   return dp;
6472 }
6473
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not appear.  The
 * terminating null character is examined too, so looking for '\0'
 * returns a pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  const char *last = NULL;      /* rightmost match seen so far */

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }

  return (char *) last;
}
6495
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not appear.  The
 * terminating null character is examined too, so looking for '\0'
 * returns a pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* whole string scanned, no match */
    }
}
6514
/*
 * Compare the strings S1 and S2.  Two characters that are both
 * alphabetic are compared after lowercasing them, any other pair is
 * compared as is.  Return the difference between the characters at
 * the first position where the strings differ, or at the end of S1.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6535
/*
 * Compare at most the first N characters of the strings S1 and S2.
 * Two characters that are both alphabetic are compared after
 * lowercasing them, any other pair is compared as is.  Return 0 if
 * the strings do not differ within their first N characters (in
 * particular when N is not positive), else the difference between
 * the characters at the first position where they differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N is tested first and decremented only after a successful
     comparison, so that the characters at position N are never looked
     at: when S1 ends exactly after N matching characters, the strings
     compare equal even if S2 goes on. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)                   /* no difference in the first N chars */
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6561
/* Return a pointer to the first character of CP, at or after its
   start, for which iswhite is false.  As the original comment puts
   it, the end of string is not a space, so the scan stops there. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;

  return cp;
}
6571
/* Return a pointer to the first character of CP, at or after its
   start, that is either the end of the string or a character for
   which iswhite is true. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6581
/* Report an error by handing S1 and S2 to error, which takes a printf
   control string and its argument, then terminate the program with
   status EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6590
/* Have perror print S1 together with the description of the current
   errno value, then terminate the program with status EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6598
6599 static void
6600 suggest_asking_for_help ()
6601 {
6602   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6603            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6604   exit (EXIT_FAILURE);
6605 }
6606
6607 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6608 static void
6609 error (s1, s2)
6610      const char *s1, *s2;
6611 {
6612   fprintf (stderr, "%s: ", progname);
6613   fprintf (stderr, s1, s2);
6614   fprintf (stderr, "\n");
6615 }
6616
6617 /* Return a newly-allocated string whose contents
6618    concatenate those of s1, s2, s3.  */
6619 static char *
6620 concat (s1, s2, s3)
6621      char *s1, *s2, *s3;
6622 {
6623   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6624   char *result = xnew (len1 + len2 + len3 + 1, char);
6625
6626   strcpy (result, s1);
6627   strcpy (result + len1, s2);
6628   strcpy (result + len1 + len2, s3);
6629   result[len1 + len2 + len3] = '\0';
6630
6631   return result;
6632 }
6633
6634 \f
6635 /* Does the same work as the system V getcwd, but does not need to
6636    guess the buffer size in advance. */
6637 static char *
6638 etags_getcwd ()
6639 {
6640 #ifdef HAVE_GETCWD
6641   int bufsize = 200;
6642   char *path = xnew (bufsize, char);
6643
6644   while (getcwd (path, bufsize) == NULL)
6645     {
6646       if (errno != ERANGE)
6647         pfatal ("getcwd");
6648       bufsize *= 2;
6649       free (path);
6650       path = xnew (bufsize, char);
6651     }
6652
6653   canonicalize_filename (path);
6654   return path;
6655
6656 #else /* not HAVE_GETCWD */
6657 #if MSDOS
6658
6659   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6660
6661   getwd (path);
6662
6663   for (p = path; *p != '\0'; p++)
6664     if (*p == '\\')
6665       *p = '/';
6666     else
6667       *p = lowcase (*p);
6668
6669   return strdup (path);
6670 #else /* not MSDOS */
6671   linebuffer path;
6672   FILE *pipe;
6673
6674   linebuffer_init (&path);
6675   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6676   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6677     pfatal ("pwd");
6678   pclose (pipe);
6679
6680   return path.buffer;
6681 #endif /* not MSDOS */
6682 #endif /* not HAVE_GETCWD */
6683 }
6684
6685 /* Return a newly allocated string containing the file name of FILE
6686    relative to the absolute directory DIR (which should end with a slash). */
6687 static char *
6688 relative_filename (file, dir)
6689      char *file, *dir;
6690 {
6691   char *fp, *dp, *afn, *res;
6692   int i;
6693
6694   /* Find the common root of file and dir (with a trailing slash). */
6695   afn = absolute_filename (file, cwd);
6696   fp = afn;
6697   dp = dir;
6698   while (*fp++ == *dp++)
6699     continue;
6700   fp--, dp--;                   /* back to the first differing char */
6701 #ifdef DOS_NT
6702   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6703     return afn;
6704 #endif
6705   do                            /* look at the equal chars until '/' */
6706     fp--, dp--;
6707   while (*fp != '/');
6708
6709   /* Build a sequence of "../" strings for the resulting relative file name. */
6710   i = 0;
6711   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6712     i += 1;
6713   res = xnew (3*i + strlen (fp + 1) + 1, char);
6714   res[0] = '\0';
6715   while (i-- > 0)
6716     strcat (res, "../");
6717
6718   /* Add the file name relative to the common root of file and dir. */
6719   strcat (res, fp + 1);
6720   free (afn);
6721
6722   return res;
6723 }
6724
/* Copy the string SRC, terminating null character included, to DST,
   one character at a time starting from the first one.  Unlike
   strcpy, whose behaviour is undefined when the two areas overlap,
   this is well defined when DST points into the same string as SRC at
   a lower or equal address, which is how absolute_filename uses it. */
static void
shift_string_down (dst, src)
     register char *dst;
     register const char *src;
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is just copied, else it is appended
   to DIR.  Then the "/dirname/.." and "/." substrings are deleted from
   the result.  Under DOS_NT, a non-absolute FILE with a drive letter
   is a fatal error. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the slash that begins the preceding
                 directory name. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap: do not use strcpy. */
              shift_string_down (cp, slashp + 3);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Source and destination overlap: do not use strcpy. */
              shift_string_down (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6788
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end with
   a slash).  FILE is canonicalized in place.  If it contains no
   slash, the result is a copy of DIR; else it is the absolute file
   name of the part of FILE up to and including its last slash. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *absdir;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash for the time of the call,
     then put back the character that was there. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  absdir = absolute_filename (file, dir);
  last_slash[1] = saved;

  return absdir;
}
6810
/* Tell whether the file name FN is absolute, that is whether it
   begins with a slash or, under DOS_NT, with a letter followed by a
   colon and a slash.  FN must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6823
/* Put the file name FN in canonical form.  Works in place.  Under
   DOS_NT a lowercase drive letter is made uppercase and backslashes
   are translated into slashes; elsewhere FN is left untouched. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* Nothing to do: just use the argument. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6842
6843 \f
6844 /* Initialize a linebuffer for use */
6845 static void
6846 linebuffer_init (lbp)
6847      linebuffer *lbp;
6848 {
6849   lbp->size = (DEBUG) ? 3 : 200;
6850   lbp->buffer = xnew (lbp->size, char);
6851   lbp->buffer[0] = '\0';
6852   lbp->len = 0;
6853 }
6854
6855 /* Set the minimum size of a string contained in a linebuffer. */
6856 static void
6857 linebuffer_setlen (lbp, toksize)
6858      linebuffer *lbp;
6859      int toksize;
6860 {
6861   while (lbp->size <= toksize)
6862     {
6863       lbp->size *= 2;
6864       xrnew (lbp->buffer, lbp->size, char);
6865     }
6866   lbp->len = toksize;
6867 }
6868
6869 /* Like malloc but get fatal error if memory is exhausted. */
6870 static PTR
6871 xmalloc (size)
6872      unsigned int size;
6873 {
6874   PTR result = (PTR) malloc (size);
6875   if (result == NULL)
6876     fatal ("virtual memory exhausted", (char *)NULL);
6877   return result;
6878 }
6879
6880 static PTR
6881 xrealloc (ptr, size)
6882      char *ptr;
6883      unsigned int size;
6884 {
6885   PTR result = (PTR) realloc (ptr, size);
6886   if (result == NULL)
6887     fatal ("virtual memory exhausted", (char *)NULL);
6888   return result;
6889 }
6890
6891 /*
6892  * Local Variables:
6893  * indent-tabs-mode: t
6894  * tab-width: 8
6895  * fill-column: 79
6896  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6897  * End:
6898  */
6899
6900 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6901    (do not change this comment) */
6902
6903 /* etags.c ends here */