lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5  This file is not considered part of GNU Emacs.
   6
   7  This program is free software; you can redistribute it and/or modify
   8  it under the terms of the GNU General Public License as published by
   9  the Free Software Foundation; either version 2 of the License, or
  10  (at your option) any later version.
  11
  12  This program is distributed in the hope that it will be useful,
  13  but WITHOUT ANY WARRANTY; without even the implied warranty of
  14  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15  GNU General Public License for more details.
  16
  17  You should have received a copy of the GNU General Public License
  18  along with this program; if not, write to the Free Software Foundation,
  19  Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  * 1989 Sam Kendall added C++.
  28  * 1992 Joseph B. Wells improved C and C++ parsing.
  29  * 1993 Francesco Potortì reorganised C and C++.
  30  * 1994 Line-by-line regexp tags by Tom Tromey.
  31  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  32  * 2002 #line directives by Francesco Potortì.
  33  *
  34  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  35  */
  36
  37 /*
  38  * If you want to add support for a new language, start by looking at the LUA
  39  * language, which is the simplest.  Alternatively, consider shipping a
  40  * configuration file containing regexp definitions for etags.
  41  */
  42
  43 char pot_etags_version[] = "@(#) pot revision number is 17.4";
  44
  45 #define TRUE    1
  46 #define FALSE   0
  47
  48 #ifdef DEBUG
  49 #  undef DEBUG
  50 #  define DEBUG TRUE
  51 #else
  52 #  define DEBUG  FALSE
  53 #  define NDEBUG                /* disable assert */
  54 #endif
  55
  56 #ifdef HAVE_CONFIG_H
  57 # include <config.h>
  58   /* On some systems, Emacs defines static as nothing for the sake
  59      of unexec.  We don't want that here since we don't use unexec. */
  60 # undef static
  61 # define ETAGS_REGEXPS          /* use the regexp features */
  62 # define LONG_OPTIONS           /* accept long options */
  63 # ifndef PTR                    /* for Xemacs */
  64 #   define PTR void *
  65 # endif
  66 # ifndef __P                    /* for Xemacs */
  67 #   define __P(args) args
  68 # endif
  69 #else  /* no config.h */
  70 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  71 #   define __P(args) args       /* use prototypes */
  72 #   define PTR void *           /* for generic pointers */
  73 # else /* not standard C */
  74 #   define __P(args) ()         /* no prototypes */
  75 #   define const                /* remove const for old compilers' sake */
  76 #   define PTR long *           /* don't use void* */
  77 # endif
  78 #endif /* !HAVE_CONFIG_H */
  79
  80 #ifndef _GNU_SOURCE
  81 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  82 #endif
  83
  84 #ifdef LONG_OPTIONS
  85 #  undef LONG_OPTIONS
  86 #  define LONG_OPTIONS TRUE
  87 #else
  88 #  define LONG_OPTIONS  FALSE
  89 #endif
  90
  91 /* WIN32_NATIVE is for Xemacs.
  92    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  93 #ifdef WIN32_NATIVE
  94 # undef MSDOS
  95 # undef  WINDOWSNT
  96 # define WINDOWSNT
  97 #endif /* WIN32_NATIVE */
  98
  99 #ifdef MSDOS
 100 # undef MSDOS
 101 # define MSDOS TRUE
 102 # include <fcntl.h>
 103 # include <sys/param.h>
 104 # include <io.h>
 105 # ifndef HAVE_CONFIG_H
 106 #   define DOS_NT
 107 #   include <sys/config.h>
 108 # endif
 109 #else
 110 # define MSDOS FALSE
 111 #endif /* MSDOS */
 112
 113 #ifdef WINDOWSNT
 114 # include <stdlib.h>
 115 # include <fcntl.h>
 116 # include <string.h>
 117 # include <direct.h>
 118 # include <io.h>
 119 # define MAXPATHLEN _MAX_PATH
 120 # undef HAVE_NTGUI
 121 # undef  DOS_NT
 122 # define DOS_NT
 123 # ifndef HAVE_GETCWD
 124 #   define HAVE_GETCWD
 125 # endif /* undef HAVE_GETCWD */
 126 #else /* not WINDOWSNT */
 127 # ifdef STDC_HEADERS
 128 #  include <stdlib.h>
 129 #  include <string.h>
 130 # else /* no standard C headers */
 131     extern char *getenv ();
 132 #  ifdef VMS
 133 #   define EXIT_SUCCESS 1
 134 #   define EXIT_FAILURE 0
 135 #  else /* no VMS */
 136 #   define EXIT_SUCCESS 0
 137 #   define EXIT_FAILURE 1
 138 #  endif
 139 # endif
 140 #endif /* !WINDOWSNT */
 141
 142 #ifdef HAVE_UNISTD_H
 143 # include <unistd.h>
 144 #else
 145 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 146     extern char *getcwd (char *buf, size_t size);
 147 # endif
 148 #endif /* HAVE_UNISTD_H */
 149
 150 #include <stdio.h>
 151 #include <ctype.h>
 152 #include <errno.h>
 153 #ifndef errno
 154   extern int errno;
 155 #endif
 156 #include <sys/types.h>
 157 #include <sys/stat.h>
 158
 159 #include <assert.h>
 160 #ifdef NDEBUG
 161 # undef  assert                 /* some systems have a buggy assert.h */
 162 # define assert(x) ((void) 0)
 163 #endif
 164
 165 #if !defined (S_ISREG) && defined (S_IFREG)
 166 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 167 #endif
 168
 169 #if LONG_OPTIONS
 170 # include <getopt.h>
 171 #else
 172 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 173   extern char *optarg;
 174   extern int optind, opterr;
 175 #endif /* LONG_OPTIONS */
 176
 177 #ifdef ETAGS_REGEXPS
 178 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 179 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 180                              !!! NOTICE !!!
 181  the regex.h distributed with Cygwin is not compatible with etags, alas!
 182 If you want regular expression support, you should delete this notice and
 183               arrange to use the GNU regex.h and regex.c.
 184 #   endif
 185 # endif
 186 # include <regex.h>
 187 #endif /* ETAGS_REGEXPS */
 188
 189 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 190  Leave it undefined to make the program "etags", which makes emacs-style
 191  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 192 #ifdef CTAGS
 193 # undef  CTAGS
 194 # define CTAGS TRUE
 195 #else
 196 # define CTAGS FALSE
 197 #endif
 198
 199 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 200 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 201 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 202 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 203
  204 #define CHARS 256               /* number of distinct char values: 2^CHAR_BIT for 8-bit chars */
  205 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* reduce x to a table index in [0, CHARS) */
  206 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
  207 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
  208 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
  209 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
  210 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 211
 212 #define ISALNUM(c)      isalnum (CHAR(c))
 213 #define ISALPHA(c)      isalpha (CHAR(c))
 214 #define ISDIGIT(c)      isdigit (CHAR(c))
 215 #define ISLOWER(c)      islower (CHAR(c))
 216
 217 #define lowcase(c)      tolower (CHAR(c))
 218 #define upcase(c)       toupper (CHAR(c))
 219
 220
 221 /*
 222  *      xnew, xrnew -- allocate, reallocate storage
 223  *
 224  * SYNOPSIS:    Type *xnew (int n, Type);
 225  *              void xrnew (OldPointer, int n, Type);
 226  */
 227 #if DEBUG
 228 # include "chkmalloc.h"
 229 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 230                                                   (n) * sizeof (Type)))
 231 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 232                                         (char *) (op), (n) * sizeof (Type)))
 233 #else
 234 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 235 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 236                                         (char *) (op), (n) * sizeof (Type)))
 237 #endif
 238
 239 #define bool int
 240
 241 typedef void Lang_function __P((FILE *));
 242
 243 typedef struct
 244 {
 245   char *suffix;                 /* file name suffix for this compressor */
 246   char *command;                /* takes one arg and decompresses to stdout */
 247 } compressor;
 248
 249 typedef struct
 250 {
 251   char *name;                   /* language name */
 252   char *help;                   /* detailed help for the language */
 253   Lang_function *function;      /* parse function */
 254   char **suffixes;              /* name suffixes of this language's files */
 255   char **filenames;             /* names of this language's files */
 256   char **interpreters;          /* interpreters for this language */
 257   bool metasource;              /* source used to generate other sources */
 258 } language;
 259
 260 typedef struct fdesc
 261 {
 262   struct fdesc *next;           /* for the linked list */
 263   char *infname;                /* uncompressed input file name */
 264   char *infabsname;             /* absolute uncompressed input file name */
 265   char *infabsdir;              /* absolute dir of input file */
 266   char *taggedfname;            /* file name to write in tagfile */
 267   language *lang;               /* language of file */
 268   char *prop;                   /* file properties to write in tagfile */
 269   bool usecharno;               /* etags tags shall contain char number */
 270   bool written;                 /* entry written in the tags file */
 271 } fdesc;
 272
  273 typedef struct node_st
  274 {                               /* one tag; a node of the binary tree of tags */
  275   struct node_st *left, *right; /* left and right children */
  276   fdesc *fdp;                   /* description of the file to which the tag belongs */
  277   char *name;                   /* tag name */
  278   char *regex;                  /* search regexp */
  279   bool valid;                   /* write this tag on the tag file */
  280   bool is_func;                 /* function tag: use regexp in CTAGS mode */
  281   bool been_warned;             /* warning already given for duplicated tag */
  282   int lno;                      /* line number tag is on */
  283   long cno;                     /* character number line starts on */
  284 } node;
 285
 286 /*
 287  * A `linebuffer' is a structure which holds a line of text.
 288  * `readline_internal' reads a line from a stream into a linebuffer
 289  * and works regardless of the length of the line.
 290  * SIZE is the size of BUFFER, LEN is the length of the string in
 291  * BUFFER after readline reads it.
 292  */
  293 typedef struct
  294 {
  295   long size;                    /* size of BUFFER */
  296   int len;                      /* length of the string currently held in BUFFER */
  297   char *buffer;                 /* the text itself */
  298 } linebuffer;
 299
 300 /* Used to support mixing of --lang and file names. */
 301 typedef struct
 302 {
 303   enum {
 304     at_language,                /* a language specification */
 305     at_regexp,                  /* a regular expression */
 306     at_filename,                /* a file name */
 307     at_stdin,                   /* read from stdin here */
 308     at_end                      /* stop parsing the list */
 309   } arg_type;                   /* argument type */
 310   language *lang;               /* language associated with the argument */
 311   char *what;                   /* the argument itself */
 312 } argument;
 313
 314 #ifdef ETAGS_REGEXPS
 315 /* Structure defining a regular expression. */
  316 typedef struct regexp
  317 {
  318   struct regexp *p_next;        /* pointer to next in list */
  319   language *lang;               /* if set, use only for this language */
  320   char *pattern;                /* the regexp pattern */
  321   char *name;                   /* tag name */
  322   struct re_pattern_buffer *pat; /* the compiled pattern */
  323   struct re_registers regs;     /* re registers */
  324   bool error_signaled;          /* already signaled for this regexp */
  325   bool force_explicit_name;     /* do not allow implicit tag name */
  326   bool ignore_case;             /* ignore case when matching */
  327   bool multi_line;              /* do a multi-line match on the whole file */
  328 } regexp;
 329 #endif /* ETAGS_REGEXPS */
 330
 331
 332 /* Many compilers barf on this:
 333         Lang_function Ada_funcs;
 334    so let's write it this way */
 335 static void Ada_funcs __P((FILE *));
 336 static void Asm_labels __P((FILE *));
 337 static void C_entries __P((int c_ext, FILE *));
 338 static void default_C_entries __P((FILE *));
 339 static void plain_C_entries __P((FILE *));
 340 static void Cjava_entries __P((FILE *));
 341 static void Cobol_paragraphs __P((FILE *));
 342 static void Cplusplus_entries __P((FILE *));
 343 static void Cstar_entries __P((FILE *));
 344 static void Erlang_functions __P((FILE *));
 345 static void Fortran_functions __P((FILE *));
 346 static void HTML_labels __P((FILE *));
 347 static void Lisp_functions __P((FILE *));
 348 static void Lua_functions __P((FILE *));
 349 static void Makefile_targets __P((FILE *));
 350 static void Pascal_functions __P((FILE *));
 351 static void Perl_functions __P((FILE *));
 352 static void PHP_functions __P((FILE *));
 353 static void PS_functions __P((FILE *));
 354 static void Prolog_functions __P((FILE *));
 355 static void Python_functions __P((FILE *));
 356 static void Scheme_functions __P((FILE *));
 357 static void TeX_commands __P((FILE *));
 358 static void Texinfo_nodes __P((FILE *));
 359 static void Yacc_entries __P((FILE *));
 360 static void just_read_file __P((FILE *));
 361
 362 static void print_language_names __P((void));
 363 static void print_version __P((void));
 364 static void print_help __P((argument *));
 365 int main __P((int, char **));
 366
 367 static compressor *get_compressor_from_suffix __P((char *, char **));
 368 static language *get_language_from_langname __P((const char *));
 369 static language *get_language_from_interpreter __P((char *));
 370 static language *get_language_from_filename __P((char *, bool));
 371 static void readline __P((linebuffer *, FILE *));
 372 static long readline_internal __P((linebuffer *, FILE *));
 373 static bool nocase_tail __P((char *));
 374 static void get_tag __P((char *, char **));
 375
 376 #ifdef ETAGS_REGEXPS
 377 static void analyse_regex __P((char *));
 378 static void free_regexps __P((void));
 379 static void regex_tag_multiline __P((void));
 380 #endif /* ETAGS_REGEXPS */
 381 static void error __P((const char *, const char *));
 382 static void suggest_asking_for_help __P((void));
 383 void fatal __P((char *, char *));
 384 static void pfatal __P((char *));
 385 static void add_node __P((node *, node **));
 386
 387 static void init __P((void));
 388 static void process_file_name __P((char *, language *));
 389 static void process_file __P((FILE *, char *, language *));
 390 static void find_entries __P((FILE *));
 391 static void free_tree __P((node *));
 392 static void free_fdesc __P((fdesc *));
 393 static void pfnote __P((char *, bool, char *, int, int, long));
 394 static void make_tag __P((char *, int, bool, char *, int, int, long));
 395 static void invalidate_nodes __P((fdesc *, node **));
 396 static void put_entries __P((node *));
 397
 398 static char *concat __P((char *, char *, char *));
 399 static char *skip_spaces __P((char *));
 400 static char *skip_non_spaces __P((char *));
 401 static char *savenstr __P((char *, int));
 402 static char *savestr __P((char *));
 403 static char *etags_strchr __P((const char *, int));
 404 static char *etags_strrchr __P((const char *, int));
 405 static int etags_strcasecmp __P((const char *, const char *));
 406 static int etags_strncasecmp __P((const char *, const char *, int));
 407 static char *etags_getcwd __P((void));
 408 static char *relative_filename __P((char *, char *));
 409 static char *absolute_filename __P((char *, char *));
 410 static char *absolute_dirname __P((char *, char *));
 411 static bool filename_is_absolute __P((char *f));
 412 static void canonicalize_filename __P((char *));
 413 static void linebuffer_init __P((linebuffer *));
 414 static void linebuffer_setlen __P((linebuffer *, int));
 415 static PTR xmalloc __P((unsigned int));
 416 static PTR xrealloc __P((char *, unsigned int));
 417
 418 \f
 419 static char searchar = '/';     /* use /.../ searches */
 420
 421 static char *tagfile;           /* output file */
 422 static char *progname;          /* name this program was invoked with */
 423 static char *cwd;               /* current working directory */
 424 static char *tagfiledir;        /* directory of tagfile */
 425 static FILE *tagf;              /* ioptr for tags file */
 426
 427 static fdesc *fdhead;           /* head of file description list */
 428 static fdesc *curfdp;           /* current file description */
 429 static int lineno;              /* line number of current line */
 430 static long charno;             /* current character number */
 431 static long linecharno;         /* charno of start of current line */
 432 static char *dbp;               /* pointer to start of current tag */
 433
 434 static const int invalidcharno = -1;
 435
 436 static node *nodehead;          /* the head of the binary tree of tags */
 437 static node *last_node;         /* the last node created */
 438
 439 static linebuffer lb;           /* the current line */
 440 static linebuffer filebuf;      /* a buffer containing the whole file */
 441 static linebuffer token_name;   /* a buffer containing a tag name */
 442
 443 /* boolean "functions" (see init)       */
 444 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 445 static char
 446   /* white chars */
 447   *white = " \f\t\n\r\v",
 448   /* not in a name */
 449   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 450   /* token ending chars */
 451   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 452   /* token starting chars */
 453   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 454   /* valid in-token chars */
 455   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 456
 457 static bool append_to_tagfile;  /* -a: append to tags */
 458 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 459 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 460 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 461                                 /* 0 struct/enum/union decls, and C++ */
 462                                 /* member functions. */
 463 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 464                                 /* constants and variables. */
 465                                 /* -D: opposite of -d.  Default under ctags. */
 466 static bool globals;            /* create tags for global variables */
 467 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 468 static bool members;            /* create tags for C member variables */
 469 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 470 static bool update;             /* -u: update tags */
 471 static bool vgrind_style;       /* -v: create vgrind style index output */
 472 static bool no_warnings;        /* -w: suppress warnings */
 473 static bool cxref_style;        /* -x: create cxref style output */
 474 static bool cplusplus;          /* .[hc] means C++, not C */
 475 static bool ignoreindent;       /* -I: ignore indentation in C */
 476 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 477
 478 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 479 static bool parsing_stdin;      /* --parse-stdin used */
 480
 481 #ifdef ETAGS_REGEXPS
 482 static regexp *p_head;          /* list of all regexps */
 483 static bool need_filebuf;       /* some regexes are multi-line */
 484 #else
 485 # define need_filebuf FALSE
 486 #endif /* ETAGS_REGEXPS */
 487
 488 #if LONG_OPTIONS
  489 static struct option longopts[] = /* long options passed to getopt_long */
  490 {
  491   { "packages-only",      no_argument,       &packages_only,     TRUE  },
  492   { "c++",                no_argument,       NULL,               'C'   },
  493   { "declarations",       no_argument,       &declarations,      TRUE  },
  494   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  495   { "help",               no_argument,       NULL,               'h'   },
  496   { "help",               no_argument,       NULL,               'H'   },
  497   { "ignore-indentation", no_argument,       NULL,               'I'   },
  498   { "language",           required_argument, NULL,               'l'   },
  499   { "members",            no_argument,       &members,           TRUE  },
  500   { "no-members",         no_argument,       &members,           FALSE },
  501   { "output",             required_argument, NULL,               'o'   },
  502 #ifdef ETAGS_REGEXPS
  503   { "regex",              required_argument, NULL,               'r'   },
  504   { "no-regex",           no_argument,       NULL,               'R'   },
  505   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  506 #endif /* ETAGS_REGEXPS */
  507   { "parse-stdin",        required_argument, NULL,               STDIN },
  508   { "version",            no_argument,       NULL,               'V'   },
  509
  510 #if CTAGS /* Ctags options */
  511   { "backward-search",    no_argument,       NULL,               'B'   },
  512   { "cxref",              no_argument,       NULL,               'x'   },
  513   { "defines",            no_argument,       NULL,               'd'   },
  514   { "globals",            no_argument,       &globals,           TRUE  },
  515   { "typedefs",           no_argument,       NULL,               't'   },
  516   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  517   { "update",             no_argument,       NULL,               'u'   },
  518   { "vgrind",             no_argument,       NULL,               'v'   },
  519   { "no-warn",            no_argument,       NULL,               'w'   },
  520
  521 #else /* Etags options */
  522   { "append",             no_argument,       NULL,               'a'   },
  523   { "no-defines",         no_argument,       NULL,               'D'   },
  524   { "no-globals",         no_argument,       &globals,           FALSE },
  525   { "include",            required_argument, NULL,               'i'   },
  526 #endif
  527   { NULL }
  528 };
 529 #endif /* LONG_OPTIONS */
 530
 531 static compressor compressors[] =
 532 {
 533   { "z", "gzip -d -c"},
 534   { "Z", "gzip -d -c"},
 535   { "gz", "gzip -d -c"},
 536   { "GZ", "gzip -d -c"},
 537   { "bz2", "bzip2 -d -c" },
 538   { NULL }
 539 };
 540
 541 /*
 542  * Language stuff.
 543  */
 544
 545 /* Ada code */
 546 static char *Ada_suffixes [] =
 547   { "ads", "adb", "ada", NULL };
 548 static char Ada_help [] =
 549 "In Ada code, functions, procedures, packages, tasks and types are\n\
 550 tags.  Use the `--packages-only' option to create tags for\n\
 551 packages only.\n\
 552 Ada tag names have suffixes indicating the type of entity:\n\
 553         Entity type:    Qualifier:\n\
 554         ------------    ----------\n\
 555         function        /f\n\
 556         procedure       /p\n\
 557         package spec    /s\n\
 558         package body    /b\n\
 559         type            /t\n\
 560         task            /k\n\
 561 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 562 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 563 will just search for any tag `bidule'.";
 564
 565 /* Assembly code */
 566 static char *Asm_suffixes [] =
 567   { "a",        /* Unix assembler */
 568     "asm", /* Microcontroller assembly */
 569     "def", /* BSO/Tasking definition includes  */
 570     "inc", /* Microcontroller include files */
 571     "ins", /* Microcontroller include files */
 572     "s", "sa", /* Unix assembler */
 573     "S",   /* cpp-processed Unix assembler */
 574     "src", /* BSO/Tasking C compiler output */
 575     NULL
 576   };
 577 static char Asm_help [] =
 578 "In assembler code, labels appearing at the beginning of a line,\n\
 579 followed by a colon, are tags.";
 580
 581
 582 /* Note that .c and .h can be considered C++, if the --c++ flag was
  583    given, or if the `class' or `template' keywords are met inside the file.
 584    That is why default_C_entries is called for these. */
 585 static char *default_C_suffixes [] =
 586   { "c", "h", NULL };
 587 static char default_C_help [] =
 588 "In C code, any C function or typedef is a tag, and so are\n\
 589 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 590 definitions and `enum' constants are tags unless you specify\n\
 591 `--no-defines'.  Global variables are tags unless you specify\n\
 592 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 593 can make the tags table file much smaller.\n\
 594 You can tag function declarations and external variables by\n\
 595 using `--declarations', and struct members by using `--members'.";
 596
 597 static char *Cplusplus_suffixes [] =
 598   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 599     "M",                        /* Objective C++ */
 600     "pdb",                      /* Postscript with C syntax */
 601     NULL };
 602 static char Cplusplus_help [] =
 603 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 604 --help --lang=c --lang=c++ for full help.)\n\
 605 In addition to C tags, member functions are also recognized, and\n\
 606 optionally member variables if you use the `--members' option.\n\
 607 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 608 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 609 `operator+'.";
 610
 611 static char *Cjava_suffixes [] =
 612   { "java", NULL };
 613 static char Cjava_help [] =
 614 "In Java code, all the tags constructs of C and C++ code are\n\
 615 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 616
 617
 618 static char *Cobol_suffixes [] =
 619   { "COB", "cob", NULL };
 620 static char Cobol_help [] =
 621 "In Cobol code, tags are paragraph names; that is, any word\n\
 622 starting in column 8 and followed by a period.";
 623
 624 static char *Cstar_suffixes [] =
 625   { "cs", "hs", NULL };
 626
 627 static char *Erlang_suffixes [] =
 628   { "erl", "hrl", NULL };
 629 static char Erlang_help [] =
 630 "In Erlang code, the tags are the functions, records and macros\n\
 631 defined in the file.";
 632
 633 static char *Fortran_suffixes [] =
 634   { "F", "f", "f90", "for", NULL };
 635 static char Fortran_help [] =
 636 "In Fortran code, functions, subroutines and block data are tags.";
 637
 638 static char *HTML_suffixes [] =
 639   { "htm", "html", "shtml", NULL };
 640 static char HTML_help [] =
 641 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 642 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 643 occurrences of `id='.";
 644
 645 static char *Lisp_suffixes [] =
 646   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 647 static char Lisp_help [] =
 648 "In Lisp code, any function defined with `defun', any variable\n\
 649 defined with `defvar' or `defconst', and in general the first\n\
 650 argument of any expression that starts with `(def' in column zero\n\
 651 is a tag.";
 652
 653 static char *Lua_suffixes [] =
 654   { "lua", "LUA", NULL };
 655 static char Lua_help [] =
 656 "In Lua scripts, all functions are tags.";
 657
 658 static char *Makefile_filenames [] =
 659   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 660 static char Makefile_help [] =
 661 "In makefiles, targets are tags; additionally, variables are tags\n\
 662 unless you specify `--no-globals'.";
 663
 664 static char *Objc_suffixes [] =
 665   { "lm",                       /* Objective lex file */
 666     "m",                        /* Objective C file */
 667      NULL };
 668 static char Objc_help [] =
 669 "In Objective C code, tags include Objective C definitions for classes,\n\
 670 class categories, methods and protocols.  Tags for variables and\n\
 671 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 672
 673 static char *Pascal_suffixes [] =
 674   { "p", "pas", NULL };
 675 static char Pascal_help [] =
 676 "In Pascal code, the tags are the functions and procedures defined\n\
 677 in the file.";
 678
 679 static char *Perl_suffixes [] =
 680   { "pl", "pm", NULL };
 681 static char *Perl_interpreters [] =
 682   { "perl", "@PERL@", NULL };
 683 static char Perl_help [] =
 684 "In Perl code, the tags are the packages, subroutines and variables\n\
 685 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 686 `--globals' if you want to tag global variables.  Tags for\n\
 687 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 688 defined in the default package is `main::SUB'.";
 689
 690 static char *PHP_suffixes [] =
 691   { "php", "php3", "php4", NULL };
 692 static char PHP_help [] =
 693 "In PHP code, tags are functions, classes and defines.  When using\n\
 694 the `--members' option, vars are tags too.";
 695
 696 static char *plain_C_suffixes [] =
 697   { "pc",                       /* Pro*C file */
 698      NULL };
 699
 700 static char *PS_suffixes [] =
 701   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 702 static char PS_help [] =
 703 "In PostScript code, the tags are the functions.";
 704
 705 static char *Prolog_suffixes [] =
 706   { "prolog", NULL };
 707 static char Prolog_help [] =
 708 "In Prolog code, tags are predicates and rules at the beginning of\n\
 709 line.";
 710
 711 static char *Python_suffixes [] =
 712   { "py", NULL };
 713 static char Python_help [] =
 714 "In Python code, `def' or `class' at the beginning of a line\n\
 715 generate a tag.";
 716
 717 /* Can't do the `SCM' or `scm' prefix with a version number. */
 718 static char *Scheme_suffixes [] =
 719   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 720 static char Scheme_help [] =
 721 "In Scheme code, tags include anything defined with `def' or with a\n\
 722 construct whose name starts with `def'.  They also include\n\
 723 variables set with `set!' at top level in the file.";
 724
 725 static char *TeX_suffixes [] =
 726   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 727 static char TeX_help [] =
 728 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 729 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 730 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 731 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 732 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 733 \n\
 734 Other commands can be specified by setting the environment variable\n\
 735 `TEXTAGS' to a colon-separated list like, for example,\n\
 736      TEXTAGS=\"mycommand:myothercommand\".";
 737
 738
 739 static char *Texinfo_suffixes [] =
 740   { "texi", "texinfo", "txi", NULL };
 741 static char Texinfo_help [] =
 742 "for texinfo files, lines starting with @node are tagged.";
 743
 744 static char *Yacc_suffixes [] =
 745   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 746 static char Yacc_help [] =
 747 "In Bison or Yacc input files, each rule defines as a tag the\n\
 748 nonterminal it constructs.  The portions of the file that contain\n\
 749 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 750 for full help).";
 751
 752 static char auto_help [] =
 753 "`auto' is not a real language, it indicates to use\n\
 754 a default language for files base on file name suffix and file contents.";
 755
 756 static char none_help [] =
 757 "`none' is not a real language, it indicates to only do\n\
 758 regexp processing on files.";
 759
 760 static char no_lang_help [] =
 761 "No detailed help available for this language.";
 762
 763
 764 /*
 765  * Table of languages.
 766  *
 767  * It is ok for a given function to be listed under more than one
 768  * name.  I just didn't.
 769  */
 770
 771 static language lang_names [] =
 772 {
 773   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 774   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 775   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 776   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 777   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 778   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 779   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 780   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 781   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 782   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 783   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 784   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 785   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 786   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 787   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 788   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 789   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 790   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 791   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 792   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 793   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 794   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 795   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 796   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 797   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 798   { "auto",      auto_help },                      /* default guessing scheme */
 799   { "none",      none_help,      just_read_file }, /* regexp matching only */
 800   { NULL }                /* end of list */
 801 };
 802
 803 \f
 804 static void
 805 print_language_names ()
 806 {
 807   language *lang;
 808   char **name, **ext;
 809
 810   puts ("\nThese are the currently supported languages, along with the\n\
 811 default file names and dot suffixes:");
 812   for (lang = lang_names; lang->name != NULL; lang++)
 813     {
 814       printf ("  %-*s", 10, lang->name);
 815       if (lang->filenames != NULL)
 816         for (name = lang->filenames; *name != NULL; name++)
 817           printf (" %s", *name);
 818       if (lang->suffixes != NULL)
 819         for (ext = lang->suffixes; *ext != NULL; ext++)
 820           printf (" .%s", *ext);
 821       puts ("");
 822     }
 823   puts ("where `auto' means use default language for files based on file\n\
 824 name suffix, and `none' means only do regexp processing on files.\n\
 825 If no language is specified and no matching suffix is found,\n\
 826 the first line of the file is read for a sharp-bang (#!) sequence\n\
 827 followed by the name of an interpreter.  If no such sequence is found,\n\
 828 Fortran is tried first; if no tags are found, C is tried next.\n\
 829 When parsing any C file, a \"class\" or \"template\" keyword\n\
 830 switches to C++.");
 831   puts ("Compressed files are supported using gzip and bzip2.\n\
 832 \n\
 833 For detailed help on a given language use, for example,\n\
 834 etags --help --lang=ada.");
 835 }
 836
 837 #ifndef EMACS_NAME
 838 # define EMACS_NAME "standalone"
 839 #endif
 840 #ifndef VERSION
 841 # define VERSION "version"
 842 #endif
 843 static void
 844 print_version ()
 845 {
 846   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 847   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 848   puts ("This program is distributed under the same terms as Emacs");
 849
 850   exit (EXIT_SUCCESS);
 851 }
 852
 853 static void
 854 print_help (argbuffer)
 855      argument *argbuffer;
 856 {
 857   bool help_for_lang = FALSE;
 858
 859   for (; argbuffer->arg_type != at_end; argbuffer++)
 860     if (argbuffer->arg_type == at_language)
 861       {
 862         if (help_for_lang)
 863           puts ("");
 864         puts (argbuffer->lang->help);
 865         help_for_lang = TRUE;
 866       }
 867
 868   if (help_for_lang)
 869     exit (EXIT_SUCCESS);
 870
 871   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 872 \n\
 873 These are the options accepted by %s.\n", progname, progname);
 874   if (LONG_OPTIONS)
 875     puts ("You may use unambiguous abbreviations for the long option names.");
 876   else
 877     puts ("Long option names do not work with this executable, as it is not\n\
 878 linked with GNU getopt.");
 879   puts ("  A - as file name means read names from stdin (one per line).\n\
 880 Absolute names are stored in the output file as they are.\n\
 881 Relative ones are stored relative to the output file's directory.\n");
 882
 883   if (!CTAGS)
 884     puts ("-a, --append\n\
 885         Append tag entries to existing tags file.");
 886
 887   puts ("--packages-only\n\
 888         For Ada files, only generate tags for packages.");
 889
 890   if (CTAGS)
 891     puts ("-B, --backward-search\n\
 892         Write the search commands for the tag entries using '?', the\n\
 893         backward-search command instead of '/', the forward-search command.");
 894
 895   /* This option is mostly obsolete, because etags can now automatically
 896      detect C++.  Retained for backward compatibility and for debugging and
 897      experimentation.  In principle, we could want to tag as C++ even
 898      before any "class" or "template" keyword.
 899   puts ("-C, --c++\n\
 900         Treat files whose name suffix defaults to C language as C++ files.");
 901   */
 902
 903   puts ("--declarations\n\
 904         In C and derived languages, create tags for function declarations,");
 905   if (CTAGS)
 906     puts ("\tand create tags for extern variables if --globals is used.");
 907   else
 908     puts
 909       ("\tand create tags for extern variables unless --no-globals is used.");
 910
 911   if (CTAGS)
 912     puts ("-d, --defines\n\
 913         Create tag entries for C #define constants and enum constants, too.");
 914   else
 915     puts ("-D, --no-defines\n\
 916         Don't create tag entries for C #define constants and enum constants.\n\
 917         This makes the tags file smaller.");
 918
 919   if (!CTAGS)
 920     puts ("-i FILE, --include=FILE\n\
 921         Include a note in tag file indicating that, when searching for\n\
 922         a tag, one should also consult the tags file FILE after\n\
 923         checking the current file.");
 924
 925   puts ("-l LANG, --language=LANG\n\
 926         Force the following files to be considered as written in the\n\
 927         named language up to the next --language=LANG option.");
 928
 929   if (CTAGS)
 930     puts ("--globals\n\
 931         Create tag entries for global variables in some languages.");
 932   else
 933     puts ("--no-globals\n\
 934         Do not create tag entries for global variables in some\n\
 935         languages.  This makes the tags file smaller.");
 936   puts ("--members\n\
 937         Create tag entries for members of structures in some languages.");
 938
 939 #ifdef ETAGS_REGEXPS
 940   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 941         Make a tag for each line matching a regular expression pattern\n\
 942         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 943         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 944         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 945         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 946   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 947         For example Tcl named tags can be created with:\n\
 948           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 949         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 950         `m' means to allow multi-line matches, `s' implies `m' and\n\
 951         causes dot to match any character, including newline.");
 952   puts ("-R, --no-regex\n\
 953         Don't create tags from regexps for the following files.");
 954 #endif /* ETAGS_REGEXPS */
 955   puts ("-I, --ignore-indentation\n\
 956         In C and C++ do not assume that a closing brace in the first\n\
 957         column is the final brace of a function or structure definition.");
 958   puts ("-o FILE, --output=FILE\n\
 959         Write the tags to FILE.");
 960   puts ("--parse-stdin=NAME\n\
 961         Read from standard input and record tags as belonging to file NAME.");
 962
 963   if (CTAGS)
 964     {
 965       puts ("-t, --typedefs\n\
 966         Generate tag entries for C and Ada typedefs.");
 967       puts ("-T, --typedefs-and-c++\n\
 968         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 969         and C++ member functions.");
 970     }
 971
 972   if (CTAGS)
 973     puts ("-u, --update\n\
 974         Update the tag entries for the given files, leaving tag\n\
 975         entries for other files in place.  Currently, this is\n\
 976         implemented by deleting the existing entries for the given\n\
 977         files and then rewriting the new entries at the end of the\n\
 978         tags file.  It is often faster to simply rebuild the entire\n\
 979         tag file than to use this.");
 980
 981   if (CTAGS)
 982     {
 983       puts ("-v, --vgrind\n\
 984         Generates an index of items intended for human consumption,\n\
 985         similar to the output of vgrind.  The index is sorted, and\n\
 986         gives the page number of each item.");
 987       puts ("-w, --no-warn\n\
 988         Suppress warning messages about entries defined in multiple\n\
 989         files.");
 990       puts ("-x, --cxref\n\
 991         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 992         The output uses line numbers instead of page numbers, but\n\
 993         beyond that the differences are cosmetic; try both to see\n\
 994         which you like.");
 995     }
 996
 997   puts ("-V, --version\n\
 998         Print the version of the program.\n\
 999 -h, --help\n\
1000         Print this help message.\n\
1001         Followed by one or more `--language' options prints detailed\n\
1002         help about tag generation for the specified languages.");
1003
1004   print_language_names ();
1005
1006   puts ("");
1007   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1008
1009   exit (EXIT_SUCCESS);
1010 }
1011
1012 \f
1013 #ifdef VMS                      /* VMS specific functions */
1014
/* End-of-string marker. */
#define EOS '\0'

/* Longest file specification that fits in a vspec.  This fixed limit
   is acknowledged as a bug: any arbitrary limit is one.  Won't
   someone please fix this?  */
#define MAX_FILE_SPEC_LEN 255

/* A length-prefixed string buffer: CURLEN is the length of the text
   held in BODY (fn_exp stores the terminator at body[curlen]); BODY
   has one extra byte for that terminator.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1024
1025 /*
1026  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1027  returning in each successive call the next file name matching the input
1028  spec. The function expects that each in_spec passed
1029  to it will be processed to completion; in particular, up to and
1030  including the call following that in which the last matching name
1031  is returned, the function ignores the value of in_spec, and will
1032  only start processing a new spec with the following call.
1033  If an error occurs, on return out_spec contains the value
1034  of in_spec when the error occurred.
1035
1036  With each successive file name returned in out_spec, the
1037  function's return value is one. When there are no more matching
1038  names the function returns zero. If on the first call no file
1039  matches in_spec, or there is any other error, -1 is returned.
1040 */
1041
1042 #include        <rmsdef.h>
1043 #include        <descrip.h>
1044 #define         OUTSIZE MAX_FILE_SPEC_LEN
1045 static short
1046 fn_exp (out, in)
1047      vspec *out;
1048      char *in;
1049 {
1050   static long context = 0;
1051   static struct dsc$descriptor_s o;
1052   static struct dsc$descriptor_s i;
1053   static bool pass1 = TRUE;
1054   long status;
1055   short retval;
1056
1057   if (pass1)
1058     {
1059       pass1 = FALSE;
1060       o.dsc$a_pointer = (char *) out;
1061       o.dsc$w_length = (short)OUTSIZE;
1062       i.dsc$a_pointer = in;
1063       i.dsc$w_length = (short)strlen(in);
1064       i.dsc$b_dtype = DSC$K_DTYPE_T;
1065       i.dsc$b_class = DSC$K_CLASS_S;
1066       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1067       o.dsc$b_class = DSC$K_CLASS_VS;
1068     }
1069   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1070     {
1071       out->body[out->curlen] = EOS;
1072       return 1;
1073     }
1074   else if (status == RMS$_NMF)
1075     retval = 0;
1076   else
1077     {
1078       strcpy(out->body, in);
1079       retval = -1;
1080     }
1081   lib$find_file_end(&context);
1082   pass1 = TRUE;
1083   return retval;
1084 }
1085
1086 /*
1087   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1088   name of each file specified by the provided arg expanding wildcards.
1089 */
1090 static char *
1091 gfnames (arg, p_error)
1092      char *arg;
1093      bool *p_error;
1094 {
1095   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1096
1097   switch (fn_exp (&filename, arg))
1098     {
1099     case 1:
1100       *p_error = FALSE;
1101       return filename.body;
1102     case 0:
1103       *p_error = FALSE;
1104       return NULL;
1105     default:
1106       *p_error = TRUE;
1107       return filename.body;
1108     }
1109 }
1110
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/*
 * Stand-in for the C library `system' where it is not available.
 * CMD is ignored: an error message is printed and -1 is returned, so
 * that callers, which test or pass on the result, see a failure
 * instead of an indeterminate value.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1118
1119 #define VERSION_DELIM   ';'
1120 char *massage_name (s)
1121      char *s;
1122 {
1123   char *start = s;
1124
1125   for ( ; *s; s++)
1126     if (*s == VERSION_DELIM)
1127       {
1128         *s = EOS;
1129         break;
1130       }
1131     else
1132       *s = lowcase (*s);
1133   return start;
1134 }
1135 #endif /* VMS */
1136
1137 \f
1138 int
1139 main (argc, argv)
1140      int argc;
1141      char *argv[];
1142 {
1143   int i;
1144   unsigned int nincluded_files;
1145   char **included_files;
1146   argument *argbuffer;
1147   int current_arg, file_count;
1148   linebuffer filename_lb;
1149   bool help_asked = FALSE;
1150 #ifdef VMS
1151   bool got_err;
1152 #endif
1153  char *optstring;
1154  int opt;
1155
1156
1157 #ifdef DOS_NT
1158   _fmode = O_BINARY;   /* all of files are treated as binary files */
1159 #endif /* DOS_NT */
1160
1161   progname = argv[0];
1162   nincluded_files = 0;
1163   included_files = xnew (argc, char *);
1164   current_arg = 0;
1165   file_count = 0;
1166
1167   /* Allocate enough no matter what happens.  Overkill, but each one
1168      is small. */
1169   argbuffer = xnew (argc, argument);
1170
1171   /*
1172    * If etags, always find typedefs and structure tags.  Why not?
1173    * Also default to find macro constants, enum constants and
1174    * global variables.
1175    */
1176   if (!CTAGS)
1177     {
1178       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1179       globals = TRUE;
1180     }
1181
1182   optstring = "-";
1183 #ifdef ETAGS_REGEXPS
1184   optstring = "-r:Rc:";
1185 #endif /* ETAGS_REGEXPS */
1186   if (LONG_OPTIONS)
1187     optstring += 1;
1188   optstring = concat (optstring,
1189                       "Cf:Il:o:SVhH",
1190                       (CTAGS) ? "BxdtTuvw" : "aDi:");
1191
1192   while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1193     switch (opt)
1194       {
1195       case 0:
1196         /* If getopt returns 0, then it has already processed a
1197            long-named option.  We should do nothing.  */
1198         break;
1199
1200       case 1:
1201         /* This means that a file name has been seen.  Record it. */
1202         argbuffer[current_arg].arg_type = at_filename;
1203         argbuffer[current_arg].what     = optarg;
1204         ++current_arg;
1205         ++file_count;
1206         break;
1207
1208       case STDIN:
1209         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1210         argbuffer[current_arg].arg_type = at_stdin;
1211         argbuffer[current_arg].what     = optarg;
1212         ++current_arg;
1213         ++file_count;
1214         if (parsing_stdin)
1215           fatal ("cannot parse standard input more than once", (char *)NULL);
1216         parsing_stdin = TRUE;
1217         break;
1218
1219         /* Common options. */
1220       case 'C': cplusplus = TRUE;               break;
1221       case 'f':         /* for compatibility with old makefiles */
1222       case 'o':
1223         if (tagfile)
1224           {
1225             error ("-o option may only be given once.", (char *)NULL);
1226             suggest_asking_for_help ();
1227             /* NOTREACHED */
1228           }
1229         tagfile = optarg;
1230         break;
1231       case 'I':
1232       case 'S':         /* for backward compatibility */
1233         ignoreindent = TRUE;
1234         break;
1235       case 'l':
1236         {
1237           language *lang = get_language_from_langname (optarg);
1238           if (lang != NULL)
1239             {
1240               argbuffer[current_arg].lang = lang;
1241               argbuffer[current_arg].arg_type = at_language;
1242               ++current_arg;
1243             }
1244         }
1245         break;
1246       case 'c':
1247         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1248         optarg = concat (optarg, "i", ""); /* memory leak here */
1249         /* FALLTHRU */
1250       case 'r':
1251         argbuffer[current_arg].arg_type = at_regexp;
1252         argbuffer[current_arg].what = optarg;
1253         ++current_arg;
1254         break;
1255       case 'R':
1256         argbuffer[current_arg].arg_type = at_regexp;
1257         argbuffer[current_arg].what = NULL;
1258         ++current_arg;
1259         break;
1260       case 'V':
1261         print_version ();
1262         break;
1263       case 'h':
1264       case 'H':
1265         help_asked = TRUE;
1266         break;
1267
1268         /* Etags options */
1269       case 'a': append_to_tagfile = TRUE;                       break;
1270       case 'D': constantypedefs = FALSE;                        break;
1271       case 'i': included_files[nincluded_files++] = optarg;     break;
1272
1273         /* Ctags options. */
1274       case 'B': searchar = '?';                                 break;
1275       case 'd': constantypedefs = TRUE;                         break;
1276       case 't': typedefs = TRUE;                                break;
1277       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1278       case 'u': update = TRUE;                                  break;
1279       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1280       case 'x': cxref_style = TRUE;                             break;
1281       case 'w': no_warnings = TRUE;                             break;
1282       default:
1283         suggest_asking_for_help ();
1284         /* NOTREACHED */
1285       }
1286
1287   for (; optind < argc; optind++)
1288     {
1289       argbuffer[current_arg].arg_type = at_filename;
1290       argbuffer[current_arg].what = argv[optind];
1291       ++current_arg;
1292       ++file_count;
1293     }
1294
1295   argbuffer[current_arg].arg_type = at_end;
1296
1297   if (help_asked)
1298     print_help (argbuffer);
1299     /* NOTREACHED */
1300
1301   if (nincluded_files == 0 && file_count == 0)
1302     {
1303       error ("no input files specified.", (char *)NULL);
1304       suggest_asking_for_help ();
1305       /* NOTREACHED */
1306     }
1307
1308   if (tagfile == NULL)
1309     tagfile = CTAGS ? "tags" : "TAGS";
1310   cwd = etags_getcwd ();        /* the current working directory */
1311   if (cwd[strlen (cwd) - 1] != '/')
1312     {
1313       char *oldcwd = cwd;
1314       cwd = concat (oldcwd, "/", "");
1315       free (oldcwd);
1316     }
1317   if (streq (tagfile, "-"))
1318     tagfiledir = cwd;
1319   else
1320     tagfiledir = absolute_dirname (tagfile, cwd);
1321
1322   init ();                      /* set up boolean "functions" */
1323
1324   linebuffer_init (&lb);
1325   linebuffer_init (&filename_lb);
1326   linebuffer_init (&filebuf);
1327   linebuffer_init (&token_name);
1328
1329   if (!CTAGS)
1330     {
1331       if (streq (tagfile, "-"))
1332         {
1333           tagf = stdout;
1334 #ifdef DOS_NT
1335           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1336              doesn't take effect until after `stdout' is already open). */
1337           if (!isatty (fileno (stdout)))
1338             setmode (fileno (stdout), O_BINARY);
1339 #endif /* DOS_NT */
1340         }
1341       else
1342         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1343       if (tagf == NULL)
1344         pfatal (tagfile);
1345     }
1346
1347   /*
1348    * Loop through files finding functions.
1349    */
1350   for (i = 0; i < current_arg; i++)
1351     {
1352       static language *lang;    /* non-NULL if language is forced */
1353       char *this_file;
1354
1355       switch (argbuffer[i].arg_type)
1356         {
1357         case at_language:
1358           lang = argbuffer[i].lang;
1359           break;
1360 #ifdef ETAGS_REGEXPS
1361         case at_regexp:
1362           analyse_regex (argbuffer[i].what);
1363           break;
1364 #endif
1365         case at_filename:
1366 #ifdef VMS
1367           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1368             {
1369               if (got_err)
1370                 {
1371                   error ("can't find file %s\n", this_file);
1372                   argc--, argv++;
1373                 }
1374               else
1375                 {
1376                   this_file = massage_name (this_file);
1377                 }
1378 #else
1379               this_file = argbuffer[i].what;
1380 #endif
1381               /* Input file named "-" means read file names from stdin
1382                  (one per line) and use them. */
1383               if (streq (this_file, "-"))
1384                 {
1385                   if (parsing_stdin)
1386                     fatal ("cannot parse standard input AND read file names from it",
1387                            (char *)NULL);
1388                   while (readline_internal (&filename_lb, stdin) > 0)
1389                     process_file_name (filename_lb.buffer, lang);
1390                 }
1391               else
1392                 process_file_name (this_file, lang);
1393 #ifdef VMS
1394             }
1395 #endif
1396           break;
1397         case at_stdin:
1398           this_file = argbuffer[i].what;
1399           process_file (stdin, this_file, lang);
1400           break;
1401         }
1402     }
1403
1404 #ifdef ETAGS_REGEXPS
1405   free_regexps ();
1406 #endif /* ETAGS_REGEXPS */
1407   free (lb.buffer);
1408   free (filebuf.buffer);
1409   free (token_name.buffer);
1410
1411   if (!CTAGS || cxref_style)
1412     {
1413       put_entries (nodehead);   /* write the remainig tags (ETAGS) */
1414       free_tree (nodehead);
1415       nodehead = NULL;
1416       if (!CTAGS)
1417         {
1418           fdesc *fdp;
1419
1420           /* Output file entries that have no tags. */
1421           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1422             if (!fdp->written)
1423               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1424
1425           while (nincluded_files-- > 0)
1426             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1427         }
1428
1429       if (fclose (tagf) == EOF)
1430         pfatal (tagfile);
1431       exit (EXIT_SUCCESS);
1432     }
1433
1434   if (update)
1435     {
1436       char cmd[BUFSIZ];
1437       for (i = 0; i < current_arg; ++i)
1438         {
1439           switch (argbuffer[i].arg_type)
1440             {
1441             case at_filename:
1442             case at_stdin:
1443               break;
1444             default:
1445               continue;         /* the for loop */
1446             }
1447           sprintf (cmd,
1448                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1449                    tagfile, argbuffer[i].what, tagfile);
1450           if (system (cmd) != EXIT_SUCCESS)
1451             fatal ("failed to execute shell command", (char *)NULL);
1452         }
1453       append_to_tagfile = TRUE;
1454     }
1455
1456   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1457   if (tagf == NULL)
1458     pfatal (tagfile);
1459   put_entries (nodehead);       /* write all the tags (CTAGS) */
1460   free_tree (nodehead);
1461   nodehead = NULL;
1462   if (fclose (tagf) == EOF)
1463     pfatal (tagfile);
1464
1465   if (update)
1466     {
1467       char cmd[2*BUFSIZ+10];
1468       sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1469       exit (system (cmd));
1470     }
1471   return EXIT_SUCCESS;
1472 }
1473
1474
1475 /*
1476  * Return a compressor given the file name.  If EXTPTR is non-zero,
1477  * return a pointer into FILE where the compressor-specific
1478  * extension begins.  If no compressor is found, NULL is returned
1479  * and EXTPTR is not significant.
1480  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1481  */
1482 static compressor *
1483 get_compressor_from_suffix (file, extptr)
1484      char *file;
1485      char **extptr;
1486 {
1487   compressor *compr;
1488   char *slash, *suffix;
1489
1490   /* This relies on FN to be after canonicalize_filename,
1491      so we don't need to consider backslashes on DOS_NT.  */
1492   slash = etags_strrchr (file, '/');
1493   suffix = etags_strrchr (file, '.');
1494   if (suffix == NULL || suffix < slash)
1495     return NULL;
1496   if (extptr != NULL)
1497     *extptr = suffix;
1498   suffix += 1;
1499   /* Let those poor souls who live with DOS 8+3 file name limits get
1500      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1501      Only the first do loop is run if not MSDOS */
1502   do
1503     {
1504       for (compr = compressors; compr->suffix != NULL; compr++)
1505         if (streq (compr->suffix, suffix))
1506           return compr;
1507       if (!MSDOS)
1508         break;                  /* do it only once: not really a loop */
1509       if (extptr != NULL)
1510         *extptr = ++suffix;
1511     } while (*suffix != '\0');
1512   return NULL;
1513 }
1514
1515
1516
1517 /*
1518  * Return a language given the name.
1519  */
1520 static language *
1521 get_language_from_langname (name)
1522      const char *name;
1523 {
1524   language *lang;
1525
1526   if (name == NULL)
1527     error ("empty language name", (char *)NULL);
1528   else
1529     {
1530       for (lang = lang_names; lang->name != NULL; lang++)
1531         if (streq (name, lang->name))
1532           return lang;
1533       error ("unknown language \"%s\"", name);
1534     }
1535
1536   return NULL;
1537 }
1538
1539
1540 /*
1541  * Return a language given the interpreter name.
1542  */
1543 static language *
1544 get_language_from_interpreter (interpreter)
1545      char *interpreter;
1546 {
1547   language *lang;
1548   char **iname;
1549
1550   if (interpreter == NULL)
1551     return NULL;
1552   for (lang = lang_names; lang->name != NULL; lang++)
1553     if (lang->interpreters != NULL)
1554       for (iname = lang->interpreters; *iname != NULL; iname++)
1555         if (streq (*iname, interpreter))
1556             return lang;
1557
1558   return NULL;
1559 }
1560
1561
1562
1563 /*
1564  * Return a language given the file name.
1565  */
1566 static language *
1567 get_language_from_filename (file, case_sensitive)
1568      char *file;
1569      bool case_sensitive;
1570 {
1571   language *lang;
1572   char **name, **ext, *suffix;
1573
1574   /* Try whole file name first. */
1575   for (lang = lang_names; lang->name != NULL; lang++)
1576     if (lang->filenames != NULL)
1577       for (name = lang->filenames; *name != NULL; name++)
1578         if ((case_sensitive)
1579             ? streq (*name, file)
1580             : strcaseeq (*name, file))
1581           return lang;
1582
1583   /* If not found, try suffix after last dot. */
1584   suffix = etags_strrchr (file, '.');
1585   if (suffix == NULL)
1586     return NULL;
1587   suffix += 1;
1588   for (lang = lang_names; lang->name != NULL; lang++)
1589     if (lang->suffixes != NULL)
1590       for (ext = lang->suffixes; *ext != NULL; ext++)
1591         if ((case_sensitive)
1592             ? streq (*ext, suffix)
1593             : strcaseeq (*ext, suffix))
1594           return lang;
1595   return NULL;
1596 }
1597
1598 \f
1599 /*
1600  * This routine is called on each file argument.
1601  */
1602 static void
1603 process_file_name (file, lang)
1604      char *file;
1605      language *lang;
1606 {
1607   struct stat stat_buf;
1608   FILE *inf;
1609   fdesc *fdp;
1610   compressor *compr;
1611   char *compressed_name, *uncompressed_name;
1612   char *ext, *real_name;
1613   int retval;
1614
1615   canonicalize_filename (file);
1616   if (streq (file, tagfile) && !streq (tagfile, "-"))
1617     {
1618       error ("skipping inclusion of %s in self.", file);
1619       return;
1620     }
1621   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1622     {
1623       compressed_name = NULL;
1624       real_name = uncompressed_name = savestr (file);
1625     }
1626   else
1627     {
1628       real_name = compressed_name = savestr (file);
1629       uncompressed_name = savenstr (file, ext - file);
1630     }
1631
1632   /* If the canonicalized uncompressed name
1633      has already been dealt with, skip it silently. */
1634   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1635     {
1636       assert (fdp->infname != NULL);
1637       if (streq (uncompressed_name, fdp->infname))
1638         goto cleanup;
1639     }
1640
1641   if (stat (real_name, &stat_buf) != 0)
1642     {
1643       /* Reset real_name and try with a different name. */
1644       real_name = NULL;
1645       if (compressed_name != NULL) /* try with the given suffix */
1646         {
1647           if (stat (uncompressed_name, &stat_buf) == 0)
1648             real_name = uncompressed_name;
1649         }
1650       else                      /* try all possible suffixes */
1651         {
1652           for (compr = compressors; compr->suffix != NULL; compr++)
1653             {
1654               compressed_name = concat (file, ".", compr->suffix);
1655               if (stat (compressed_name, &stat_buf) != 0)
1656                 {
1657                   if (MSDOS)
1658                     {
1659                       char *suf = compressed_name + strlen (file);
1660                       size_t suflen = strlen (compr->suffix) + 1;
1661                       for ( ; suf[1]; suf++, suflen--)
1662                         {
1663                           memmove (suf, suf + 1, suflen);
1664                           if (stat (compressed_name, &stat_buf) == 0)
1665                             {
1666                               real_name = compressed_name;
1667                               break;
1668                             }
1669                         }
1670                       if (real_name != NULL)
1671                         break;
1672                     } /* MSDOS */
1673                   free (compressed_name);
1674                   compressed_name = NULL;
1675                 }
1676               else
1677                 {
1678                   real_name = compressed_name;
1679                   break;
1680                 }
1681             }
1682         }
1683       if (real_name == NULL)
1684         {
1685           perror (file);
1686           goto cleanup;
1687         }
1688     } /* try with a different name */
1689
1690   if (!S_ISREG (stat_buf.st_mode))
1691     {
1692       error ("skipping %s: it is not a regular file.", real_name);
1693       goto cleanup;
1694     }
1695   if (real_name == compressed_name)
1696     {
1697       char *cmd = concat (compr->command, " ", real_name);
1698       inf = (FILE *) popen (cmd, "r");
1699       free (cmd);
1700     }
1701   else
1702     inf = fopen (real_name, "r");
1703   if (inf == NULL)
1704     {
1705       perror (real_name);
1706       goto cleanup;
1707     }
1708
1709   process_file (inf, uncompressed_name, lang);
1710
1711   if (real_name == compressed_name)
1712     retval = pclose (inf);
1713   else
1714     retval = fclose (inf);
1715   if (retval < 0)
1716     pfatal (file);
1717
1718  cleanup:
1719   if (compressed_name) free (compressed_name);
1720   if (uncompressed_name) free (uncompressed_name);
1721   last_node = NULL;
1722   curfdp = NULL;
1723   return;
1724 }
1725
1726 static void
1727 process_file (fh, fn, lang)
1728      FILE *fh;
1729      char *fn;
1730      language *lang;
1731 {
1732   static const fdesc emptyfdesc;
1733   fdesc *fdp;
1734
1735   /* Create a new input file description entry. */
1736   fdp = xnew (1, fdesc);
1737   *fdp = emptyfdesc;
1738   fdp->next = fdhead;
1739   fdp->infname = savestr (fn);
1740   fdp->lang = lang;
1741   fdp->infabsname = absolute_filename (fn, cwd);
1742   fdp->infabsdir = absolute_dirname (fn, cwd);
1743   if (filename_is_absolute (fn))
1744     {
1745       /* An absolute file name.  Canonicalize it. */
1746       fdp->taggedfname = absolute_filename (fn, NULL);
1747     }
1748   else
1749     {
1750       /* A file name relative to cwd.  Make it relative
1751          to the directory of the tags file. */
1752       fdp->taggedfname = relative_filename (fn, tagfiledir);
1753     }
1754   fdp->usecharno = TRUE;        /* use char position when making tags */
1755   fdp->prop = NULL;
1756   fdp->written = FALSE;         /* not written on tags file yet */
1757
1758   fdhead = fdp;
1759   curfdp = fdhead;              /* the current file description */
1760
1761   find_entries (fh);
1762
1763   /* If not Ctags, and if this is not metasource and if it contained no #line
1764      directives, we can write the tags and free all nodes pointing to
1765      curfdp. */
1766   if (!CTAGS
1767       && curfdp->usecharno      /* no #line directives in this file */
1768       && !curfdp->lang->metasource)
1769     {
1770       node *np, *prev;
1771
1772       /* Look for the head of the sublist relative to this file.  See add_node
1773          for the structure of the node tree. */
1774       prev = NULL;
1775       for (np = nodehead; np != NULL; prev = np, np = np->left)
1776         if (np->fdp == curfdp)
1777           break;
1778
1779       /* If we generated tags for this file, write and delete them. */
1780       if (np != NULL)
1781         {
1782           /* This is the head of the last sublist, if any.  The following
1783              instructions depend on this being true. */
1784           assert (np->left == NULL);
1785
1786           assert (fdhead == curfdp);
1787           assert (last_node->fdp == curfdp);
1788           put_entries (np);     /* write tags for file curfdp->taggedfname */
1789           free_tree (np);       /* remove the written nodes */
1790           if (prev == NULL)
1791             nodehead = NULL;    /* no nodes left */
1792           else
1793             prev->left = NULL;  /* delete the pointer to the sublist */
1794         }
1795     }
1796 }
1797
1798 /*
1799  * This routine sets up the boolean pseudo-functions which work
1800  * by setting boolean flags dependent upon the corresponding character.
1801  * Every char which is NOT in that string is not a white char.  Therefore,
1802  * all of the array "_wht" is set to FALSE, and then the elements
1803  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1804  * of a char is TRUE if it is the string "white", else FALSE.
1805  */
1806 static void
1807 init ()
1808 {
1809   register char *sp;
1810   register int i;
1811
1812   for (i = 0; i < CHARS; i++)
1813     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1814   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1815   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1816   notinname('\0') = notinname('\n');
1817   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1818   begtoken('\0') = begtoken('\n');
1819   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1820   intoken('\0') = intoken('\n');
1821   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1822   endtoken('\0') = endtoken('\n');
1823 }
1824
1825 /*
1826  * This routine opens the specified file and calls the function
1827  * which finds the function and type definitions.
1828  */
1829 static void
1830 find_entries (inf)
1831      FILE *inf;
1832 {
1833   char *cp;
1834   language *lang = curfdp->lang;
1835   Lang_function *parser = NULL;
1836
1837   /* If user specified a language, use it. */
1838   if (lang != NULL && lang->function != NULL)
1839     {
1840       parser = lang->function;
1841     }
1842
1843   /* Else try to guess the language given the file name. */
1844   if (parser == NULL)
1845     {
1846       lang = get_language_from_filename (curfdp->infname, TRUE);
1847       if (lang != NULL && lang->function != NULL)
1848         {
1849           curfdp->lang = lang;
1850           parser = lang->function;
1851         }
1852     }
1853
1854   /* Else look for sharp-bang as the first two characters. */
1855   if (parser == NULL
1856       && readline_internal (&lb, inf) > 0
1857       && lb.len >= 2
1858       && lb.buffer[0] == '#'
1859       && lb.buffer[1] == '!')
1860     {
1861       char *lp;
1862
1863       /* Set lp to point at the first char after the last slash in the
1864          line or, if no slashes, at the first nonblank.  Then set cp to
1865          the first successive blank and terminate the string. */
1866       lp = etags_strrchr (lb.buffer+2, '/');
1867       if (lp != NULL)
1868         lp += 1;
1869       else
1870         lp = skip_spaces (lb.buffer + 2);
1871       cp = skip_non_spaces (lp);
1872       *cp = '\0';
1873
1874       if (strlen (lp) > 0)
1875         {
1876           lang = get_language_from_interpreter (lp);
1877           if (lang != NULL && lang->function != NULL)
1878             {
1879               curfdp->lang = lang;
1880               parser = lang->function;
1881             }
1882         }
1883     }
1884
1885   /* We rewind here, even if inf may be a pipe.  We fail if the
1886      length of the first line is longer than the pipe block size,
1887      which is unlikely. */
1888   rewind (inf);
1889
1890   /* Else try to guess the language given the case insensitive file name. */
1891   if (parser == NULL)
1892     {
1893       lang = get_language_from_filename (curfdp->infname, FALSE);
1894       if (lang != NULL && lang->function != NULL)
1895         {
1896           curfdp->lang = lang;
1897           parser = lang->function;
1898         }
1899     }
1900
1901   /* Else try Fortran or C. */
1902   if (parser == NULL)
1903     {
1904       node *old_last_node = last_node;
1905
1906       curfdp->lang = get_language_from_langname ("fortran");
1907       find_entries (inf);
1908
1909       if (old_last_node == last_node)
1910         /* No Fortran entries found.  Try C. */
1911         {
1912           /* We do not tag if rewind fails.
1913              Only the file name will be recorded in the tags file. */
1914           rewind (inf);
1915           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1916           find_entries (inf);
1917         }
1918       return;
1919     }
1920
1921   if (!no_line_directive
1922       && curfdp->lang != NULL && curfdp->lang->metasource)
1923     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1924        file, or anyway we parsed a file that is automatically generated from
1925        this one.  If this is the case, the bingo.c file contained #line
1926        directives that generated tags pointing to this file.  Let's delete
1927        them all before parsing this file, which is the real source. */
1928     {
1929       fdesc **fdpp = &fdhead;
1930       while (*fdpp != NULL)
1931         if (*fdpp != curfdp
1932             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1933           /* We found one of those!  We must delete both the file description
1934              and all tags referring to it. */
1935           {
1936             fdesc *badfdp = *fdpp;
1937
1938             /* Delete the tags referring to badfdp->taggedfname
1939                that were obtained from badfdp->infname. */
1940             invalidate_nodes (badfdp, &nodehead);
1941
1942             *fdpp = badfdp->next; /* remove the bad description from the list */
1943             free_fdesc (badfdp);
1944           }
1945         else
1946           fdpp = &(*fdpp)->next; /* advance the list pointer */
1947     }
1948
1949   assert (parser != NULL);
1950
1951   /* Generic initialisations before reading from file. */
1952   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1953
1954   /* Generic initialisations before parsing file with readline. */
1955   lineno = 0;                  /* reset global line number */
1956   charno = 0;                  /* reset global char number */
1957   linecharno = 0;              /* reset global char number of line start */
1958
1959   parser (inf);
1960
1961 #ifdef ETAGS_REGEXPS
1962   regex_tag_multiline ();
1963 #endif /* ETAGS_REGEXPS */
1964 }
1965
1966 \f
1967 /*
1968  * Check whether an implicitly named tag should be created,
1969  * then call `pfnote'.
1970  * NAME is a string that is internally copied by this function.
1971  *
1972  * TAGS format specification
1973  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1974  * The following is explained in some more detail in etc/ETAGS.EBNF.
1975  *
1976  * make_tag creates tags with "implicit tag names" (unnamed tags)
1977  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1978  *  1. NAME does not contain any of the characters in NONAM;
1979  *  2. LINESTART contains name as either a rightmost, or rightmost but
1980  *     one character, substring;
1981  *  3. the character, if any, immediately before NAME in LINESTART must
1982  *     be a character in NONAM;
1983  *  4. the character, if any, immediately after NAME in LINESTART must
1984  *     also be a character in NONAM.
1985  *
1986  * The implementation uses the notinname() macro, which recognises the
1987  * characters stored in the string `nonam'.
1988  * etags.el needs to use the same characters that are in NONAM.
1989  */
1990 static void
1991 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1992      char *name;                /* tag name, or NULL if unnamed */
1993      int namelen;               /* tag length */
1994      bool is_func;              /* tag is a function */
1995      char *linestart;           /* start of the line where tag is */
1996      int linelen;               /* length of the line where tag is */
1997      int lno;                   /* line number */
1998      long cno;                  /* character number */
1999 {
2000   bool named = (name != NULL && namelen > 0);
2001
2002   if (!CTAGS && named)          /* maybe set named to false */
2003     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2004        such that etags.el can guess a name from it. */
2005     {
2006       int i;
2007       register char *cp = name;
2008
2009       for (i = 0; i < namelen; i++)
2010         if (notinname (*cp++))
2011           break;
2012       if (i == namelen)                         /* rule #1 */
2013         {
2014           cp = linestart + linelen - namelen;
2015           if (notinname (linestart[linelen-1]))
2016             cp -= 1;                            /* rule #4 */
2017           if (cp >= linestart                   /* rule #2 */
2018               && (cp == linestart
2019                   || notinname (cp[-1]))        /* rule #3 */
2020               && strneq (name, cp, namelen))    /* rule #2 */
2021             named = FALSE;      /* use implicit tag name */
2022         }
2023     }
2024
2025   if (named)
2026     name = savenstr (name, namelen);
2027   else
2028     name = NULL;
2029   pfnote (name, is_func, linestart, linelen, lno, cno);
2030 }
2031
2032 /* Record a tag. */
2033 static void
2034 pfnote (name, is_func, linestart, linelen, lno, cno)
2035      char *name;                /* tag name, or NULL if unnamed */
2036      bool is_func;              /* tag is a function */
2037      char *linestart;           /* start of the line where tag is */
2038      int linelen;               /* length of the line where tag is */
2039      int lno;                   /* line number */
2040      long cno;                  /* character number */
2041 {
2042   register node *np;
2043
2044   assert (name == NULL || name[0] != '\0');
2045   if (CTAGS && name == NULL)
2046     return;
2047
2048   np = xnew (1, node);
2049
2050   /* If ctags mode, change name "main" to M<thisfilename>. */
2051   if (CTAGS && !cxref_style && streq (name, "main"))
2052     {
2053       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2054       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2055       fp = etags_strrchr (np->name, '.');
2056       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2057         fp[0] = '\0';
2058     }
2059   else
2060     np->name = name;
2061   np->valid = TRUE;
2062   np->been_warned = FALSE;
2063   np->fdp = curfdp;
2064   np->is_func = is_func;
2065   np->lno = lno;
2066   if (np->fdp->usecharno)
2067     /* Our char numbers are 0-base, because of C language tradition?
2068        ctags compatibility?  old versions compatibility?   I don't know.
2069        Anyway, since emacs's are 1-base we expect etags.el to take care
2070        of the difference.  If we wanted to have 1-based numbers, we would
2071        uncomment the +1 below. */
2072     np->cno = cno /* + 1 */ ;
2073   else
2074     np->cno = invalidcharno;
2075   np->left = np->right = NULL;
2076   if (CTAGS && !cxref_style)
2077     {
2078       if (strlen (linestart) < 50)
2079         np->regex = concat (linestart, "$", "");
2080       else
2081         np->regex = savenstr (linestart, 50);
2082     }
2083   else
2084     np->regex = savenstr (linestart, linelen);
2085
2086   add_node (np, &nodehead);
2087 }
2088
2089 /*
2090  * free_tree ()
2091  *      recurse on left children, iterate on right children.
2092  */
2093 static void
2094 free_tree (np)
2095      register node *np;
2096 {
2097   while (np)
2098     {
2099       register node *node_right = np->right;
2100       free_tree (np->left);
2101       if (np->name != NULL)
2102         free (np->name);
2103       free (np->regex);
2104       free (np);
2105       np = node_right;
2106     }
2107 }
2108
2109 /*
2110  * free_fdesc ()
2111  *      delete a file description
2112  */
2113 static void
2114 free_fdesc (fdp)
2115      register fdesc *fdp;
2116 {
2117   if (fdp->infname != NULL) free (fdp->infname);
2118   if (fdp->infabsname != NULL) free (fdp->infabsname);
2119   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2120   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2121   if (fdp->prop != NULL) free (fdp->prop);
2122   free (fdp);
2123 }
2124
2125 /*
2126  * add_node ()
2127  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2128  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2129  *      balancing.
2130  *
2131  *      add_node is the only function allowed to add nodes, so it can
2132  *      maintain state.
2133  */
2134 static void
2135 add_node (np, cur_node_p)
2136      node *np, **cur_node_p;
2137 {
2138   register int dif;
2139   register node *cur_node = *cur_node_p;
2140
2141   if (cur_node == NULL)
2142     {
2143       *cur_node_p = np;
2144       last_node = np;
2145       return;
2146     }
2147
2148   if (!CTAGS)
2149     /* Etags Mode */
2150     {
2151       /* For each file name, tags are in a linked sublist on the right
2152          pointer.  The first tags of different files are a linked list
2153          on the left pointer.  last_node points to the end of the last
2154          used sublist. */
2155       if (last_node != NULL && last_node->fdp == np->fdp)
2156         {
2157           /* Let's use the same sublist as the last added node. */
2158           assert (last_node->right == NULL);
2159           last_node->right = np;
2160           last_node = np;
2161         }
2162       else if (cur_node->fdp == np->fdp)
2163         {
2164           /* Scanning the list we found the head of a sublist which is
2165              good for us.  Let's scan this sublist. */
2166           add_node (np, &cur_node->right);
2167         }
2168       else
2169         /* The head of this sublist is not good for us.  Let's try the
2170            next one. */
2171         add_node (np, &cur_node->left);
2172     } /* if ETAGS mode */
2173
2174   else
2175     {
2176       /* Ctags Mode */
2177       dif = strcmp (np->name, cur_node->name);
2178
2179       /*
2180        * If this tag name matches an existing one, then
2181        * do not add the node, but maybe print a warning.
2182        */
2183       if (!dif)
2184         {
2185           if (np->fdp == cur_node->fdp)
2186             {
2187               if (!no_warnings)
2188                 {
2189                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2190                            np->fdp->infname, lineno, np->name);
2191                   fprintf (stderr, "Second entry ignored\n");
2192                 }
2193             }
2194           else if (!cur_node->been_warned && !no_warnings)
2195             {
2196               fprintf
2197                 (stderr,
2198                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2199                  np->fdp->infname, cur_node->fdp->infname, np->name);
2200               cur_node->been_warned = TRUE;
2201             }
2202           return;
2203         }
2204
2205       /* Actually add the node */
2206       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2207     } /* if CTAGS mode */
2208 }
2209
2210 /*
2211  * invalidate_nodes ()
2212  *      Scan the node tree and invalidate all nodes pointing to the
2213  *      given file description (CTAGS case) or free them (ETAGS case).
2214  */
2215 static void
2216 invalidate_nodes (badfdp, npp)
2217      fdesc *badfdp;
2218      node **npp;
2219 {
2220   node *np = *npp;
2221
2222   if (np == NULL)
2223     return;
2224
2225   if (CTAGS)
2226     {
2227       if (np->left != NULL)
2228         invalidate_nodes (badfdp, &np->left);
2229       if (np->fdp == badfdp)
2230         np->valid = FALSE;
2231       if (np->right != NULL)
2232         invalidate_nodes (badfdp, &np->right);
2233     }
2234   else
2235     {
2236       assert (np->fdp != NULL);
2237       if (np->fdp == badfdp)
2238         {
2239           *npp = np->left;      /* detach the sublist from the list */
2240           np->left = NULL;      /* isolate it */
2241           free_tree (np);       /* free it */
2242           invalidate_nodes (badfdp, npp);
2243         }
2244       else
2245         invalidate_nodes (badfdp, &np->left);
2246     }
2247 }
2248
2249 \f
2250 static int total_size_of_entries __P((node *));
2251 static int number_len __P((long));
2252
/* Return how many decimal digits are needed to write NUM, which is
   expected to be non-negative.  The result is 1 for every NUM
   smaller than 10. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2263
2264 /*
2265  * Return total number of characters that put_entries will output for
2266  * the nodes in the linked list at the right of the specified node.
2267  * This count is irrelevant with etags.el since emacs 19.34 at least,
2268  * but is still supplied for backward compatibility.
2269  */
2270 static int
2271 total_size_of_entries (np)
2272      register node *np;
2273 {
2274   register int total = 0;
2275
2276   for (; np != NULL; np = np->right)
2277     if (np->valid)
2278       {
2279         total += strlen (np->regex) + 1;                /* pat\177 */
2280         if (np->name != NULL)
2281           total += strlen (np->name) + 1;               /* name\001 */
2282         total += number_len ((long) np->lno) + 1;       /* lno, */
2283         if (np->cno != invalidcharno)                   /* cno */
2284           total += number_len (np->cno);
2285         total += 1;                                     /* newline */
2286       }
2287
2288   return total;
2289 }
2290
2291 static void
2292 put_entries (np)
2293      register node *np;
2294 {
2295   register char *sp;
2296   static fdesc *fdp = NULL;
2297
2298   if (np == NULL)
2299     return;
2300
2301   /* Output subentries that precede this one */
2302   if (CTAGS)
2303     put_entries (np->left);
2304
2305   /* Output this entry */
2306   if (np->valid)
2307     {
2308       if (!CTAGS)
2309         {
2310           /* Etags mode */
2311           if (fdp != np->fdp)
2312             {
2313               fdp = np->fdp;
2314               fprintf (tagf, "\f\n%s,%d\n",
2315                        fdp->taggedfname, total_size_of_entries (np));
2316               fdp->written = TRUE;
2317             }
2318           fputs (np->regex, tagf);
2319           fputc ('\177', tagf);
2320           if (np->name != NULL)
2321             {
2322               fputs (np->name, tagf);
2323               fputc ('\001', tagf);
2324             }
2325           fprintf (tagf, "%d,", np->lno);
2326           if (np->cno != invalidcharno)
2327             fprintf (tagf, "%ld", np->cno);
2328           fputs ("\n", tagf);
2329         }
2330       else
2331         {
2332           /* Ctags mode */
2333           if (np->name == NULL)
2334             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2335
2336           if (cxref_style)
2337             {
2338               if (vgrind_style)
2339                 fprintf (stdout, "%s %s %d\n",
2340                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2341               else
2342                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2343                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2344             }
2345           else
2346             {
2347               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2348
2349               if (np->is_func)
2350                 {               /* function or #define macro with args */
2351                   putc (searchar, tagf);
2352                   putc ('^', tagf);
2353
2354                   for (sp = np->regex; *sp; sp++)
2355                     {
2356                       if (*sp == '\\' || *sp == searchar)
2357                         putc ('\\', tagf);
2358                       putc (*sp, tagf);
2359                     }
2360                   putc (searchar, tagf);
2361                 }
2362               else
2363                 {               /* anything else; text pattern inadequate */
2364                   fprintf (tagf, "%d", np->lno);
2365                 }
2366               putc ('\n', tagf);
2367             }
2368         }
2369     } /* if this node contains a valid tag */
2370
2371   /* Output subentries that follow this one */
2372   put_entries (np->right);
2373   if (!CTAGS)
2374     put_entries (np->left);
2375 }
2376
2377 \f
/* C extensions.
   These are bit masks.  C_PLPL is bit 0; C_STAR and C_JAVA both
   include bit 0 and add one bit of their own (bit 1 and bit 2
   respectively), so that a keyword flagged C_PLPL matches all three
   while `C_JAVA & ~C_PLPL' (bit 2 alone) matches C_JAVA only. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */

/*
 * The C symbol tables.
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore, st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};

static unsigned int hash __P((const char *, unsigned int));
static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
static enum sym_type C_symtype __P((char *, int, int));

/* Feed stuff between (but not including) %[ and %] lines to:
     gperf -m 5
%[
%compare-strncmp
%enum
%struct-type
struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
%%
if,             0,                      st_C_ignore
for,            0,                      st_C_ignore
while,          0,                      st_C_ignore
switch,         0,                      st_C_ignore
return,         0,                      st_C_ignore
__attribute__,  0,                      st_C_attribute
@interface,     0,                      st_C_objprot
@protocol,      0,                      st_C_objprot
@implementation,0,                      st_C_objimpl
@end,           0,                      st_C_objend
# The Java-only keywords use a bitwise complement: a logical `!' here
# would turn the whole expression into 0, the value used for keywords
# that are valid in every language.
import,         (C_JAVA & ~C_PLPL),     st_C_ignore
package,        (C_JAVA & ~C_PLPL),     st_C_ignore
friend,         C_PLPL,                 st_C_ignore
extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
interface,      (C_JAVA & ~C_PLPL),     st_C_struct
class,          0,                      st_C_class
namespace,      C_PLPL,                 st_C_struct
domain,         C_STAR,                 st_C_struct
union,          0,                      st_C_struct
struct,         0,                      st_C_struct
extern,         0,                      st_C_extern
enum,           0,                      st_C_enum
typedef,        0,                      st_C_typedef
define,         0,                      st_C_define
operator,       C_PLPL,                 st_C_operator
template,       0,                      st_C_template
# DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
DEFUN,          0,                      st_C_gnumacro
SYSCALL,        0,                      st_C_gnumacro
ENTRY,          0,                      st_C_gnumacro
PSEUDO,         0,                      st_C_gnumacro
# These are defined inside C functions, so currently they are not met.
# EXFUN used in glibc, DEFVAR_* in emacs.
#EXFUN,         0,                      st_C_gnumacro
#DEFVAR_,       0,                      st_C_gnumacro
%]
and replace lines between %< and %> with its output, then:
 - remove the #if characterset check
 - make in_word_set static and not inline. */
/*%<*/
/* C code produced by gperf version 3.0.1 */
/* Command-line: gperf -m 5  */
/* Computed positions: -k'1-2' */

/* One keyword: its spelling, the C_* mask of the languages it belongs
   to (0 for all of them) and the kind of symbol it is. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
/* maximum key range = 31, duplicates = 0 */

/* Return the hash value of the word STR, which is LEN characters long.
   Only the first two characters of STR and LEN contribute, so STR must
   have at least two characters. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34,  1, 34, 34, 34, 14, 14,
      34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      34, 34, 34, 34, 34,  8, 34, 11, 34, 12,
      11,  0,  1, 34,  7,  0, 34, 34, 11,  9,
       0,  4,  0, 34,  7,  4, 14, 21, 34, 15,
       0,  2, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      34, 34, 34, 34, 34, 34
    };
  return len + asso_values[(unsigned char)str[1]] + asso_values[(unsigned char)str[0]];
}

/* Look up the word STR, which is LEN characters long, in the keyword
   table.  Return a pointer to its entry, or 0 if STR is not one of the
   keywords.  Words whose length is outside the range of the keyword
   lengths are rejected without being hashed. */
static struct C_stab_entry *
in_word_set (str, len)
     register const char *str;
     register unsigned int len;
{
  enum
    {
      TOTAL_KEYWORDS = 31,
      MIN_WORD_LENGTH = 2,
      MAX_WORD_LENGTH = 15,
      MIN_HASH_VALUE = 3,
      MAX_HASH_VALUE = 33
    };

  /* Indexed by hash value; the unused slots hold an empty name. */
  static struct C_stab_entry wordlist[] =
    {
      {""}, {""}, {""},
      {"if",            0,                      st_C_ignore},
      {"enum",          0,                      st_C_enum},
      {"@end",          0,                      st_C_objend},
      {"extern",        0,                      st_C_extern},
      {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"for",           0,                      st_C_ignore},
      {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
      {"@protocol",     0,                      st_C_objprot},
      {"@interface",    0,                      st_C_objprot},
      {"operator",      C_PLPL,                 st_C_operator},
      {"return",        0,                      st_C_ignore},
      {"friend",        C_PLPL,                 st_C_ignore},
      {"import",        (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"@implementation",0,                     st_C_objimpl},
      {"define",        0,                      st_C_define},
      {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
      {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
      {"namespace",     C_PLPL,                 st_C_struct},
      {"domain",        C_STAR,                 st_C_struct},
      {"template",      0,                      st_C_template},
      {"typedef",       0,                      st_C_typedef},
      {"struct",        0,                      st_C_struct},
      {"switch",        0,                      st_C_ignore},
      {"union",         0,                      st_C_struct},
      {"while",         0,                      st_C_ignore},
      {"class",         0,                      st_C_class},
      {"__attribute__", 0,                      st_C_attribute},
      {"SYSCALL",       0,                      st_C_gnumacro},
      {"PSEUDO",        0,                      st_C_gnumacro},
      {"ENTRY",         0,                      st_C_gnumacro},
      {"DEFUN",         0,                      st_C_gnumacro}
    };

  if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
    {
      register int key = hash (str, len);

      if (key <= MAX_HASH_VALUE && key >= 0)
        {
          register const char *s = wordlist[key].name;

          /* The hash only selects a candidate: compare the spelling
             and make sure the keyword is not longer than STR. */
          if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
            return &wordlist[key];
        }
    }
  return 0;
}
/*%>*/
2571
2572 static enum sym_type
2573 C_symtype (str, len, c_ext)
2574      char *str;
2575      int len;
2576      int c_ext;
2577 {
2578   register struct C_stab_entry *se = in_word_set (str, len);
2579
2580   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2581     return st_none;
2582   return se->type;
2583 }
2584
2585 \f
/*
 * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it meets the __attribute__
 * keyword.  While it is set, C_entries does not pass tokens to
 * consider_token.
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Set by consider_token on the `extern' keyword; cleared again when a
   typedef or an ignored keyword is met, and by C_entries on quotes. */
static bool fvextern;           /* func or var: extern keyword seen; */
2610
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 *
 * While typdef is tignore, C_entries does not pass tokens to
 * consider_token.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2624
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 *
 * consider_token moves it from snone to skeyseen on a struct-like
 * keyword, and from skeyseen to stagseen on the token that follows.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag */
} structdef;
2638
/*
 * When objdef is different from onone, objtag is the name of the class.
 *
 * consider_token replaces it with a fresh savenstr () copy each time a
 * class name is seen; the previous string is not freed.
 */
static char *objtag = "<uninited>";
2643
/*
 * Yet another little state machine to deal with preprocessor lines.
 *
 * While definedef is dignorerest, C_entries does not pass tokens to
 * consider_token.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2654
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2674
2675
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* when FALSE, do not create a tag; the
                                   token should be invalidated whenever a
                                   state machine is reset prematurely.
                                   make_C_tag clears it after use. */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2698
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above __P((int, char *, int));
static void popclass_above __P((int));
static void write_classname __P((linebuffer *, char *qualifier));

/* cname and bracelev are parallel arrays: entry I describes the class
   at nesting level I.  A cname entry may be NULL (pushclass_above
   stores NULL when it is given no name). */
static struct {
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that uses a variable named bracelev from the
   scope where the macro is used. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2718
2719 static void
2720 pushclass_above (bracelev, str, len)
2721      int bracelev;
2722      char *str;
2723      int len;
2724 {
2725   int nl;
2726
2727   popclass_above (bracelev);
2728   nl = cstack.nl;
2729   if (nl >= cstack.size)
2730     {
2731       int size = cstack.size *= 2;
2732       xrnew (cstack.cname, size, char *);
2733       xrnew (cstack.bracelev, size, int);
2734     }
2735   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2736   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2737   cstack.bracelev[nl] = bracelev;
2738   cstack.nl = nl + 1;
2739 }
2740
2741 static void
2742 popclass_above (bracelev)
2743      int bracelev;
2744 {
2745   int nl;
2746
2747   for (nl = cstack.nl - 1;
2748        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2749        nl--)
2750     {
2751       if (cstack.cname[nl] != NULL)
2752         free (cstack.cname[nl]);
2753       cstack.nl = nl;
2754     }
2755 }
2756
2757 static void
2758 write_classname (cn, qualifier)
2759      linebuffer *cn;
2760      char *qualifier;
2761 {
2762   int i, len;
2763   int qlen = strlen (qualifier);
2764
2765   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2766     {
2767       len = 0;
2768       cn->len = 0;
2769       cn->buffer[0] = '\0';
2770     }
2771   else
2772     {
2773       len = strlen (cstack.cname[0]);
2774       linebuffer_setlen (cn, len);
2775       strcpy (cn->buffer, cstack.cname[0]);
2776     }
2777   for (i = 1; i < cstack.nl; i++)
2778     {
2779       char *s;
2780       int slen;
2781
2782       s = cstack.cname[i];
2783       if (s == NULL)
2784         continue;
2785       slen = strlen (s);
2786       len += slen + qlen;
2787       linebuffer_setlen (cn, len);
2788       strncat (cn->buffer, qualifier, qlen);
2789       strncat (cn->buffer, s, slen);
2790     }
2791 }
2792
2793 \f
static bool consider_token __P((char *, int, int, int *, int, int, bool *));
static void make_C_tag __P((bool));

/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function or variable, or corresponds to a typedef, or
 *      is a struct/union/enum tag, or #define, or an enum constant.
 *
 *      Returns TRUE when the token is a candidate for a tag, FALSE
 *      otherwise.
 *
 *      *IS_FUNC_OR_VAR is set to TRUE when the token is taken as a
 *      function or variable name, an operator, a GNU macro name or an
 *      Objective C class or category tag; for a #define it is TRUE
 *      only when '(' follows the macro name.  On the other paths it is
 *      left untouched.  C_EXTP points to which language we are looking
 *      at; C_AUTO is replaced by C_PLPL in it when C++ is detected.
 *
 * Globals
 *      fvdef                   IN OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      fvextern                OUT
 *      inattribute             OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names)
 */

static bool
consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
     register char *str;        /* IN: token pointer */
     register int len;          /* IN: token length */
     register int c;            /* IN: first char after the token */
     int *c_extp;               /* IN, OUT: C extensions mask */
     int bracelev;              /* IN: brace level */
     int parlev;                /* IN: parenthesis level */
     bool *is_func_or_var;      /* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  static enum sym_type toktype;


  /* Classify the token as a keyword of the current dialect, or as
     st_none if it is not one. */
  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = TRUE;
      return FALSE;
     }

   /*
    * Advance the definedef state machine.
    */
   switch (definedef)
     {
     case dnone:
       /* We're not on a preprocessor line. */
       if (toktype == st_C_gnumacro)
         {
           fvdef = fdefunkey;
           return FALSE;
         }
       break;
     case dsharpseen:
       /* First token after '#': only `define' is of interest. */
       if (toktype == st_C_define)
         {
           definedef = ddefineseen;
         }
       else
         {
           definedef = dignorerest;
         }
       return FALSE;
     case ddefineseen:
       /*
        * Make a tag for any macro, unless it is a constant
        * and constantypedefs is FALSE.
        */
       definedef = dignorerest;
       *is_func_or_var = (c == '(');
       if (!*is_func_or_var && !constantypedefs)
         return FALSE;
       else
         return TRUE;
     case dignorerest:
       return FALSE;
     default:
       error ("internal error: definedef value.", (char *)NULL);
     }

   /*
    * Now typedefs
    */
   switch (typdef)
     {
     case tnone:
       if (toktype == st_C_typedef)
         {
           /* The typedef automaton is started only if typedefs are
              wanted, but the keyword always resets fvdef and fvextern. */
           if (typedefs)
             typdef = tkeyseen;
           fvextern = FALSE;
           fvdef = fvnone;
           return FALSE;
         }
       break;
     case tkeyseen:
       switch (toktype)
         {
         case st_none:
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           typdef = ttypeseen;
         }
       break;
     case ttypeseen:
       if (structdef == snone && fvdef == fvnone)
         {
           fvdef = fvnameseen;
           return TRUE;
         }
       break;
     case tend:
       switch (toktype)
         {
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           return FALSE;
         }
       return TRUE;
     }

   /*
    * This structdef business is NOT invoked when we are ctags and the
    * file is plain C.  This is because a struct tag may have the same
    * name as another tag, and this loses with ctags.
    */
   switch (toktype)
     {
     case st_C_javastruct:
       if (structdef == stagseen)
         structdef = scolonseen;
       return FALSE;
     case st_C_template:
     case st_C_class:
       if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
           && bracelev == 0
           && definedef == dnone && structdef == snone
           && typdef == tnone && fvdef == fvnone)
         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
       if (toktype == st_C_template)
         break;
       /* FALLTHRU */
     case st_C_struct:
     case st_C_enum:
       if (parlev == 0
           && fvdef != vignore
           && (typdef == tkeyseen
               || (typedefs_or_cplusplus && structdef == snone)))
         {
           /* Remember which keyword started the structure and at
              which brace level, for the enum constant test below. */
           structdef = skeyseen;
           structtype = toktype;
           structbracelev = bracelev;
           if (fvdef == fvnameseen)
             fvdef = fvnone;
         }
       return FALSE;
     }

   /* The token that follows a struct-like keyword is the struct tag. */
   if (structdef == skeyseen)
     {
       structdef = stagseen;
       return TRUE;
     }

   if (typdef != tnone)
     definedef = dnone;

   /* Detect Objective C constructs. */
   switch (objdef)
     {
     case onone:
       switch (toktype)
         {
         case st_C_objprot:
           objdef = oprotocol;
           return FALSE;
         case st_C_objimpl:
           objdef = oimplementation;
           return FALSE;
         }
       break;
     case oimplementation:
       /* Save the class tag for functions or variables defined inside. */
       objtag = savenstr (str, len);
       objdef = oinbody;
       return FALSE;
     case oprotocol:
       /* Save the class tag for categories. */
       objtag = savenstr (str, len);
       objdef = otagseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oparenseen:
       objdef = ocatseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oinbody:
       break;
     case omethodsign:
       if (parlev == 0)
         {
           /* First part of a method name: start token_name afresh. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, len);
           strncpy (token_name.buffer, str, len);
           token_name.buffer[len] = '\0';
           return TRUE;
         }
       return FALSE;
     case omethodcolon:
       if (parlev == 0)
         objdef = omethodparm;
       return FALSE;
     case omethodparm:
       if (parlev == 0)
         {
           /* A further part of the method name: append it to
              token_name. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, token_name.len + len);
           strncat (token_name.buffer, str, len);
           return TRUE;
         }
       return FALSE;
     case oignore:
       if (toktype == st_C_objend)
         {
           /* Memory leakage here: the string pointed by objtag is
              never released, because many tests would be needed to
              avoid breaking on incorrect input code.  The amount of
              memory leaked here is the sum of the lengths of the
              class tags.
           free (objtag); */
           objdef = onone;
         }
       return FALSE;
     }

   /* A function, variable or enum constant? */
   switch (toktype)
     {
     case st_C_extern:
       fvextern = TRUE;
       switch  (fvdef)
         {
         case finlist:
         case flistseen:
         case fignore:
         case vignore:
           break;
         default:
           fvdef = fvnone;
         }
       return FALSE;
     case st_C_ignore:
       fvextern = FALSE;
       fvdef = vignore;
       return FALSE;
     case st_C_operator:
       fvdef = foperator;
       *is_func_or_var = TRUE;
       return TRUE;
     case st_none:
       if (constantypedefs
           && structdef == snone
           && structtype == st_C_enum && bracelev > structbracelev)
         return TRUE;           /* enum constant */
       switch (fvdef)
         {
         case fdefunkey:
           if (bracelev > 0)
             break;
           fvdef = fdefunname;  /* GNU macro */
           *is_func_or_var = TRUE;
           return TRUE;
         case fvnone:
           switch (typdef)
             {
             case ttypeseen:
               return FALSE;
             case tnone:
               /* An asm statement is ignored like a variable. */
               if ((strneq (str, "asm", 3) && endtoken (str[3]))
                   || (strneq (str, "__asm__", 7) && endtoken (str[7])))
                 {
                   fvdef = vignore;
                   return FALSE;
                 }
               break;
             }
          /* FALLTHRU */
          case fvnameseen:
          /* A token ending in "::operator" starts a C++ operator
             definition. */
          if (len >= 10 && strneq (str+len-10, "::operator", 10))
            {
              if (*c_extp & C_AUTO) /* automatic detection of C++ */
                *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
              fvdef = foperator;
              *is_func_or_var = TRUE;
              return TRUE;
            }
          if (bracelev > 0 && !instruct)
            break;
          fvdef = fvnameseen;   /* function or variable */
          *is_func_or_var = TRUE;
          return TRUE;
        }
      break;
    }

  return FALSE;
}
3114
3115 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
static struct
{
  long linepos;                 /* value of charno saved just before the
                                   line was read */
  linebuffer lb;                /* the line itself */
} lbs[2];

/* The macros below are written to be expanded inside C_entries: they
   refer to its variables curndx, newndx, c_ext, lp, quotednl, inf and
   savetoken by name. */

#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the C extensions mask. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next input line into the current line buffer and restart
   scanning from its beginning, leaving definedef as it is. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also restore the token saved in
   savetoken, if there is a valid one, and reset definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3158
3159
3160 static void
3161 make_C_tag (isfun)
3162      bool isfun;
3163 {
3164   /* This function should never be called when token.valid is FALSE, but
3165      we must protect against invalid input or internal errors. */
3166   if (!DEBUG && !token.valid)
3167     return;
3168
3169   if (token.valid)
3170     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3171               token.offset+token.length+1, token.lineno, token.linepos);
3172   else                          /* this case is optimised away if !DEBUG */
3173     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3174               token_name.len + 17, isfun, token.line,
3175               token.offset+token.length+1, token.lineno, token.linepos);
3176
3177   token.valid = FALSE;
3178 }
3179
3180
3181 /*
3182  * C_entries ()
3183  *      This routine finds functions, variables, typedefs,
3184  *      #define's, enum constants and struct/union/enum definitions in
3185  *      C syntax and adds them to the list.
3186  */
3187 static void
3188 C_entries (c_ext, inf)
3189      int c_ext;                 /* extension of C */
3190      FILE *inf;                 /* input file */
3191 {
3192   register char c;              /* latest char read; '\0' for end of line */
3193   register char *lp;            /* pointer one beyond the character `c' */
3194   int curndx, newndx;           /* indices for current and new lb */
3195   register int tokoff;          /* offset in line of start of current token */
3196   register int toklen;          /* length of current token */
3197   char *qualifier;              /* string used to qualify names */
3198   int qlen;                     /* length of qualifier */
3199   int bracelev;                 /* current brace level */
3200   int bracketlev;               /* current bracket level */
3201   int parlev;                   /* current parenthesis level */
3202   int attrparlev;               /* __attribute__ parenthesis level */
3203   int templatelev;              /* current template level */
3204   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3205   bool incomm, inquote, inchar, quotednl, midtoken;
3206   bool yacc_rules;              /* in the rules part of a yacc file */
3207   struct tok savetoken;         /* token saved during preprocessor handling */
3208
3209
3210   linebuffer_init (&lbs[0].lb);
3211   linebuffer_init (&lbs[1].lb);
3212   if (cstack.size == 0)
3213     {
3214       cstack.size = (DEBUG) ? 1 : 4;
3215       cstack.nl = 0;
3216       cstack.cname = xnew (cstack.size, char *);
3217       cstack.bracelev = xnew (cstack.size, int);
3218     }
3219
3220   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3221   curndx = newndx = 0;
3222   lp = curlb.buffer;
3223   *lp = 0;
3224
3225   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3226   structdef = snone; definedef = dnone; objdef = onone;
3227   yacc_rules = FALSE;
3228   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3229   token.valid = savetoken.valid = FALSE;
3230   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3231   if (cjava)
3232     { qualifier = "."; qlen = 1; }
3233   else
3234     { qualifier = "::"; qlen = 2; }
3235
3236
3237   while (!feof (inf))
3238     {
3239       c = *lp++;
3240       if (c == '\\')
3241         {
3242           /* If we are at the end of the line, the next character is a
3243              '\0'; do not skip it, because it is what tells us
3244              to read the next line.  */
3245           if (*lp == '\0')
3246             {
3247               quotednl = TRUE;
3248               continue;
3249             }
3250           lp++;
3251           c = ' ';
3252         }
3253       else if (incomm)
3254         {
3255           switch (c)
3256             {
3257             case '*':
3258               if (*lp == '/')
3259                 {
3260                   c = *lp++;
3261                   incomm = FALSE;
3262                 }
3263               break;
3264             case '\0':
3265               /* Newlines inside comments do not end macro definitions in
3266                  traditional cpp. */
3267               CNL_SAVE_DEFINEDEF ();
3268               break;
3269             }
3270           continue;
3271         }
3272       else if (inquote)
3273         {
3274           switch (c)
3275             {
3276             case '"':
3277               inquote = FALSE;
3278               break;
3279             case '\0':
3280               /* Newlines inside strings do not end macro definitions
3281                  in traditional cpp, even though compilers don't
3282                  usually accept them. */
3283               CNL_SAVE_DEFINEDEF ();
3284               break;
3285             }
3286           continue;
3287         }
3288       else if (inchar)
3289         {
3290           switch (c)
3291             {
3292             case '\0':
3293               /* Hmmm, something went wrong. */
3294               CNL ();
3295               /* FALLTHRU */
3296             case '\'':
3297               inchar = FALSE;
3298               break;
3299             }
3300           continue;
3301         }
3302       else if (bracketlev > 0)
3303         {
3304           switch (c)
3305             {
3306             case ']':
3307               if (--bracketlev > 0)
3308                 continue;
3309               break;
3310             case '\0':
3311               CNL_SAVE_DEFINEDEF ();
3312               break;
3313             }
3314           continue;
3315         }
3316       else switch (c)
3317         {
3318         case '"':
3319           inquote = TRUE;
3320           if (inattribute)
3321             break;
3322           switch (fvdef)
3323             {
3324             case fdefunkey:
3325             case fstartlist:
3326             case finlist:
3327             case fignore:
3328             case vignore:
3329               break;
3330             default:
3331               fvextern = FALSE;
3332               fvdef = fvnone;
3333             }
3334           continue;
3335         case '\'':
3336           inchar = TRUE;
3337           if (inattribute)
3338             break;
3339           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3340             {
3341               fvextern = FALSE;
3342               fvdef = fvnone;
3343             }
3344           continue;
3345         case '/':
3346           if (*lp == '*')
3347             {
3348               lp++;
3349               incomm = TRUE;
3350               continue;
3351             }
3352           else if (/* cplpl && */ *lp == '/')
3353             {
3354               c = '\0';
3355               break;
3356             }
3357           else
3358             break;
3359         case '%':
3360           if ((c_ext & YACC) && *lp == '%')
3361             {
3362               /* Entering or exiting rules section in yacc file. */
3363               lp++;
3364               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3365               typdef = tnone; structdef = snone;
3366               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3367               bracelev = 0;
3368               yacc_rules = !yacc_rules;
3369               continue;
3370             }
3371           else
3372             break;
3373         case '#':
3374           if (definedef == dnone)
3375             {
3376               char *cp;
3377               bool cpptoken = TRUE;
3378
3379               /* Look back on this line.  If all blanks, or nonblanks
3380                  followed by an end of comment, this is a preprocessor
3381                  token. */
3382               for (cp = newlb.buffer; cp < lp-1; cp++)
3383                 if (!iswhite (*cp))
3384                   {
3385                     if (*cp == '*' && *(cp+1) == '/')
3386                       {
3387                         cp++;
3388                         cpptoken = TRUE;
3389                       }
3390                     else
3391                       cpptoken = FALSE;
3392                   }
3393               if (cpptoken)
3394                 definedef = dsharpseen;
3395             } /* if (definedef == dnone) */
3396           continue;
3397         case '[':
3398           bracketlev++;
3399             continue;
3400         } /* switch (c) */
3401
3402
3403       /* Consider token only if some involved conditions are satisfied. */
3404       if (typdef != tignore
3405           && definedef != dignorerest
3406           && fvdef != finlist
3407           && templatelev == 0
3408           && (definedef != dnone
3409               || structdef != scolonseen)
3410           && !inattribute)
3411         {
3412           if (midtoken)
3413             {
3414               if (endtoken (c))
3415                 {
3416                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3417                     /* This handles :: in the middle,
3418                        but not at the beginning of an identifier.
3419                        Also, space-separated :: is not recognised. */
3420                     {
3421                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3422                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3423                       lp += 2;
3424                       toklen += 2;
3425                       c = lp[-1];
3426                       goto still_in_token;
3427                     }
3428                   else
3429                     {
3430                       bool funorvar = FALSE;
3431
3432                       if (yacc_rules
3433                           || consider_token (newlb.buffer + tokoff, toklen, c,
3434                                              &c_ext, bracelev, parlev,
3435                                              &funorvar))
3436                         {
3437                           if (fvdef == foperator)
3438                             {
3439                               char *oldlp = lp;
3440                               lp = skip_spaces (lp-1);
3441                               if (*lp != '\0')
3442                                 lp += 1;
3443                               while (*lp != '\0'
3444                                      && !iswhite (*lp) && *lp != '(')
3445                                 lp += 1;
3446                               c = *lp++;
3447                               toklen += lp - oldlp;
3448                             }
3449                           token.named = FALSE;
3450                           if (!plainc
3451                               && nestlev > 0 && definedef == dnone)
3452                             /* in struct body */
3453                             {
3454                               write_classname (&token_name, qualifier);
3455                               linebuffer_setlen (&token_name,
3456                                                  token_name.len+qlen+toklen);
3457                               strcat (token_name.buffer, qualifier);
3458                               strncat (token_name.buffer,
3459                                        newlb.buffer + tokoff, toklen);
3460                               token.named = TRUE;
3461                             }
3462                           else if (objdef == ocatseen)
3463                             /* Objective C category */
3464                             {
3465                               int len = strlen (objtag) + 2 + toklen;
3466                               linebuffer_setlen (&token_name, len);
3467                               strcpy (token_name.buffer, objtag);
3468                               strcat (token_name.buffer, "(");
3469                               strncat (token_name.buffer,
3470                                        newlb.buffer + tokoff, toklen);
3471                               strcat (token_name.buffer, ")");
3472                               token.named = TRUE;
3473                             }
3474                           else if (objdef == omethodtag
3475                                    || objdef == omethodparm)
3476                             /* Objective C method */
3477                             {
3478                               token.named = TRUE;
3479                             }
3480                           else if (fvdef == fdefunname)
3481                             /* GNU DEFUN and similar macros */
3482                             {
3483                               bool defun = (newlb.buffer[tokoff] == 'F');
3484                               int off = tokoff;
3485                               int len = toklen;
3486
3487                               /* Rewrite the tag so that emacs lisp DEFUNs
3488                                  can be found by their elisp name */
3489                               if (defun)
3490                                 {
3491                                   off += 1;
3492                                   len -= 1;
3493                                 }
3494                               len = toklen;
3495                               linebuffer_setlen (&token_name, len);
3496                               strncpy (token_name.buffer,
3497                                        newlb.buffer + off, len);
3498                               token_name.buffer[len] = '\0';
3499                               if (defun)
3500                                 while (--len >= 0)
3501                                   if (token_name.buffer[len] == '_')
3502                                     token_name.buffer[len] = '-';
3503                               token.named = defun;
3504                             }
3505                           else
3506                             {
3507                               linebuffer_setlen (&token_name, toklen);
3508                               strncpy (token_name.buffer,
3509                                        newlb.buffer + tokoff, toklen);
3510                               token_name.buffer[toklen] = '\0';
3511                               /* Name macros and members. */
3512                               token.named = (structdef == stagseen
3513                                              || typdef == ttypeseen
3514                                              || typdef == tend
3515                                              || (funorvar
3516                                                  && definedef == dignorerest)
3517                                              || (funorvar
3518                                                  && definedef == dnone
3519                                                  && structdef == snone
3520                                                  && bracelev > 0));
3521                             }
3522                           token.lineno = lineno;
3523                           token.offset = tokoff;
3524                           token.length = toklen;
3525                           token.line = newlb.buffer;
3526                           token.linepos = newlinepos;
3527                           token.valid = TRUE;
3528
3529                           if (definedef == dnone
3530                               && (fvdef == fvnameseen
3531                                   || fvdef == foperator
3532                                   || structdef == stagseen
3533                                   || typdef == tend
3534                                   || typdef == ttypeseen
3535                                   || objdef != onone))
3536                             {
3537                               if (current_lb_is_new)
3538                                 switch_line_buffers ();
3539                             }
3540                           else if (definedef != dnone
3541                                    || fvdef == fdefunname
3542                                    || instruct)
3543                             make_C_tag (funorvar);
3544                         }
3545                       else /* not yacc and consider_token failed */
3546                         {
3547                           if (inattribute && fvdef == fignore)
3548                             {
3549                               /* We have just met __attribute__ after a
3550                                  function parameter list: do not tag the
3551                                  function again. */
3552                               fvdef = fvnone;
3553                             }
3554                         }
3555                       midtoken = FALSE;
3556                     }
3557                 } /* if (endtoken (c)) */
3558               else if (intoken (c))
3559                 still_in_token:
3560                 {
3561                   toklen++;
3562                   continue;
3563                 }
3564             } /* if (midtoken) */
3565           else if (begtoken (c))
3566             {
3567               switch (definedef)
3568                 {
3569                 case dnone:
3570                   switch (fvdef)
3571                     {
3572                     case fstartlist:
3573                       /* This prevents tagging fb in
3574                          void (__attribute__((noreturn)) *fb) (void);
3575                          Fixing this is not easy and not very important. */
3576                       fvdef = finlist;
3577                       continue;
3578                     case flistseen:
3579                       if (plainc || declarations)
3580                         {
3581                           make_C_tag (TRUE); /* a function */
3582                           fvdef = fignore;
3583                         }
3584                       break;
3585                     }
3586                   if (structdef == stagseen && !cjava)
3587                     {
3588                       popclass_above (bracelev);
3589                       structdef = snone;
3590                     }
3591                   break;
3592                 case dsharpseen:
3593                   savetoken = token;
3594                   break;
3595                 }
3596               if (!yacc_rules || lp == newlb.buffer + 1)
3597                 {
3598                   tokoff = lp - 1 - newlb.buffer;
3599                   toklen = 1;
3600                   midtoken = TRUE;
3601                 }
3602               continue;
3603             } /* if (begtoken) */
3604         } /* if must look at token */
3605
3606
3607       /* Detect end of line, colon, comma, semicolon and various braces
3608          after having handled a token.*/
3609       switch (c)
3610         {
3611         case ':':
3612           if (inattribute)
3613             break;
3614           if (yacc_rules && token.offset == 0 && token.valid)
3615             {
3616               make_C_tag (FALSE); /* a yacc function */
3617               break;
3618             }
3619           if (definedef != dnone)
3620             break;
3621           switch (objdef)
3622             {
3623             case  otagseen:
3624               objdef = oignore;
3625               make_C_tag (TRUE); /* an Objective C class */
3626               break;
3627             case omethodtag:
3628             case omethodparm:
3629               objdef = omethodcolon;
3630               linebuffer_setlen (&token_name, token_name.len + 1);
3631               strcat (token_name.buffer, ":");
3632               break;
3633             }
3634           if (structdef == stagseen)
3635             {
3636               structdef = scolonseen;
3637               break;
3638             }
3639           /* Should be useless, but may be work as a safety net. */
3640           if (cplpl && fvdef == flistseen)
3641             {
3642               make_C_tag (TRUE); /* a function */
3643               fvdef = fignore;
3644               break;
3645             }
3646           break;
3647         case ';':
3648           if (definedef != dnone || inattribute)
3649             break;
3650           switch (typdef)
3651             {
3652             case tend:
3653             case ttypeseen:
3654               make_C_tag (FALSE); /* a typedef */
3655               typdef = tnone;
3656               fvdef = fvnone;
3657               break;
3658             case tnone:
3659             case tinbody:
3660             case tignore:
3661               switch (fvdef)
3662                 {
3663                 case fignore:
3664                   if (typdef == tignore || cplpl)
3665                     fvdef = fvnone;
3666                   break;
3667                 case fvnameseen:
3668                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3669                       || (members && instruct))
3670                     make_C_tag (FALSE); /* a variable */
3671                   fvextern = FALSE;
3672                   fvdef = fvnone;
3673                   token.valid = FALSE;
3674                   break;
3675                 case flistseen:
3676                   if ((declarations
3677                        && (cplpl || !instruct)
3678                        && (typdef == tnone || (typdef != tignore && instruct)))
3679                       || (members
3680                           && plainc && instruct))
3681                     make_C_tag (TRUE);  /* a function */
3682                   /* FALLTHRU */
3683                 default:
3684                   fvextern = FALSE;
3685                   fvdef = fvnone;
3686                   if (declarations
3687                        && cplpl && structdef == stagseen)
3688                     make_C_tag (FALSE); /* forward declaration */
3689                   else
3690                     token.valid = FALSE;
3691                 } /* switch (fvdef) */
3692               /* FALLTHRU */
3693             default:
3694               if (!instruct)
3695                 typdef = tnone;
3696             }
3697           if (structdef == stagseen)
3698             structdef = snone;
3699           break;
3700         case ',':
3701           if (definedef != dnone || inattribute)
3702             break;
3703           switch (objdef)
3704             {
3705             case omethodtag:
3706             case omethodparm:
3707               make_C_tag (TRUE); /* an Objective C method */
3708               objdef = oinbody;
3709               break;
3710             }
3711           switch (fvdef)
3712             {
3713             case fdefunkey:
3714             case foperator:
3715             case fstartlist:
3716             case finlist:
3717             case fignore:
3718             case vignore:
3719               break;
3720             case fdefunname:
3721               fvdef = fignore;
3722               break;
3723             case fvnameseen:
3724               if (parlev == 0
3725                   && ((globals
3726                        && bracelev == 0
3727                        && templatelev == 0
3728                        && (!fvextern || declarations))
3729                       || (members && instruct)))
3730                   make_C_tag (FALSE); /* a variable */
3731               break;
3732             case flistseen:
3733               if ((declarations && typdef == tnone && !instruct)
3734                   || (members && typdef != tignore && instruct))
3735                 {
3736                   make_C_tag (TRUE); /* a function */
3737                   fvdef = fvnameseen;
3738                 }
3739               else if (!declarations)
3740                 fvdef = fvnone;
3741               token.valid = FALSE;
3742               break;
3743             default:
3744               fvdef = fvnone;
3745             }
3746           if (structdef == stagseen)
3747             structdef = snone;
3748           break;
3749         case ']':
3750           if (definedef != dnone || inattribute)
3751             break;
3752           if (structdef == stagseen)
3753             structdef = snone;
3754           switch (typdef)
3755             {
3756             case ttypeseen:
3757             case tend:
3758               typdef = tignore;
3759               make_C_tag (FALSE);       /* a typedef */
3760               break;
3761             case tnone:
3762             case tinbody:
3763               switch (fvdef)
3764                 {
3765                 case foperator:
3766                 case finlist:
3767                 case fignore:
3768                 case vignore:
3769                   break;
3770                 case fvnameseen:
3771                   if ((members && bracelev == 1)
3772                       || (globals && bracelev == 0
3773                           && (!fvextern || declarations)))
3774                     make_C_tag (FALSE); /* a variable */
3775                   /* FALLTHRU */
3776                 default:
3777                   fvdef = fvnone;
3778                 }
3779               break;
3780             }
3781           break;
3782         case '(':
3783           if (inattribute)
3784             {
3785               attrparlev++;
3786               break;
3787             }
3788           if (definedef != dnone)
3789             break;
3790           if (objdef == otagseen && parlev == 0)
3791             objdef = oparenseen;
3792           switch (fvdef)
3793             {
3794             case fvnameseen:
3795               if (typdef == ttypeseen
3796                   && *lp != '*'
3797                   && !instruct)
3798                 {
3799                   /* This handles constructs like:
3800                      typedef void OperatorFun (int fun); */
3801                   make_C_tag (FALSE);
3802                   typdef = tignore;
3803                   fvdef = fignore;
3804                   break;
3805                 }
3806               /* FALLTHRU */
3807             case foperator:
3808               fvdef = fstartlist;
3809               break;
3810             case flistseen:
3811               fvdef = finlist;
3812               break;
3813             }
3814           parlev++;
3815           break;
3816         case ')':
3817           if (inattribute)
3818             {
3819               if (--attrparlev == 0)
3820                 inattribute = FALSE;
3821               break;
3822             }
3823           if (definedef != dnone)
3824             break;
3825           if (objdef == ocatseen && parlev == 1)
3826             {
3827               make_C_tag (TRUE); /* an Objective C category */
3828               objdef = oignore;
3829             }
3830           if (--parlev == 0)
3831             {
3832               switch (fvdef)
3833                 {
3834                 case fstartlist:
3835                 case finlist:
3836                   fvdef = flistseen;
3837                   break;
3838                 }
3839               if (!instruct
3840                   && (typdef == tend
3841                       || typdef == ttypeseen))
3842                 {
3843                   typdef = tignore;
3844                   make_C_tag (FALSE); /* a typedef */
3845                 }
3846             }
3847           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3848             parlev = 0;
3849           break;
3850         case '{':
3851           if (definedef != dnone)
3852             break;
3853           if (typdef == ttypeseen)
3854             {
3855               /* Whenever typdef is set to tinbody (currently only
3856                  here), typdefbracelev should be set to bracelev. */
3857               typdef = tinbody;
3858               typdefbracelev = bracelev;
3859             }
3860           switch (fvdef)
3861             {
3862             case flistseen:
3863               make_C_tag (TRUE);    /* a function */
3864               /* FALLTHRU */
3865             case fignore:
3866               fvdef = fvnone;
3867               break;
3868             case fvnone:
3869               switch (objdef)
3870                 {
3871                 case otagseen:
3872                   make_C_tag (TRUE); /* an Objective C class */
3873                   objdef = oignore;
3874                   break;
3875                 case omethodtag:
3876                 case omethodparm:
3877                   make_C_tag (TRUE); /* an Objective C method */
3878                   objdef = oinbody;
3879                   break;
3880                 default:
3881                   /* Neutralize `extern "C" {' grot. */
3882                   if (bracelev == 0 && structdef == snone && nestlev == 0
3883                       && typdef == tnone)
3884                     bracelev = -1;
3885                 }
3886               break;
3887             }
3888           switch (structdef)
3889             {
3890             case skeyseen:         /* unnamed struct */
3891               pushclass_above (bracelev, NULL, 0);
3892               structdef = snone;
3893               break;
3894             case stagseen:         /* named struct or enum */
3895             case scolonseen:       /* a class */
3896               pushclass_above (bracelev,token.line+token.offset, token.length);
3897               structdef = snone;
3898               make_C_tag (FALSE);  /* a struct or enum */
3899               break;
3900             }
3901           bracelev++;
3902           break;
3903         case '*':
3904           if (definedef != dnone)
3905             break;
3906           if (fvdef == fstartlist)
3907             {
3908               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3909               token.valid = FALSE;
3910             }
3911           break;
3912         case '}':
3913           if (definedef != dnone)
3914             break;
3915           if (!ignoreindent && lp == newlb.buffer + 1)
3916             {
3917               if (bracelev != 0)
3918                 token.valid = FALSE;
3919               bracelev = 0;     /* reset brace level if first column */
3920               parlev = 0;       /* also reset paren level, just in case... */
3921             }
3922           else if (bracelev > 0)
3923             bracelev--;
3924           else
3925             token.valid = FALSE; /* something gone amiss, token unreliable */
3926           popclass_above (bracelev);
3927           structdef = snone;
3928           /* Only if typdef == tinbody is typdefbracelev significant. */
3929           if (typdef == tinbody && bracelev <= typdefbracelev)
3930             {
3931               assert (bracelev == typdefbracelev);
3932               typdef = tend;
3933             }
3934           break;
3935         case '=':
3936           if (definedef != dnone)
3937             break;
3938           switch (fvdef)
3939             {
3940             case foperator:
3941             case finlist:
3942             case fignore:
3943             case vignore:
3944               break;
3945             case fvnameseen:
3946               if ((members && bracelev == 1)
3947                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3948                 make_C_tag (FALSE); /* a variable */
3949               /* FALLTHRU */
3950             default:
3951               fvdef = vignore;
3952             }
3953           break;
3954         case '<':
3955           if (cplpl
3956               && (structdef == stagseen || fvdef == fvnameseen))
3957             {
3958               templatelev++;
3959               break;
3960             }
3961           goto resetfvdef;
3962         case '>':
3963           if (templatelev > 0)
3964             {
3965               templatelev--;
3966               break;
3967             }
3968           goto resetfvdef;
3969         case '+':
3970         case '-':
3971           if (objdef == oinbody && bracelev == 0)
3972             {
3973               objdef = omethodsign;
3974               break;
3975             }
3976           /* FALLTHRU */
3977         resetfvdef:
3978         case '#': case '~': case '&': case '%': case '/':
3979         case '|': case '^': case '!': case '.': case '?':
3980           if (definedef != dnone)
3981             break;
3982           /* These surely cannot follow a function tag in C. */
3983           switch (fvdef)
3984             {
3985             case foperator:
3986             case finlist:
3987             case fignore:
3988             case vignore:
3989               break;
3990             default:
3991               fvdef = fvnone;
3992             }
3993           break;
3994         case '\0':
3995           if (objdef == otagseen)
3996             {
3997               make_C_tag (TRUE); /* an Objective C class */
3998               objdef = oignore;
3999             }
4000           /* If a macro spans multiple lines don't reset its state. */
4001           if (quotednl)
4002             CNL_SAVE_DEFINEDEF ();
4003           else
4004             CNL ();
4005           break;
4006         } /* switch (c) */
4007
4008     } /* while not eof */
4009
4010   free (lbs[0].lb.buffer);
4011   free (lbs[1].lb.buffer);
4012 }
4013
4014 /*
4015  * Process either a C++ file or a C file depending on the setting
4016  * of a global flag.
4017  */
4018 static void
4019 default_C_entries (inf)
4020      FILE *inf;
4021 {
4022   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4023 }
4024
/* Parse INF as plain C: no C_* language flag is passed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;

  C_entries (dialect, inf);
}
4032
4033 /* Always do C++. */
4034 static void
4035 Cplusplus_entries (inf)
4036      FILE *inf;
4037 {
4038   C_entries (C_PLPL, inf);
4039 }
4040
4041 /* Always do Java. */
4042 static void
4043 Cjava_entries (inf)
4044      FILE *inf;
4045 {
4046   C_entries (C_JAVA, inf);
4047 }
4048
4049 /* Always do C*. */
4050 static void
4051 Cstar_entries (inf)
4052      FILE *inf;
4053 {
4054   C_entries (C_STAR, inf);
4055 }
4056
4057 /* Always do Yacc. */
4058 static void
4059 Yacc_entries (inf)
4060      FILE *inf;
4061 {
4062   C_entries (YACC, inf);
4063 }
4064
4065 \f
/* Useful macros. */
/* Loop header: the statement that follows is executed once per input
   line.  While feof (FILE_POINTER) is false, each pass first calls
   readline to fill LINE_BUFFER and then points CHAR_POINTER at
   LINE_BUFFER.buffer.  The trailing TRUE only makes the comma
   expression succeed, so that feof alone ends the loop.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )
/* Test whether CP points at KEYWORD followed by a character for which
   notinname is true.  On success CP is assigned the result of
   skip_spaces applied just past the keyword; on failure CP is left
   alone.  CP is evaluated more than once and assigned to, so it must
   be an lvalue without side effects.  The length of KEYWORD is taken
   with sizeof, hence the requirement that it be a constant string.  */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof(keyword)-1])       /* end of keyword */    \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* skip spaces */
4079
4080 /*
4081  * Read a file, but do no processing.  This is used to do regexp
4082  * matching on files that have no language defined.
4083  */
4084 static void
4085 just_read_file (inf)
4086      FILE *inf;
4087 {
4088   register char *dummy;
4089
4090   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4091     continue;
4092 }
4093
4094 \f
/* Fortran parsing */

/* Forward declarations of the helpers called by Fortran_functions. */
static void F_takeprec __P((void));
static void F_getit __P((FILE *));
4099
4100 static void
4101 F_takeprec ()
4102 {
4103   dbp = skip_spaces (dbp);
4104   if (*dbp != '*')
4105     return;
4106   dbp++;
4107   dbp = skip_spaces (dbp);
4108   if (strneq (dbp, "(*)", 3))
4109     {
4110       dbp += 3;
4111       return;
4112     }
4113   if (!ISDIGIT (*dbp))
4114     {
4115       --dbp;                    /* force failure */
4116       return;
4117     }
4118   do
4119     dbp++;
4120   while (ISDIGIT (*dbp));
4121 }
4122
4123 static void
4124 F_getit (inf)
4125      FILE *inf;
4126 {
4127   register char *cp;
4128
4129   dbp = skip_spaces (dbp);
4130   if (*dbp == '\0')
4131     {
4132       readline (&lb, inf);
4133       dbp = lb.buffer;
4134       if (dbp[5] != '&')
4135         return;
4136       dbp += 6;
4137       dbp = skip_spaces (dbp);
4138     }
4139   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4140     return;
4141   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4142     continue;
4143   make_tag (dbp, cp-dbp, TRUE,
4144             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4145 }
4146
4147
4148 static void
4149 Fortran_functions (inf)
4150      FILE *inf;
4151 {
4152   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4153     {
4154       if (*dbp == '%')
4155         dbp++;                  /* Ratfor escape to fortran */
4156       dbp = skip_spaces (dbp);
4157       if (*dbp == '\0')
4158         continue;
4159       switch (lowcase (*dbp))
4160         {
4161         case 'i':
4162           if (nocase_tail ("integer"))
4163             F_takeprec ();
4164           break;
4165         case 'r':
4166           if (nocase_tail ("real"))
4167             F_takeprec ();
4168           break;
4169         case 'l':
4170           if (nocase_tail ("logical"))
4171             F_takeprec ();
4172           break;
4173         case 'c':
4174           if (nocase_tail ("complex") || nocase_tail ("character"))
4175             F_takeprec ();
4176           break;
4177         case 'd':
4178           if (nocase_tail ("double"))
4179             {
4180               dbp = skip_spaces (dbp);
4181               if (*dbp == '\0')
4182                 continue;
4183               if (nocase_tail ("precision"))
4184                 break;
4185               continue;
4186             }
4187           break;
4188         }
4189       dbp = skip_spaces (dbp);
4190       if (*dbp == '\0')
4191         continue;
4192       switch (lowcase (*dbp))
4193         {
4194         case 'f':
4195           if (nocase_tail ("function"))
4196             F_getit (inf);
4197           continue;
4198         case 's':
4199           if (nocase_tail ("subroutine"))
4200             F_getit (inf);
4201           continue;
4202         case 'e':
4203           if (nocase_tail ("entry"))
4204             F_getit (inf);
4205           continue;
4206         case 'b':
4207           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4208             {
4209               dbp = skip_spaces (dbp);
4210               if (*dbp == '\0') /* assume un-named */
4211                 make_tag ("blockdata", 9, TRUE,
4212                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4213               else
4214                 F_getit (inf);  /* look for name */
4215             }
4216           continue;
4217         }
4218     }
4219 }
4220
4221 \f
4222 /*
4223  * Ada parsing
4224  * Original code by
4225  * Philippe Waroquiers (1998)
4226  */
4227
4228 static void Ada_getit __P((FILE *, char *));
4229
4230 /* Once we are positioned after an "interesting" keyword, let's get
4231    the real tag value necessary. */
4232 static void
4233 Ada_getit (inf, name_qualifier)
4234      FILE *inf;
4235      char *name_qualifier;
4236 {
4237   register char *cp;
4238   char *name;
4239   char c;
4240
4241   while (!feof (inf))
4242     {
4243       dbp = skip_spaces (dbp);
4244       if (*dbp == '\0'
4245           || (dbp[0] == '-' && dbp[1] == '-'))
4246         {
4247           readline (&lb, inf);
4248           dbp = lb.buffer;
4249         }
4250       switch (lowcase(*dbp))
4251         {
4252         case 'b':
4253           if (nocase_tail ("body"))
4254             {
4255               /* Skipping body of   procedure body   or   package body or ....
4256                  resetting qualifier to body instead of spec. */
4257               name_qualifier = "/b";
4258               continue;
4259             }
4260           break;
4261         case 't':
4262           /* Skipping type of   task type   or   protected type ... */
4263           if (nocase_tail ("type"))
4264             continue;
4265           break;
4266         }
4267       if (*dbp == '"')
4268         {
4269           dbp += 1;
4270           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4271             continue;
4272         }
4273       else
4274         {
4275           dbp = skip_spaces (dbp);
4276           for (cp = dbp;
4277                (*cp != '\0'
4278                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4279                cp++)
4280             continue;
4281           if (cp == dbp)
4282             return;
4283         }
4284       c = *cp;
4285       *cp = '\0';
4286       name = concat (dbp, name_qualifier, "");
4287       *cp = c;
4288       make_tag (name, strlen (name), TRUE,
4289                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4290       free (name);
4291       if (c == '"')
4292         dbp = cp + 1;
4293       return;
4294     }
4295 }
4296
4297 static void
4298 Ada_funcs (inf)
4299      FILE *inf;
4300 {
4301   bool inquote = FALSE;
4302   bool skip_till_semicolumn = FALSE;
4303
4304   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4305     {
4306       while (*dbp != '\0')
4307         {
4308           /* Skip a string i.e. "abcd". */
4309           if (inquote || (*dbp == '"'))
4310             {
4311               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4312               if (dbp != NULL)
4313                 {
4314                   inquote = FALSE;
4315                   dbp += 1;
4316                   continue;     /* advance char */
4317                 }
4318               else
4319                 {
4320                   inquote = TRUE;
4321                   break;        /* advance line */
4322                 }
4323             }
4324
4325           /* Skip comments. */
4326           if (dbp[0] == '-' && dbp[1] == '-')
4327             break;              /* advance line */
4328
4329           /* Skip character enclosed in single quote i.e. 'a'
4330              and skip single quote starting an attribute i.e. 'Image. */
4331           if (*dbp == '\'')
4332             {
4333               dbp++ ;
4334               if (*dbp != '\0')
4335                 dbp++;
4336               continue;
4337             }
4338
4339           if (skip_till_semicolumn)
4340             {
4341               if (*dbp == ';')
4342                 skip_till_semicolumn = FALSE;
4343               dbp++;
4344               continue;         /* advance char */
4345             }
4346
4347           /* Search for beginning of a token.  */
4348           if (!begtoken (*dbp))
4349             {
4350               dbp++;
4351               continue;         /* advance char */
4352             }
4353
4354           /* We are at the beginning of a token. */
4355           switch (lowcase(*dbp))
4356             {
4357             case 'f':
4358               if (!packages_only && nocase_tail ("function"))
4359                 Ada_getit (inf, "/f");
4360               else
4361                 break;          /* from switch */
4362               continue;         /* advance char */
4363             case 'p':
4364               if (!packages_only && nocase_tail ("procedure"))
4365                 Ada_getit (inf, "/p");
4366               else if (nocase_tail ("package"))
4367                 Ada_getit (inf, "/s");
4368               else if (nocase_tail ("protected")) /* protected type */
4369                 Ada_getit (inf, "/t");
4370               else
4371                 break;          /* from switch */
4372               continue;         /* advance char */
4373
4374             case 'u':
4375               if (typedefs && !packages_only && nocase_tail ("use"))
4376                 {
4377                   /* when tagging types, avoid tagging  use type Pack.Typename;
4378                      for this, we will skip everything till a ; */
4379                   skip_till_semicolumn = TRUE;
4380                   continue;     /* advance char */
4381                 }
4382
4383             case 't':
4384               if (!packages_only && nocase_tail ("task"))
4385                 Ada_getit (inf, "/k");
4386               else if (typedefs && !packages_only && nocase_tail ("type"))
4387                 {
4388                   Ada_getit (inf, "/t");
4389                   while (*dbp != '\0')
4390                     dbp += 1;
4391                 }
4392               else
4393                 break;          /* from switch */
4394               continue;         /* advance char */
4395             }
4396
4397           /* Look for the end of the token. */
4398           while (!endtoken (*dbp))
4399             dbp++;
4400
4401         } /* advance char */
4402     } /* advance line */
4403 }
4404
4405 \f
4406 /*
4407  * Unix and microcontroller assembly tag handling
4408  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4409  * Idea by Bob Weiner, Motorola Inc. (1994)
4410  */
4411 static void
4412 Asm_labels (inf)
4413      FILE *inf;
4414 {
4415   register char *cp;
4416
4417   LOOP_ON_INPUT_LINES (inf, lb, cp)
4418     {
4419       /* If first char is alphabetic or one of [_.$], test for colon
4420          following identifier. */
4421       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4422         {
4423           /* Read past label. */
4424           cp++;
4425           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4426             cp++;
4427           if (*cp == ':' || iswhite (*cp))
4428             /* Found end of label, so copy it and add it to the table. */
4429             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4430                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4431         }
4432     }
4433 }
4434
4435 \f
4436 /*
4437  * Perl support
4438  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4439  * Perl variable names: /^(my|local).../
4440  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4441  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4442  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4443  */
4444 static void
4445 Perl_functions (inf)
4446      FILE *inf;
4447 {
4448   char *package = savestr ("main"); /* current package name */
4449   register char *cp;
4450
4451   LOOP_ON_INPUT_LINES (inf, lb, cp)
4452     {
4453       skip_spaces(cp);
4454
4455       if (LOOKING_AT (cp, "package"))
4456         {
4457           free (package);
4458           get_tag (cp, &package);
4459         }
4460       else if (LOOKING_AT (cp, "sub"))
4461         {
4462           char *pos;
4463           char *sp = cp;
4464
4465           while (!notinname (*cp))
4466             cp++;
4467           if (cp == sp)
4468             continue;           /* nothing found */
4469           if ((pos = etags_strchr (sp, ':')) != NULL
4470               && pos < cp && pos[1] == ':')
4471             /* The name is already qualified. */
4472             make_tag (sp, cp - sp, TRUE,
4473                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4474           else
4475             /* Qualify it. */
4476             {
4477               char savechar, *name;
4478
4479               savechar = *cp;
4480               *cp = '\0';
4481               name = concat (package, "::", sp);
4482               *cp = savechar;
4483               make_tag (name, strlen(name), TRUE,
4484                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4485               free (name);
4486             }
4487         }
4488        else if (globals)        /* only if we are tagging global vars */
4489         {
4490           /* Skip a qualifier, if any. */
4491           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4492           /* After "my" or "local", but before any following paren or space. */
4493           char *varstart = cp;
4494
4495           if (qual              /* should this be removed?  If yes, how? */
4496               && (*cp == '$' || *cp == '@' || *cp == '%'))
4497             {
4498               varstart += 1;
4499               do
4500                 cp++;
4501               while (ISALNUM (*cp) || *cp == '_');
4502             }
4503           else if (qual)
4504             {
4505               /* Should be examining a variable list at this point;
4506                  could insist on seeing an open parenthesis. */
4507               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4508                 cp++;
4509             }
4510           else
4511             continue;
4512
4513           make_tag (varstart, cp - varstart, FALSE,
4514                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4515         }
4516     }
4517 }
4518
4519
4520 /*
4521  * Python support
4522  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4523  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4524  * More ideas by seb bacon <seb@jamkit.com> (2002)
4525  */
4526 static void
4527 Python_functions (inf)
4528      FILE *inf;
4529 {
4530   register char *cp;
4531
4532   LOOP_ON_INPUT_LINES (inf, lb, cp)
4533     {
4534       cp = skip_spaces (cp);
4535       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4536         {
4537           char *name = cp;
4538           while (!notinname (*cp) && *cp != ':')
4539             cp++;
4540           make_tag (name, cp - name, TRUE,
4541                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4542         }
4543     }
4544 }
4545
4546 \f
4547 /*
4548  * PHP support
4549  * Look for:
4550  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4551  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4552  *  - /^[ \t]*define\(\"[^\"]+/
4553  * Only with --members:
4554  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4555  * Idea by Diez B. Roggisch (2001)
4556  */
4557 static void
4558 PHP_functions (inf)
4559      FILE *inf;
4560 {
4561   register char *cp, *name;
4562   bool search_identifier = FALSE;
4563
4564   LOOP_ON_INPUT_LINES (inf, lb, cp)
4565     {
4566       cp = skip_spaces (cp);
4567       name = cp;
4568       if (search_identifier
4569           && *cp != '\0')
4570         {
4571           while (!notinname (*cp))
4572             cp++;
4573           make_tag (name, cp - name, TRUE,
4574                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4575           search_identifier = FALSE;
4576         }
4577       else if (LOOKING_AT (cp, "function"))
4578         {
4579           if(*cp == '&')
4580             cp = skip_spaces (cp+1);
4581           if(*cp != '\0')
4582             {
4583               name = cp;
4584               while (!notinname (*cp))
4585                 cp++;
4586               make_tag (name, cp - name, TRUE,
4587                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4588             }
4589           else
4590             search_identifier = TRUE;
4591         }
4592       else if (LOOKING_AT (cp, "class"))
4593         {
4594           if (*cp != '\0')
4595             {
4596               name = cp;
4597               while (*cp != '\0' && !iswhite (*cp))
4598                 cp++;
4599               make_tag (name, cp - name, FALSE,
4600                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4601             }
4602           else
4603             search_identifier = TRUE;
4604         }
4605       else if (strneq (cp, "define", 6)
4606                && (cp = skip_spaces (cp+6))
4607                && *cp++ == '('
4608                && (*cp == '"' || *cp == '\''))
4609         {
4610           char quote = *cp++;
4611           name = cp;
4612           while (*cp != quote && *cp != '\0')
4613             cp++;
4614           make_tag (name, cp - name, FALSE,
4615                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4616         }
4617       else if (members
4618                && LOOKING_AT (cp, "var")
4619                && *cp == '$')
4620         {
4621           name = cp;
4622           while (!notinname(*cp))
4623             cp++;
4624           make_tag (name, cp - name, FALSE,
4625                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4626         }
4627     }
4628 }
4629
4630 \f
4631 /*
4632  * Cobol tag functions
4633  * We could look for anything that could be a paragraph name.
4634  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4635  * Idea by Corny de Souza (1993)
4636  */
4637 static void
4638 Cobol_paragraphs (inf)
4639      FILE *inf;
4640 {
4641   register char *bp, *ep;
4642
4643   LOOP_ON_INPUT_LINES (inf, lb, bp)
4644     {
4645       if (lb.len < 9)
4646         continue;
4647       bp += 8;
4648
4649       /* If eoln, compiler option or comment ignore whole line. */
4650       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4651         continue;
4652
4653       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4654         continue;
4655       if (*ep++ == '.')
4656         make_tag (bp, ep - bp, TRUE,
4657                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4658     }
4659 }
4660
4661 \f
4662 /*
4663  * Makefile support
4664  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4665  */
4666 static void
4667 Makefile_targets (inf)
4668      FILE *inf;
4669 {
4670   register char *bp;
4671
4672   LOOP_ON_INPUT_LINES (inf, lb, bp)
4673     {
4674       if (*bp == '\t' || *bp == '#')
4675         continue;
4676       while (*bp != '\0' && *bp != '=' && *bp != ':')
4677         bp++;
4678       if (*bp == ':' || (globals && *bp == '='))
4679         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4680                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4681     }
4682 }
4683
4684 \f
4685 /*
4686  * Pascal parsing
4687  * Original code by Mosur K. Mohan (1989)
4688  *
4689  *  Locates tags for procedures & functions.  Doesn't do any type- or
4690  *  var-definitions.  It does look for the keyword "extern" or
4691  *  "forward" immediately following the procedure statement; if found,
4692  *  the tag is skipped.
4693  */
4694 static void
4695 Pascal_functions (inf)
4696      FILE *inf;
4697 {
4698   linebuffer tline;             /* mostly copied from C_entries */
4699   long save_lcno;
4700   int save_lineno, namelen, taglen;
4701   char c, *name;
4702
4703   bool                          /* each of these flags is TRUE iff: */
4704     incomment,                  /* point is inside a comment */
4705     inquote,                    /* point is inside '..' string */
4706     get_tagname,                /* point is after PROCEDURE/FUNCTION
4707                                    keyword, so next item = potential tag */
4708     found_tag,                  /* point is after a potential tag */
4709     inparms,                    /* point is within parameter-list */
4710     verify_tag;                 /* point has passed the parm-list, so the
4711                                    next token will determine whether this
4712                                    is a FORWARD/EXTERN to be ignored, or
4713                                    whether it is a real tag */
4714
4715   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4716   name = NULL;                  /* keep compiler quiet */
4717   dbp = lb.buffer;
4718   *dbp = '\0';
4719   linebuffer_init (&tline);
4720
4721   incomment = inquote = FALSE;
4722   found_tag = FALSE;            /* have a proc name; check if extern */
4723   get_tagname = FALSE;          /* found "procedure" keyword         */
4724   inparms = FALSE;              /* found '(' after "proc"            */
4725   verify_tag = FALSE;           /* check if "extern" is ahead        */
4726
4727
4728   while (!feof (inf))           /* long main loop to get next char */
4729     {
4730       c = *dbp++;
4731       if (c == '\0')            /* if end of line */
4732         {
4733           readline (&lb, inf);
4734           dbp = lb.buffer;
4735           if (*dbp == '\0')
4736             continue;
4737           if (!((found_tag && verify_tag)
4738                 || get_tagname))
4739             c = *dbp++;         /* only if don't need *dbp pointing
4740                                    to the beginning of the name of
4741                                    the procedure or function */
4742         }
4743       if (incomment)
4744         {
4745           if (c == '}')         /* within { } comments */
4746             incomment = FALSE;
4747           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4748             {
4749               dbp++;
4750               incomment = FALSE;
4751             }
4752           continue;
4753         }
4754       else if (inquote)
4755         {
4756           if (c == '\'')
4757             inquote = FALSE;
4758           continue;
4759         }
4760       else
4761         switch (c)
4762           {
4763           case '\'':
4764             inquote = TRUE;     /* found first quote */
4765             continue;
4766           case '{':             /* found open { comment */
4767             incomment = TRUE;
4768             continue;
4769           case '(':
4770             if (*dbp == '*')    /* found open (* comment */
4771               {
4772                 incomment = TRUE;
4773                 dbp++;
4774               }
4775             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4776               inparms = TRUE;
4777             continue;
4778           case ')':             /* end of parms list */
4779             if (inparms)
4780               inparms = FALSE;
4781             continue;
4782           case ';':
4783             if (found_tag && !inparms) /* end of proc or fn stmt */
4784               {
4785                 verify_tag = TRUE;
4786                 break;
4787               }
4788             continue;
4789           }
4790       if (found_tag && verify_tag && (*dbp != ' '))
4791         {
4792           /* Check if this is an "extern" declaration. */
4793           if (*dbp == '\0')
4794             continue;
4795           if (lowcase (*dbp == 'e'))
4796             {
4797               if (nocase_tail ("extern")) /* superfluous, really! */
4798                 {
4799                   found_tag = FALSE;
4800                   verify_tag = FALSE;
4801                 }
4802             }
4803           else if (lowcase (*dbp) == 'f')
4804             {
4805               if (nocase_tail ("forward")) /* check for forward reference */
4806                 {
4807                   found_tag = FALSE;
4808                   verify_tag = FALSE;
4809                 }
4810             }
4811           if (found_tag && verify_tag) /* not external proc, so make tag */
4812             {
4813               found_tag = FALSE;
4814               verify_tag = FALSE;
4815               make_tag (name, namelen, TRUE,
4816                         tline.buffer, taglen, save_lineno, save_lcno);
4817               continue;
4818             }
4819         }
4820       if (get_tagname)          /* grab name of proc or fn */
4821         {
4822           char *cp;
4823
4824           if (*dbp == '\0')
4825             continue;
4826
4827           /* Find block name. */
4828           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4829             continue;
4830
4831           /* Save all values for later tagging. */
4832           linebuffer_setlen (&tline, lb.len);
4833           strcpy (tline.buffer, lb.buffer);
4834           save_lineno = lineno;
4835           save_lcno = linecharno;
4836           name = tline.buffer + (dbp - lb.buffer);
4837           namelen = cp - dbp;
4838           taglen = cp - lb.buffer + 1;
4839
4840           dbp = cp;             /* set dbp to e-o-token */
4841           get_tagname = FALSE;
4842           found_tag = TRUE;
4843           continue;
4844
4845           /* And proceed to check for "extern". */
4846         }
4847       else if (!incomment && !inquote && !found_tag)
4848         {
4849           /* Check for proc/fn keywords. */
4850           switch (lowcase (c))
4851             {
4852             case 'p':
4853               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4854                 get_tagname = TRUE;
4855               continue;
4856             case 'f':
4857               if (nocase_tail ("unction"))
4858                 get_tagname = TRUE;
4859               continue;
4860             }
4861         }
4862     } /* while not eof */
4863
4864   free (tline.buffer);
4865 }
4866
4867 \f
4868 /*
4869  * Lisp tag functions
4870  *  look for (def or (DEF, quote or QUOTE
4871  */
4872
4873 static void L_getit __P((void));
4874
4875 static void
4876 L_getit ()
4877 {
4878   if (*dbp == '\'')             /* Skip prefix quote */
4879     dbp++;
4880   else if (*dbp == '(')
4881   {
4882     dbp++;
4883     /* Try to skip "(quote " */
4884     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4885       /* Ok, then skip "(" before name in (defstruct (foo)) */
4886       dbp = skip_spaces (dbp);
4887   }
4888   get_tag (dbp, NULL);
4889 }
4890
4891 static void
4892 Lisp_functions (inf)
4893      FILE *inf;
4894 {
4895   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4896     {
4897       if (dbp[0] != '(')
4898         continue;
4899
4900       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4901         {
4902           dbp = skip_non_spaces (dbp);
4903           dbp = skip_spaces (dbp);
4904           L_getit ();
4905         }
4906       else
4907         {
4908           /* Check for (foo::defmumble name-defined ... */
4909           do
4910             dbp++;
4911           while (!notinname (*dbp) && *dbp != ':');
4912           if (*dbp == ':')
4913             {
4914               do
4915                 dbp++;
4916               while (*dbp == ':');
4917
4918               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4919                 {
4920                   dbp = skip_non_spaces (dbp);
4921                   dbp = skip_spaces (dbp);
4922                   L_getit ();
4923                 }
4924             }
4925         }
4926     }
4927 }
4928
4929 \f
4930 /*
4931  * Lua script language parsing
4932  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4933  *
4934  *  "function" and "local function" are tags if they start at column 1.
4935  */
4936 static void
4937 Lua_functions (inf)
4938      FILE *inf;
4939 {
4940   register char *bp;
4941
4942   LOOP_ON_INPUT_LINES (inf, lb, bp)
4943     {
4944       if (bp[0] != 'f' && bp[0] != 'l')
4945         continue;
4946
4947       LOOKING_AT (bp, "local"); /* skip possible "local" */
4948
4949       if (LOOKING_AT (bp, "function"))
4950         get_tag (bp, NULL);
4951     }
4952 }
4953
4954 \f
4955 /*
4956  * Postscript tag functions
4957  * Just look for lines where the first character is '/'
4958  * Also look at "defineps" for PSWrap
4959  * Ideas by:
4960  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4961  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4962  */
4963 static void
4964 PS_functions (inf)
4965      FILE *inf;
4966 {
4967   register char *bp, *ep;
4968
4969   LOOP_ON_INPUT_LINES (inf, lb, bp)
4970     {
4971       if (bp[0] == '/')
4972         {
4973           for (ep = bp+1;
4974                *ep != '\0' && *ep != ' ' && *ep != '{';
4975                ep++)
4976             continue;
4977           make_tag (bp, ep - bp, TRUE,
4978                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4979         }
4980       else if (LOOKING_AT (bp, "defineps"))
4981         get_tag (bp, NULL);
4982     }
4983 }
4984
4985 \f
4986 /*
4987  * Scheme tag functions
4988  * look for (def... xyzzy
4989  *          (def... (xyzzy
4990  *          (def ... ((...(xyzzy ....
4991  *          (set! xyzzy
4992  * Original code by Ken Haase (1985?)
4993  */
4994
4995 static void
4996 Scheme_functions (inf)
4997      FILE *inf;
4998 {
4999   register char *bp;
5000
5001   LOOP_ON_INPUT_LINES (inf, lb, bp)
5002     {
5003       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5004         {
5005           bp = skip_non_spaces (bp+4);
5006           /* Skip over open parens and white space */
5007           while (notinname (*bp))
5008             bp++;
5009           get_tag (bp, NULL);
5010         }
5011       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5012         get_tag (bp, NULL);
5013     }
5014 }
5015
5016 \f
5017 /* Find tags in TeX and LaTeX input files.  */
5018
5019 /* TEX_toktab is a table of TeX control sequences that define tags.
5020  * Each entry records one such control sequence.
5021  *
5022  * Original code from who knows whom.
5023  * Ideas by:
5024  *   Stefan Monnier (2002)
5025  */
5026
5027 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5028
5029 /* Default set of control sequences to put into TEX_toktab.
5030    The value of environment var TEXTAGS is prepended to this.  */
5031 static char *TEX_defenv = "\
5032 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5033 :part:appendix:entry:index:def\
5034 :newcommand:renewcommand:newenvironment:renewenvironment";
5035
5036 static void TEX_mode __P((FILE *));
5037 static void TEX_decode_env __P((char *, char *));
5038
5039 static char TEX_esc = '\\';
5040 static char TEX_opgrp = '{';
5041 static char TEX_clgrp = '}';
5042
5043 /*
5044  * TeX/LaTeX scanning loop.
5045  */
5046 static void
5047 TeX_commands (inf)
5048      FILE *inf;
5049 {
5050   char *cp;
5051   linebuffer *key;
5052
5053   /* Select either \ or ! as escape character.  */
5054   TEX_mode (inf);
5055
5056   /* Initialize token table once from environment. */
5057   if (TEX_toktab == NULL)
5058     TEX_decode_env ("TEXTAGS", TEX_defenv);
5059
5060   LOOP_ON_INPUT_LINES (inf, lb, cp)
5061     {
5062       /* Look at each TEX keyword in line. */
5063       for (;;)
5064         {
5065           /* Look for a TEX escape. */
5066           while (*cp++ != TEX_esc)
5067             if (cp[-1] == '\0' || cp[-1] == '%')
5068               goto tex_next_line;
5069
5070           for (key = TEX_toktab; key->buffer != NULL; key++)
5071             if (strneq (cp, key->buffer, key->len))
5072               {
5073                 register char *p;
5074                 int namelen, linelen;
5075                 bool opgrp = FALSE;
5076
5077                 cp = skip_spaces (cp + key->len);
5078                 if (*cp == TEX_opgrp)
5079                   {
5080                     opgrp = TRUE;
5081                     cp++;
5082                   }
5083                 for (p = cp;
5084                      (!iswhite (*p) && *p != '#' &&
5085                       *p != TEX_opgrp && *p != TEX_clgrp);
5086                      p++)
5087                   continue;
5088                 namelen = p - cp;
5089                 linelen = lb.len;
5090                 if (!opgrp || *p == TEX_clgrp)
5091                   {
5092                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5093                       *p++;
5094                     linelen = p - lb.buffer + 1;
5095                   }
5096                 make_tag (cp, namelen, TRUE,
5097                           lb.buffer, linelen, lineno, linecharno);
5098                 goto tex_next_line; /* We only tag a line once */
5099               }
5100         }
5101     tex_next_line:
5102       ;
5103     }
5104 }
5105
5106 #define TEX_LESC '\\'
5107 #define TEX_SESC '!'
5108
5109 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5110    chars accordingly. */
5111 static void
5112 TEX_mode (inf)
5113      FILE *inf;
5114 {
5115   int c;
5116
5117   while ((c = getc (inf)) != EOF)
5118     {
5119       /* Skip to next line if we hit the TeX comment char. */
5120       if (c == '%')
5121         while (c != '\n')
5122           c = getc (inf);
5123       else if (c == TEX_LESC || c == TEX_SESC )
5124         break;
5125     }
5126
5127   if (c == TEX_LESC)
5128     {
5129       TEX_esc = TEX_LESC;
5130       TEX_opgrp = '{';
5131       TEX_clgrp = '}';
5132     }
5133   else
5134     {
5135       TEX_esc = TEX_SESC;
5136       TEX_opgrp = '<';
5137       TEX_clgrp = '>';
5138     }
5139   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5140      No attempt is made to correct the situation. */
5141   rewind (inf);
5142 }
5143
5144 /* Read environment and prepend it to the default string.
5145    Build token table. */
5146 static void
5147 TEX_decode_env (evarname, defenv)
5148      char *evarname;
5149      char *defenv;
5150 {
5151   register char *env, *p;
5152   int i, len;
5153
5154   /* Append default string to environment. */
5155   env = getenv (evarname);
5156   if (!env)
5157     env = defenv;
5158   else
5159     {
5160       char *oldenv = env;
5161       env = concat (oldenv, defenv, "");
5162     }
5163
5164   /* Allocate a token table */
5165   for (len = 1, p = env; p;)
5166     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5167       len++;
5168   TEX_toktab = xnew (len, linebuffer);
5169
5170   /* Unpack environment string into token table. Be careful about */
5171   /* zero-length strings (leading ':', "::" and trailing ':') */
5172   for (i = 0; *env != '\0';)
5173     {
5174       p = etags_strchr (env, ':');
5175       if (!p)                   /* End of environment string. */
5176         p = env + strlen (env);
5177       if (p - env > 0)
5178         {                       /* Only non-zero strings. */
5179           TEX_toktab[i].buffer = savenstr (env, p - env);
5180           TEX_toktab[i].len = p - env;
5181           i++;
5182         }
5183       if (*p)
5184         env = p + 1;
5185       else
5186         {
5187           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5188           TEX_toktab[i].len = 0;
5189           break;
5190         }
5191     }
5192 }
5193
5194 \f
5195 /* Texinfo support.  Dave Love, Mar. 2000.  */
5196 static void
5197 Texinfo_nodes (inf)
5198      FILE * inf;
5199 {
5200   char *cp, *start;
5201   LOOP_ON_INPUT_LINES (inf, lb, cp)
5202     if (LOOKING_AT (cp, "@node"))
5203       {
5204         start = cp;
5205         while (*cp != '\0' && *cp != ',')
5206           cp++;
5207         make_tag (start, cp - start, TRUE,
5208                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5209       }
5210 }
5211
5212 \f
/* Similar to LOOKING_AT, but compares with strncaseeq, does not use
   notinname and does not skip white space: when CP points at KW, CP is
   advanced to the character just after KW and the value is true. */
#define LOOKING_AT_NOCASE(cp, kw)       /* kw is a constant string */   \
  (strncaseeq ((cp), kw, sizeof(kw)-1)  /* cp points at kw */           \
   && ((cp) += sizeof(kw)-1))           /* step over kw, nothing more */
5217
5218 /*
5219  * HTML support.
5220  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5221  * Contents of <a name=xxx> are tags with name xxx.
5222  *
5223  * Francesco Potortì, 2002.
5224  */
5225 static void
5226 HTML_labels (inf)
5227      FILE * inf;
5228 {
5229   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5230   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5231   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5232   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5233   char *end;
5234
5235
5236   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5237
5238   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5239     for (;;)                    /* loop on the same line */
5240       {
5241         if (skiptag)            /* skip HTML tag */
5242           {
5243             while (*dbp != '\0' && *dbp != '>')
5244               dbp++;
5245             if (*dbp == '>')
5246               {
5247                 dbp += 1;
5248                 skiptag = FALSE;
5249                 continue;       /* look on the same line */
5250               }
5251             break;              /* go to next line */
5252           }
5253
5254         else if (intag) /* look for "name=" or "id=" */
5255           {
5256             while (*dbp != '\0' && *dbp != '>'
5257                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5258               dbp++;
5259             if (*dbp == '\0')
5260               break;            /* go to next line */
5261             if (*dbp == '>')
5262               {
5263                 dbp += 1;
5264                 intag = FALSE;
5265                 continue;       /* look on the same line */
5266               }
5267             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5268                 || LOOKING_AT_NOCASE (dbp, "id="))
5269               {
5270                 bool quoted = (dbp[0] == '"');
5271
5272                 if (quoted)
5273                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5274                     continue;
5275                 else
5276                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5277                     continue;
5278                 linebuffer_setlen (&token_name, end - dbp);
5279                 strncpy (token_name.buffer, dbp, end - dbp);
5280                 token_name.buffer[end - dbp] = '\0';
5281
5282                 dbp = end;
5283                 intag = FALSE;  /* we found what we looked for */
5284                 skiptag = TRUE; /* skip to the end of the tag */
5285                 getnext = TRUE; /* then grab the text */
5286                 continue;       /* look on the same line */
5287               }
5288             dbp += 1;
5289           }
5290
5291         else if (getnext)       /* grab next tokens and tag them */
5292           {
5293             dbp = skip_spaces (dbp);
5294             if (*dbp == '\0')
5295               break;            /* go to next line */
5296             if (*dbp == '<')
5297               {
5298                 intag = TRUE;
5299                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5300                 continue;       /* look on the same line */
5301               }
5302
5303             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5304               continue;
5305             make_tag (token_name.buffer, token_name.len, TRUE,
5306                       dbp, end - dbp, lineno, linecharno);
5307             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5308             getnext = FALSE;
5309             break;              /* go to next line */
5310           }
5311
5312         else                    /* look for an interesting HTML tag */
5313           {
5314             while (*dbp != '\0' && *dbp != '<')
5315               dbp++;
5316             if (*dbp == '\0')
5317               break;            /* go to next line */
5318             intag = TRUE;
5319             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5320               {
5321                 inanchor = TRUE;
5322                 continue;       /* look on the same line */
5323               }
5324             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5325                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5326                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5327                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5328               {
5329                 intag = FALSE;
5330                 getnext = TRUE;
5331                 continue;       /* look on the same line */
5332               }
5333             dbp += 1;
5334           }
5335       }
5336 }
5337
5338 \f
5339 /*
5340  * Prolog support
5341  *
5342  * Assumes that the predicate or rule starts at column 0.
5343  * Only the first clause of a predicate or rule is added.
5344  * Original code by Sunichirou Sugou (1989)
5345  * Rewritten by Anders Lindgren (1996)
5346  */
5347 static int prolog_pr __P((char *, char *));
5348 static void prolog_skip_comment __P((linebuffer *, FILE *));
5349 static int prolog_atom __P((char *, int));
5350
5351 static void
5352 Prolog_functions (inf)
5353      FILE *inf;
5354 {
5355   char *cp, *last;
5356   int len;
5357   int allocated;
5358
5359   allocated = 0;
5360   len = 0;
5361   last = NULL;
5362
5363   LOOP_ON_INPUT_LINES (inf, lb, cp)
5364     {
5365       if (cp[0] == '\0')        /* Empty line */
5366         continue;
5367       else if (iswhite (cp[0])) /* Not a predicate */
5368         continue;
5369       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5370         prolog_skip_comment (&lb, inf);
5371       else if ((len = prolog_pr (cp, last)) > 0)
5372         {
5373           /* Predicate or rule.  Store the function name so that we
5374              only generate a tag for the first clause.  */
5375           if (last == NULL)
5376             last = xnew(len + 1, char);
5377           else if (len + 1 > allocated)
5378             xrnew (last, len + 1, char);
5379           allocated = len + 1;
5380           strncpy (last, cp, len);
5381           last[len] = '\0';
5382         }
5383     }
5384 }
5385
5386
5387 static void
5388 prolog_skip_comment (plb, inf)
5389      linebuffer *plb;
5390      FILE *inf;
5391 {
5392   char *cp;
5393
5394   do
5395     {
5396       for (cp = plb->buffer; *cp != '\0'; cp++)
5397         if (cp[0] == '*' && cp[1] == '/')
5398           return;
5399       readline (plb, inf);
5400     }
5401   while (!feof(inf));
5402 }
5403
5404 /*
5405  * A predicate or rule definition is added if it matches:
5406  *     <beginning of line><Prolog Atom><whitespace>(
5407  * or  <beginning of line><Prolog Atom><whitespace>:-
5408  *
5409  * It is added to the tags database if it doesn't match the
5410  * name of the previous clause header.
5411  *
5412  * Return the size of the name of the predicate or rule, or 0 if no
5413  * header was found.
5414  */
5415 static int
5416 prolog_pr (s, last)
5417      char *s;
5418      char *last;                /* Name of last clause. */
5419 {
5420   int pos;
5421   int len;
5422
5423   pos = prolog_atom (s, 0);
5424   if (pos < 1)
5425     return 0;
5426
5427   len = pos;
5428   pos = skip_spaces (s + pos) - s;
5429
5430   if ((s[pos] == '.'
5431        || (s[pos] == '(' && (pos += 1))
5432        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5433       && (last == NULL          /* save only the first clause */
5434           || len != strlen (last)
5435           || !strneq (s, last, len)))
5436         {
5437           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5438           return len;
5439         }
5440   else
5441     return 0;
5442 }
5443
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and underscores, starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: step over the opening quote and scan. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* backslash and the quoted character */
        break;

      case '\'':
        if (p[1] != '\'')
          return p + 1 - start; /* closing quote is part of the atom */
        p += 2;                 /* a doubled quote stands for one quote */
        break;

      default:
        p++;
        break;
      }
}
5502
5503 \f
5504 /*
5505  * Support for Erlang
5506  *
5507  * Generates tags for functions, defines, and records.
5508  * Assumes that Erlang functions start at column 0.
5509  * Original code by Anders Lindgren (1996)
5510  */
5511 static int erlang_func __P((char *, char *));
5512 static void erlang_attribute __P((char *));
5513 static int erlang_atom __P((char *));
5514
5515 static void
5516 Erlang_functions (inf)
5517      FILE *inf;
5518 {
5519   char *cp, *last;
5520   int len;
5521   int allocated;
5522
5523   allocated = 0;
5524   len = 0;
5525   last = NULL;
5526
5527   LOOP_ON_INPUT_LINES (inf, lb, cp)
5528     {
5529       if (cp[0] == '\0')        /* Empty line */
5530         continue;
5531       else if (iswhite (cp[0])) /* Not function nor attribute */
5532         continue;
5533       else if (cp[0] == '%')    /* comment */
5534         continue;
5535       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5536         continue;
5537       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5538         {
5539           erlang_attribute (cp);
5540           last = NULL;
5541         }
5542       else if ((len = erlang_func (cp, last)) > 0)
5543         {
5544           /*
5545            * Function.  Store the function name so that we only
5546            * generates a tag for the first clause.
5547            */
5548           if (last == NULL)
5549             last = xnew (len + 1, char);
5550           else if (len + 1 > allocated)
5551             xrnew (last, len + 1, char);
5552           allocated = len + 1;
5553           strncpy (last, cp, len);
5554           last[len] = '\0';
5555         }
5556     }
5557 }
5558
5559
5560 /*
5561  * A function definition is added if it matches:
5562  *     <beginning of line><Erlang Atom><whitespace>(
5563  *
5564  * It is added to the tags database if it doesn't match the
5565  * name of the previous clause header.
5566  *
5567  * Return the size of the name of the function, or 0 if no function
5568  * was found.
5569  */
5570 static int
5571 erlang_func (s, last)
5572      char *s;
5573      char *last;                /* Name of last clause. */
5574 {
5575   int pos;
5576   int len;
5577
5578   pos = erlang_atom (s);
5579   if (pos < 1)
5580     return 0;
5581
5582   len = pos;
5583   pos = skip_spaces (s + pos) - s;
5584
5585   /* Save only the first clause. */
5586   if (s[pos++] == '('
5587       && (last == NULL
5588           || len != (int)strlen (last)
5589           || !strneq (s, last, len)))
5590         {
5591           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5592           return len;
5593         }
5594
5595   return 0;
5596 }
5597
5598
5599 /*
5600  * Handle attributes.  Currently, tags are generated for defines
5601  * and records.
5602  *
5603  * They are on the form:
5604  * -define(foo, bar).
5605  * -define(Foo(M, N), M+N).
5606  * -record(graph, {vtab = notable, cyclic = true}).
5607  */
5608 static void
5609 erlang_attribute (s)
5610      char *s;
5611 {
5612   char *cp = s;
5613
5614   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5615       && *cp++ == '(')
5616     {
5617       int len = erlang_atom (skip_spaces (cp));
5618       if (len > 0)
5619         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5620     }
5621   return;
5622 }
5623
5624
/*
 * Consume an Erlang atom (or variable) at the beginning of S.
 *
 * An unquoted atom starts with a letter or an underscore and goes on
 * with alphanumerics and underscores.  A quoted atom is enclosed in
 * single quotes; a backslash inside it quotes the next character.
 *
 * Return the number of bytes consumed, quotes included.  Return 0 if
 * S does not start with an atom or if a quoted atom is not closed on
 * this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }
  else if (*p == '\'')
    {
      p++;                      /* opening quote */
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              p++;              /* look at the quoted character */
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      p++;                      /* closing quote */
    }

  return p - s;
}
5653
5654 \f
5655 #ifdef ETAGS_REGEXPS
5656
5657 static char *scan_separators __P((char *));
5658 static void add_regex __P((char *, language *));
5659 static char *substitute __P((char *, char *, struct re_registers *));
5660
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the next
 * unquoted occurrence of it.  A backslash quotes the following
 * character: \a, \b, \d, \e, \f, \n, \r, \t and \v are turned into
 * the corresponding control characters, a quoted separator becomes a
 * plain separator, and any other quoted character is kept together
 * with its backslash.
 *
 * Works in place: the result overwrites NAME starting at its first
 * character and is null terminated.  Returns a pointer to the
 * terminating separator, or NULL for unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* write cursor, stays behind the reader */
  char *terminator;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character, if there is one. */
          ++name;
          if (*name == '\0')
            break;
          switch (*name)
            {
            case 'a': *dst++ = '\007'; break;   /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;     /* BS (back space)      */
            case 'd': *dst++ = 0177; break;     /* DEL (delete)         */
            case 'e': *dst++ = 033; break;      /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;     /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;     /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;     /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;     /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;     /* VT (vertical tab)    */
            default:
              /* A quoted separator loses its quote; for anything else
                 the quote is preserved. */
              if (*name != sep)
                *dst++ = '\\';
              *dst++ = *name;
              break;
            }
          continue;
        }

      if (*name == sep)
        break;

      *dst++ = *name;
    }

  /* Anything but the separator here means an unterminated regexp. */
  terminator = (*name == sep) ? name : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return terminator;
}
5720
5721 /* Look at the argument of --regex or --no-regex and do the right
5722    thing.  Same for each line of a regexp file. */
5723 static void
5724 analyse_regex (regex_arg)
5725      char *regex_arg;
5726 {
5727   if (regex_arg == NULL)
5728     {
5729       free_regexps ();          /* --no-regex: remove existing regexps */
5730       return;
5731     }
5732
5733   /* A real --regexp option or a line in a regexp file. */
5734   switch (regex_arg[0])
5735     {
5736       /* Comments in regexp file or null arg to --regex. */
5737     case '\0':
5738     case ' ':
5739     case '\t':
5740       break;
5741
5742       /* Read a regex file.  This is recursive and may result in a
5743          loop, which will stop when the file descriptors are exhausted. */
5744     case '@':
5745       {
5746         FILE *regexfp;
5747         linebuffer regexbuf;
5748         char *regexfile = regex_arg + 1;
5749
5750         /* regexfile is a file containing regexps, one per line. */
5751         regexfp = fopen (regexfile, "r");
5752         if (regexfp == NULL)
5753           {
5754             pfatal (regexfile);
5755             return;
5756           }
5757         linebuffer_init (&regexbuf);
5758         while (readline_internal (&regexbuf, regexfp) > 0)
5759           analyse_regex (regexbuf.buffer);
5760         free (regexbuf.buffer);
5761         fclose (regexfp);
5762       }
5763       break;
5764
5765       /* Regexp to be used for a specific language only. */
5766     case '{':
5767       {
5768         language *lang;
5769         char *lang_name = regex_arg + 1;
5770         char *cp;
5771
5772         for (cp = lang_name; *cp != '}'; cp++)
5773           if (*cp == '\0')
5774             {
5775               error ("unterminated language name in regex: %s", regex_arg);
5776               return;
5777             }
5778         *cp++ = '\0';
5779         lang = get_language_from_langname (lang_name);
5780         if (lang == NULL)
5781           return;
5782         add_regex (cp, lang);
5783       }
5784       break;
5785
5786       /* Regexp to be used for any language. */
5787     default:
5788       add_regex (regex_arg, NULL);
5789       break;
5790     }
5791 }
5792
5793 /* Separate the regexp pattern, compile it,
5794    and care for optional name and modifiers. */
5795 static void
5796 add_regex (regexp_pattern, lang)
5797      char *regexp_pattern;
5798      language *lang;
5799 {
5800   static struct re_pattern_buffer zeropattern;
5801   char sep, *pat, *name, *modifiers;
5802   const char *err;
5803   struct re_pattern_buffer *patbuf;
5804   regexp *rp;
5805   bool
5806     force_explicit_name = TRUE, /* do not use implicit tag names */
5807     ignore_case = FALSE,        /* case is significant */
5808     multi_line = FALSE,         /* matches are done one line at a time */
5809     single_line = FALSE;        /* dot does not match newline */
5810
5811
5812   if (strlen(regexp_pattern) < 3)
5813     {
5814       error ("null regexp", (char *)NULL);
5815       return;
5816     }
5817   sep = regexp_pattern[0];
5818   name = scan_separators (regexp_pattern);
5819   if (name == NULL)
5820     {
5821       error ("%s: unterminated regexp", regexp_pattern);
5822       return;
5823     }
5824   if (name[1] == sep)
5825     {
5826       error ("null name for regexp \"%s\"", regexp_pattern);
5827       return;
5828     }
5829   modifiers = scan_separators (name);
5830   if (modifiers == NULL)        /* no terminating separator --> no name */
5831     {
5832       modifiers = name;
5833       name = "";
5834     }
5835   else
5836     modifiers += 1;             /* skip separator */
5837
5838   /* Parse regex modifiers. */
5839   for (; modifiers[0] != '\0'; modifiers++)
5840     switch (modifiers[0])
5841       {
5842       case 'N':
5843         if (modifiers == name)
5844           error ("forcing explicit tag name but no name, ignoring", NULL);
5845         force_explicit_name = TRUE;
5846         break;
5847       case 'i':
5848         ignore_case = TRUE;
5849         break;
5850       case 's':
5851         single_line = TRUE;
5852         /* FALLTHRU */
5853       case 'm':
5854         multi_line = TRUE;
5855         need_filebuf = TRUE;
5856         break;
5857       default:
5858         {
5859           char wrongmod [2];
5860           wrongmod[0] = modifiers[0];
5861           wrongmod[1] = '\0';
5862           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5863         }
5864         break;
5865       }
5866
5867   patbuf = xnew (1, struct re_pattern_buffer);
5868   *patbuf = zeropattern;
5869   if (ignore_case)
5870     {
5871       static char lc_trans[CHARS];
5872       int i;
5873       for (i = 0; i < CHARS; i++)
5874         lc_trans[i] = lowcase (i);
5875       patbuf->translate = lc_trans;     /* translation table to fold case  */
5876     }
5877
5878   if (multi_line)
5879     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5880   else
5881     pat = regexp_pattern;
5882
5883   if (single_line)
5884     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5885   else
5886     re_set_syntax (RE_SYNTAX_EMACS);
5887
5888   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5889   if (multi_line)
5890     free (pat);
5891   if (err != NULL)
5892     {
5893       error ("%s while compiling pattern", err);
5894       return;
5895     }
5896
5897   rp = p_head;
5898   p_head = xnew (1, regexp);
5899   p_head->pattern = savestr (regexp_pattern);
5900   p_head->p_next = rp;
5901   p_head->lang = lang;
5902   p_head->pat = patbuf;
5903   p_head->name = savestr (name);
5904   p_head->error_signaled = FALSE;
5905   p_head->force_explicit_name = force_explicit_name;
5906   p_head->ignore_case = ignore_case;
5907   p_head->multi_line = multi_line;
5908 }
5909
5910 /*
5911  * Do the substitutions indicated by the regular expression and
5912  * arguments.
5913  */
5914 static char *
5915 substitute (in, out, regs)
5916      char *in, *out;
5917      struct re_registers *regs;
5918 {
5919   char *result, *t;
5920   int size, dig, diglen;
5921
5922   result = NULL;
5923   size = strlen (out);
5924
5925   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5926   if (out[size - 1] == '\\')
5927     fatal ("pattern error in \"%s\"", out);
5928   for (t = etags_strchr (out, '\\');
5929        t != NULL;
5930        t = etags_strchr (t + 2, '\\'))
5931     if (ISDIGIT (t[1]))
5932       {
5933         dig = t[1] - '0';
5934         diglen = regs->end[dig] - regs->start[dig];
5935         size += diglen - 2;
5936       }
5937     else
5938       size -= 1;
5939
5940   /* Allocate space and do the substitutions. */
5941   assert (size >= 0);
5942   result = xnew (size + 1, char);
5943
5944   for (t = result; *out != '\0'; out++)
5945     if (*out == '\\' && ISDIGIT (*++out))
5946       {
5947         dig = *out - '0';
5948         diglen = regs->end[dig] - regs->start[dig];
5949         strncpy (t, in + regs->start[dig], diglen);
5950         t += diglen;
5951       }
5952     else
5953       *t++ = *out;
5954   *t = '\0';
5955
5956   assert (t <= result + size);
5957   assert (t - result == (int)strlen (result));
5958
5959   return result;
5960 }
5961
5962 /* Deallocate all regexps. */
5963 static void
5964 free_regexps ()
5965 {
5966   regexp *rp;
5967   while (p_head != NULL)
5968     {
5969       rp = p_head->p_next;
5970       free (p_head->pattern);
5971       free (p_head->name);
5972       free (p_head);
5973       p_head = rp;
5974     }
5975   return;
5976 }
5977
5978 /*
5979  * Reads the whole file as a single string from `filebuf' and looks for
5980  * multi-line regular expressions, creating tags on matches.
5981  * readline already dealt with normal regexps.
5982  *
5983  * Idea by Ben Wing <ben@666.com> (2002).
5984  */
5985 static void
5986 regex_tag_multiline ()
5987 {
5988   char *buffer = filebuf.buffer;
5989   regexp *rp;
5990   char *name;
5991
5992   for (rp = p_head; rp != NULL; rp = rp->p_next)
5993     {
5994       int match = 0;
5995
5996       if (!rp->multi_line)
5997         continue;               /* skip normal regexps */
5998
5999       /* Generic initialisations before parsing file from memory. */
6000       lineno = 1;               /* reset global line number */
6001       charno = 0;               /* reset global char number */
6002       linecharno = 0;           /* reset global char number of line start */
6003
6004       /* Only use generic regexps or those for the current language. */
6005       if (rp->lang != NULL && rp->lang != curfdp->lang)
6006         continue;
6007
6008       while (match >= 0 && match < filebuf.len)
6009         {
6010           match = re_search (rp->pat, buffer, filebuf.len, charno,
6011                              filebuf.len - match, &rp->regs);
6012           switch (match)
6013             {
6014             case -2:
6015               /* Some error. */
6016               if (!rp->error_signaled)
6017                 {
6018                   error ("regexp stack overflow while matching \"%s\"",
6019                          rp->pattern);
6020                   rp->error_signaled = TRUE;
6021                 }
6022               break;
6023             case -1:
6024               /* No match. */
6025               break;
6026             default:
6027               if (match == rp->regs.end[0])
6028                 {
6029                   if (!rp->error_signaled)
6030                     {
6031                       error ("regexp matches the empty string: \"%s\"",
6032                              rp->pattern);
6033                       rp->error_signaled = TRUE;
6034                     }
6035                   match = -3;   /* exit from while loop */
6036                   break;
6037                 }
6038
6039               /* Match occurred.  Construct a tag. */
6040               while (charno < rp->regs.end[0])
6041                 if (buffer[charno++] == '\n')
6042                   lineno++, linecharno = charno;
6043               name = rp->name;
6044               if (name[0] == '\0')
6045                 name = NULL;
6046               else /* make a named tag */
6047                 name = substitute (buffer, rp->name, &rp->regs);
6048               if (rp->force_explicit_name)
6049                 /* Force explicit tag name, if a name is there. */
6050                 pfnote (name, TRUE, buffer + linecharno,
6051                         charno - linecharno + 1, lineno, linecharno);
6052               else
6053                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6054                           charno - linecharno + 1, lineno, linecharno);
6055               break;
6056             }
6057         }
6058     }
6059 }
6060
6061 #endif /* ETAGS_REGEXPS */
6062
6063 \f
6064 static bool
6065 nocase_tail (cp)
6066      char *cp;
6067 {
6068   register int len = 0;
6069
6070   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6071     cp++, len++;
6072   if (*cp == '\0' && !intoken (dbp[len]))
6073     {
6074       dbp += len;
6075       return TRUE;
6076     }
6077   return FALSE;
6078 }
6079
6080 static void
6081 get_tag (bp, namepp)
6082      register char *bp;
6083      char **namepp;
6084 {
6085   register char *cp = bp;
6086
6087   if (*bp != '\0')
6088     {
6089       /* Go till you get to white space or a syntactic break */
6090       for (cp = bp + 1; !notinname (*cp); cp++)
6091         continue;
6092       make_tag (bp, cp - bp, TRUE,
6093                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6094     }
6095
6096   if (namepp != NULL)
6097     *namepp = savenstr (bp, cp - bp);
6098 }
6099
6100 /*
6101  * Read a line of text from `stream' into `lbp', excluding the
6102  * newline or CR-NL, if any.  Return the number of characters read from
6103  * `stream', which is the length of the line including the newline.
6104  *
6105  * On DOS or Windows we do not count the CR character, if any before the
6106  * NL, in the returned length; this mirrors the behavior of Emacs on those
6107  * platforms (for text files, it translates CR-NL to NL as it reads in the
6108  * file).
6109  *
6110  * If multi-line regular expressions are requested, each line read is
6111  * appended to `filebuf'.
6112  */
6113 static long
6114 readline_internal (lbp, stream)
6115      linebuffer *lbp;
6116      register FILE *stream;
6117 {
6118   char *buffer = lbp->buffer;
6119   register char *p = lbp->buffer;
6120   register char *pend;
6121   int chars_deleted;
6122
6123   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6124
6125   for (;;)
6126     {
6127       register int c = getc (stream);
6128       if (p == pend)
6129         {
6130           /* We're at the end of linebuffer: expand it. */
6131           lbp->size *= 2;
6132           xrnew (buffer, lbp->size, char);
6133           p += buffer - lbp->buffer;
6134           pend = buffer + lbp->size;
6135           lbp->buffer = buffer;
6136         }
6137       if (c == EOF)
6138         {
6139           *p = '\0';
6140           chars_deleted = 0;
6141           break;
6142         }
6143       if (c == '\n')
6144         {
6145           if (p > buffer && p[-1] == '\r')
6146             {
6147               p -= 1;
6148 #ifdef DOS_NT
6149              /* Assume CRLF->LF translation will be performed by Emacs
6150                 when loading this file, so CRs won't appear in the buffer.
6151                 It would be cleaner to compensate within Emacs;
6152                 however, Emacs does not know how many CRs were deleted
6153                 before any given point in the file.  */
6154               chars_deleted = 1;
6155 #else
6156               chars_deleted = 2;
6157 #endif
6158             }
6159           else
6160             {
6161               chars_deleted = 1;
6162             }
6163           *p = '\0';
6164           break;
6165         }
6166       *p++ = c;
6167     }
6168   lbp->len = p - buffer;
6169
6170   if (need_filebuf              /* we need filebuf for multi-line regexps */
6171       && chars_deleted > 0)     /* not at EOF */
6172     {
6173       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6174         {
6175           /* Expand filebuf. */
6176           filebuf.size *= 2;
6177           xrnew (filebuf.buffer, filebuf.size, char);
6178         }
6179       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6180       filebuf.len += lbp->len;
6181       filebuf.buffer[filebuf.len++] = '\n';
6182       filebuf.buffer[filebuf.len] = '\0';
6183     }
6184
6185   return lbp->len + chars_deleted;
6186 }
6187
6188 /*
6189  * Like readline_internal, above, but in addition try to match the
6190  * input line against relevant regular expressions and manage #line
6191  * directives.
6192  */
6193 static void
6194 readline (lbp, stream)
6195      linebuffer *lbp;
6196      FILE *stream;
6197 {
6198   long result;
6199
6200   linecharno = charno;          /* update global char number of line start */
6201   result = readline_internal (lbp, stream); /* read line */
6202   lineno += 1;                  /* increment global line number */
6203   charno += result;             /* increment global char number */
6204
6205   /* Honour #line directives. */
6206   if (!no_line_directive)
6207     {
6208       static bool discard_until_line_directive;
6209
6210       /* Check whether this is a #line directive. */
6211       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6212         {
6213           int start, lno;
6214
6215           if (DEBUG) start = 0; /* shut up the compiler */
6216           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6217             {
6218               char *endp = lbp->buffer + start;
6219
6220               assert (start > 0);
6221               while ((endp = etags_strchr (endp, '"')) != NULL
6222                      && endp[-1] == '\\')
6223                 endp++;
6224               if (endp != NULL)
6225                 /* Ok, this is a real #line directive.  Let's deal with it. */
6226                 {
6227                   char *taggedabsname;  /* absolute name of original file */
6228                   char *taggedfname;    /* name of original file as given */
6229                   char *name;           /* temp var */
6230
6231                   discard_until_line_directive = FALSE; /* found it */
6232                   name = lbp->buffer + start;
6233                   *endp = '\0';
6234                   canonicalize_filename (name); /* for DOS */
6235                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6236                   if (filename_is_absolute (name)
6237                       || filename_is_absolute (curfdp->infname))
6238                     taggedfname = savestr (taggedabsname);
6239                   else
6240                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6241
6242                   if (streq (curfdp->taggedfname, taggedfname))
6243                     /* The #line directive is only a line number change.  We
6244                        deal with this afterwards. */
6245                     free (taggedfname);
6246                   else
6247                     /* The tags following this #line directive should be
6248                        attributed to taggedfname.  In order to do this, set
6249                        curfdp accordingly. */
6250                     {
6251                       fdesc *fdp; /* file description pointer */
6252
6253                       /* Go look for a file description already set up for the
6254                          file indicated in the #line directive.  If there is
6255                          one, use it from now until the next #line
6256                          directive. */
6257                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6258                         if (streq (fdp->infname, curfdp->infname)
6259                             && streq (fdp->taggedfname, taggedfname))
6260                           /* If we remove the second test above (after the &&)
6261                              then all entries pertaining to the same file are
6262                              coalesced in the tags file.  If we use it, then
6263                              entries pertaining to the same file but generated
6264                              from different files (via #line directives) will
6265                              go into separate sections in the tags file.  These
6266                              alternatives look equivalent.  The first one
6267                              destroys some apparently useless information. */
6268                           {
6269                             curfdp = fdp;
6270                             free (taggedfname);
6271                             break;
6272                           }
6273                       /* Else, if we already tagged the real file, skip all
6274                          input lines until the next #line directive. */
6275                       if (fdp == NULL) /* not found */
6276                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6277                           if (streq (fdp->infabsname, taggedabsname))
6278                             {
6279                               discard_until_line_directive = TRUE;
6280                               free (taggedfname);
6281                               break;
6282                             }
6283                       /* Else create a new file description and use that from
6284                          now on, until the next #line directive. */
6285                       if (fdp == NULL) /* not found */
6286                         {
6287                           fdp = fdhead;
6288                           fdhead = xnew (1, fdesc);
6289                           *fdhead = *curfdp; /* copy curr. file description */
6290                           fdhead->next = fdp;
6291                           fdhead->infname = savestr (curfdp->infname);
6292                           fdhead->infabsname = savestr (curfdp->infabsname);
6293                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6294                           fdhead->taggedfname = taggedfname;
6295                           fdhead->usecharno = FALSE;
6296                           fdhead->prop = NULL;
6297                           fdhead->written = FALSE;
6298                           curfdp = fdhead;
6299                         }
6300                     }
6301                   free (taggedabsname);
6302                   lineno = lno - 1;
6303                   readline (lbp, stream);
6304                   return;
6305                 } /* if a real #line directive */
6306             } /* if #line is followed by a a number */
6307         } /* if line begins with "#line " */
6308
6309       /* If we are here, no #line directive was found. */
6310       if (discard_until_line_directive)
6311         {
6312           if (result > 0)
6313             {
6314               /* Do a tail recursion on ourselves, thus discarding the contents
6315                  of the line buffer. */
6316               readline (lbp, stream);
6317               return;
6318             }
6319           /* End of file. */
6320           discard_until_line_directive = FALSE;
6321           return;
6322         }
6323     } /* if #line directives should be considered */
6324
6325 #ifdef ETAGS_REGEXPS
6326   {
6327     int match;
6328     regexp *rp;
6329     char *name;
6330
6331     /* Match against relevant regexps. */
6332     if (lbp->len > 0)
6333       for (rp = p_head; rp != NULL; rp = rp->p_next)
6334         {
6335           /* Only use generic regexps or those for the current language.
6336              Also do not use multiline regexps, which is the job of
6337              regex_tag_multiline. */
6338           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6339               || rp->multi_line)
6340             continue;
6341
6342           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6343           switch (match)
6344             {
6345             case -2:
6346               /* Some error. */
6347               if (!rp->error_signaled)
6348                 {
6349                   error ("regexp stack overflow while matching \"%s\"",
6350                          rp->pattern);
6351                   rp->error_signaled = TRUE;
6352                 }
6353               break;
6354             case -1:
6355               /* No match. */
6356               break;
6357             case 0:
6358               /* Empty string matched. */
6359               if (!rp->error_signaled)
6360                 {
6361                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6362                   rp->error_signaled = TRUE;
6363                 }
6364               break;
6365             default:
6366               /* Match occurred.  Construct a tag. */
6367               name = rp->name;
6368               if (name[0] == '\0')
6369                 name = NULL;
6370               else /* make a named tag */
6371                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6372               if (rp->force_explicit_name)
6373                 /* Force explicit tag name, if a name is there. */
6374                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6375               else
6376                 make_tag (name, strlen (name), TRUE,
6377                           lbp->buffer, match, lineno, linecharno);
6378               break;
6379             }
6380         }
6381   }
6382 #endif /* ETAGS_REGEXPS */
6383 }
6384
6385 \f
/*
 * Duplicate the NUL-terminated string CP.
 *
 * The whole string is handed to savenstr, so the result is a separate
 * buffer of strlen (CP) + 1 bytes holding an identical copy.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
6396
6397 /*
6398  * Return a pointer to a space of size LEN+1 allocated with xnew where
6399  * the string CP has been copied for at most the first LEN characters.
6400  */
6401 static char *
6402 savenstr (cp, len)
6403      char *cp;
6404      int len;
6405 {
6406   register char *dp;
6407
6408   dp = xnew (len + 1, char);
6409   strncpy (dp, cp, len);
6410   dp[len] = '\0';
6411   return dp;
6412 }
6413
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.
 *
 * Meant to behave like POSIX strrchr, included for portability.
 * As with strrchr, C is converted to char before the comparison, so a
 * byte value above 127 (e.g. a Latin-1 letter passed as 0xE9) is found
 * even when plain char is signed.  The terminating NUL counts as part
 * of the string, so searching for '\0' returns a pointer to it.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  const char wanted = (char) c;

  r = NULL;
  do
    {
      /* Keep overwriting: the last hit wins. */
      if (*sp == wanted)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6435
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.
 *
 * Meant to behave like POSIX strchr, included for portability.
 * As with strchr, C is converted to char before the comparison, so a
 * byte value above 127 (e.g. a Latin-1 letter passed as 0xE9) is found
 * even when plain char is signed.  The terminating NUL counts as part
 * of the string, so searching for '\0' returns a pointer to it.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  const char wanted = (char) c;

  do
    {
      if (*sp == wanted)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6454
/*
 * Compare S1 and S2, folding case only where both characters being
 * compared are alphabetic; any other pair is compared verbatim.
 *
 * Returns zero when the strings are equal under that rule, otherwise
 * the difference between the first pair of characters that disagree.
 * Stands in for BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings in step until S1 ends or a pair disagrees. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6475
/*
 * Compare at most the first N characters of S1 and S2, folding case
 * only where both characters being compared are alphabetic.
 *
 * Returns zero when the first N characters agree (or both strings end,
 * still equal, before N characters were seen), otherwise the difference
 * between the first pair of characters that disagree.  A non-positive N
 * compares nothing and yields zero.
 *
 * Stands in for BSD's strncasecmp, included for portability.  Characters
 * beyond the first N are never examined, so "abc" and "abcd" compare
 * equal for N == 3.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  for (; n > 0; n--, s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          int diff = lowcase (*s1) - lowcase (*s2);

          if (diff != 0)
            return diff;
        }
      else if (*s1 != *s2 || *s1 == '\0')
        /* Either a mismatch, or both strings ended together (difference
           is then zero). */
        return *s1 - *s2;
    }

  /* N characters compared without finding a difference. */
  return 0;
}
6501
/* Advance CP past every leading character accepted by iswhite and
   return the resulting pointer.  */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6511
/* Advance CP up to the first character accepted by iswhite, or to the
   terminating NUL if there is none, and return the resulting pointer.  */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6521
/* Report an error through `error' and terminate the program with
   EXIT_FAILURE.  `s1' is the printf control string, `s2' its argument.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6530
/* Print the system error message for the current errno, prefixed by
   `s1', using perror; then terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6538
6539 static void
6540 suggest_asking_for_help ()
6541 {
6542   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6543            progname, LONG_OPTIONS ? "--help" : "-h");
6544   exit (EXIT_FAILURE);
6545 }
6546
6547 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6548 static void
6549 error (s1, s2)
6550      const char *s1, *s2;
6551 {
6552   fprintf (stderr, "%s: ", progname);
6553   fprintf (stderr, s1, s2);
6554   fprintf (stderr, "\n");
6555 }
6556
6557 /* Return a newly-allocated string whose contents
6558    concatenate those of s1, s2, s3.  */
6559 static char *
6560 concat (s1, s2, s3)
6561      char *s1, *s2, *s3;
6562 {
6563   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6564   char *result = xnew (len1 + len2 + len3 + 1, char);
6565
6566   strcpy (result, s1);
6567   strcpy (result + len1, s2);
6568   strcpy (result + len1 + len2, s3);
6569   result[len1 + len2 + len3] = '\0';
6570
6571   return result;
6572 }
6573
6574 \f
6575 /* Does the same work as the system V getcwd, but does not need to
6576    guess the buffer size in advance. */
6577 static char *
6578 etags_getcwd ()
6579 {
6580 #ifdef HAVE_GETCWD
6581   int bufsize = 200;
6582   char *path = xnew (bufsize, char);
6583
6584   while (getcwd (path, bufsize) == NULL)
6585     {
6586       if (errno != ERANGE)
6587         pfatal ("getcwd");
6588       bufsize *= 2;
6589       free (path);
6590       path = xnew (bufsize, char);
6591     }
6592
6593   canonicalize_filename (path);
6594   return path;
6595
6596 #else /* not HAVE_GETCWD */
6597 #if MSDOS
6598
6599   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6600
6601   getwd (path);
6602
6603   for (p = path; *p != '\0'; p++)
6604     if (*p == '\\')
6605       *p = '/';
6606     else
6607       *p = lowcase (*p);
6608
6609   return strdup (path);
6610 #else /* not MSDOS */
6611   linebuffer path;
6612   FILE *pipe;
6613
6614   linebuffer_init (&path);
6615   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6616   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6617     pfatal ("pwd");
6618   pclose (pipe);
6619
6620   return path.buffer;
6621 #endif /* not MSDOS */
6622 #endif /* not HAVE_GETCWD */
6623 }
6624
6625 /* Return a newly allocated string containing the file name of FILE
6626    relative to the absolute directory DIR (which should end with a slash). */
6627 static char *
6628 relative_filename (file, dir)
6629      char *file, *dir;
6630 {
6631   char *fp, *dp, *afn, *res;
6632   int i;
6633
6634   /* Find the common root of file and dir (with a trailing slash). */
6635   afn = absolute_filename (file, cwd);
6636   fp = afn;
6637   dp = dir;
6638   while (*fp++ == *dp++)
6639     continue;
6640   fp--, dp--;                   /* back to the first differing char */
6641 #ifdef DOS_NT
6642   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6643     return afn;
6644 #endif
6645   do                            /* look at the equal chars until '/' */
6646     fp--, dp--;
6647   while (*fp != '/');
6648
6649   /* Build a sequence of "../" strings for the resulting relative file name. */
6650   i = 0;
6651   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6652     i += 1;
6653   res = xnew (3*i + strlen (fp + 1) + 1, char);
6654   res[0] = '\0';
6655   while (i-- > 0)
6656     strcat (res, "../");
6657
6658   /* Add the file name relative to the common root of file and dir. */
6659   strcat (res, fp + 1);
6660   free (afn);
6661
6662   return res;
6663 }
6664
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended to
   DIR.  Every "/dirname/.." and "/." component is then removed from the
   copy, in place.  If nothing is left, "/" is returned.  Under DOS_NT a
   relative name with a drive letter (`d:NAME') is a fatal error.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *src, *dst;              /* cursors for the in-place deletions */

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the slash that starts the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, which strcpy does not
                 allow.  The destination is always the lower address, so
                 a plain forward copy is well defined. */
              src = slashp + 3;
              dst = cp;
              while ((*dst++ = *src++) != '\0')
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." with the same overlap-safe forward copy. */
              src = slashp + 2;
              dst = slashp;
              while ((*dst++ = *src++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name denotes the root.  Release the
         now-empty buffer instead of leaking it. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6725
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).

   FILE is canonicalized in place first.  If it contains no slash, a
   copy of DIR is returned.  Otherwise FILE is cut just after its last
   slash for the duration of the absolute_filename call, then restored.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Temporarily end FILE right after its directory part. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    dirname = savestr (dir);

  return dirname;
}
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a letter, a colon and a slash.  FN must
   already have been canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  if (fn[0] == '/')
    return 1;
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return 0;
}
6760
/* Put the file name FN into canonical form, in place.
   Under DOS_NT a lowercase drive letter is turned to uppercase and
   every backslash becomes a slash.  Elsewhere FN is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6779
6780 \f
6781 /* Initialize a linebuffer for use */
6782 static void
6783 linebuffer_init (lbp)
6784      linebuffer *lbp;
6785 {
6786   lbp->size = (DEBUG) ? 3 : 200;
6787   lbp->buffer = xnew (lbp->size, char);
6788   lbp->buffer[0] = '\0';
6789   lbp->len = 0;
6790 }
6791
6792 /* Set the minimum size of a string contained in a linebuffer. */
6793 static void
6794 linebuffer_setlen (lbp, toksize)
6795      linebuffer *lbp;
6796      int toksize;
6797 {
6798   while (lbp->size <= toksize)
6799     {
6800       lbp->size *= 2;
6801       xrnew (lbp->buffer, lbp->size, char);
6802     }
6803   lbp->len = toksize;
6804 }
6805
6806 /* Like malloc but get fatal error if memory is exhausted. */
6807 static PTR
6808 xmalloc (size)
6809      unsigned int size;
6810 {
6811   PTR result = (PTR) malloc (size);
6812   if (result == NULL)
6813     fatal ("virtual memory exhausted", (char *)NULL);
6814   return result;
6815 }
6816
6817 static PTR
6818 xrealloc (ptr, size)
6819      char *ptr;
6820      unsigned int size;
6821 {
6822   PTR result = (PTR) realloc (ptr, size);
6823   if (result == NULL)
6824     fatal ("virtual memory exhausted", (char *)NULL);
6825   return result;
6826 }
6827
6828 /*
6829  * Local Variables:
6830  * c-indentation-style: gnu
6831  * indent-tabs-mode: t
6832  * tab-width: 8
6833  * fill-column: 79
6834  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6835  * End:
6836  */
6837
6838 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6839    (do not change this comment) */
6840
6841 /* etags.c ends here */