lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software: you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation, either version 3 of the License, or
  40 (at your option) any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  49
  50
  51 /* NB To comply with the above BSD license, copyright information is
  52 reproduced in etc/ETAGS.README.  That file should be updated when the
  53 above notices are.
  54
  55 To the best of our knowledge, this code was originally based on the
  56 ctags.c distributed with BSD4.2, which was copyrighted by the
  57 University of California, as described above. */
  58
  59
  60 /*
  61  * Authors:
  62  * 1983 Ctags originally by Ken Arnold.
  63  * 1984 Fortran added by Jim Kleckner.
  64  * 1984 Ed Pelegri-Llopart added C typedefs.
  65  * 1985 Emacs TAGS format by Richard Stallman.
  66  * 1989 Sam Kendall added C++.
  67  * 1992 Joseph B. Wells improved C and C++ parsing.
  68  * 1993 Francesco Potortì reorganized C and C++.
  69  * 1994 Line-by-line regexp tags by Tom Tromey.
  70  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  71  * 2002 #line directives by Francesco Potortì.
  72  *
  73  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  74  */
  75
  76 /*
  77  * If you want to add support for a new language, start by looking at the LUA
  78  * language, which is the simplest.  Alternatively, consider distributing etags
  79  * together with a configuration file containing regexp definitions for etags.
  80  */
  81
  82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4";
     /* Revision string compiled into the program.  It is not declared
        static, so the symbol has external linkage.  */
  83
  84 #define TRUE    1
  85 #define FALSE   0
  86
     /* Turn DEBUG into a plain TRUE/FALSE value so that it can be tested
        with `#if DEBUG'.  Builds without DEBUG also define NDEBUG.  */
  87 #ifdef DEBUG
  88 #  undef DEBUG
  89 #  define DEBUG TRUE
  90 #else
  91 #  define DEBUG  FALSE
  92 #  define NDEBUG                /* disable assert */
  93 #endif
  94
  95 #ifdef HAVE_CONFIG_H
  96 # include <config.h>
  97   /* On some systems, Emacs defines static as nothing for the sake
  98      of unexec.  We don't want that here since we don't use unexec. */
  99 # undef static
 100 # ifndef PTR                    /* for XEmacs */
 101 #   define PTR void *
 102 # endif
 103 # ifndef __P                    /* for XEmacs */
 104 #   define __P(args) args
 105 # endif
 106 #else  /* no config.h */
 107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 108 #   define __P(args) args       /* use prototypes */
 109 #   define PTR void *           /* for generic pointers */
 110 # else /* not standard C */
 111 #   define __P(args) ()         /* no prototypes */
 112 #   define const                /* remove const for old compilers' sake */
 113 #   define PTR long *           /* don't use void* */
 114 # endif
 115 #endif /* !HAVE_CONFIG_H */
 116
 117 #ifndef _GNU_SOURCE
 118 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 119 #endif
 120
 121 /* WIN32_NATIVE is for XEmacs.
 122    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 123 #ifdef WIN32_NATIVE
 124 # undef MSDOS
 125 # undef  WINDOWSNT
 126 # define WINDOWSNT
 127 #endif /* WIN32_NATIVE */
 128
 129 #ifdef MSDOS
 130 # undef MSDOS
 131 # define MSDOS TRUE
 132 # include <fcntl.h>
 133 # include <sys/param.h>
 134 # include <io.h>
 135 # ifndef HAVE_CONFIG_H
 136 #   define DOS_NT
 137 #   include <sys/config.h>
 138 # endif
 139 #else
 140 # define MSDOS FALSE
 141 #endif /* MSDOS */
 142
 143 #ifdef WINDOWSNT
 144 # include <stdlib.h>
 145 # include <fcntl.h>
 146 # include <string.h>
 147 # include <direct.h>
 148 # include <io.h>
 149 # define MAXPATHLEN _MAX_PATH
 150 # undef HAVE_NTGUI
 151 # undef  DOS_NT
 152 # define DOS_NT
 153 # ifndef HAVE_GETCWD
 154 #   define HAVE_GETCWD
 155 # endif /* undef HAVE_GETCWD */
 156 #else /* not WINDOWSNT */
 157 # ifdef STDC_HEADERS
 158 #  include <stdlib.h>
 159 #  include <string.h>
 160 # else /* no standard C headers */
 161    extern char *getenv __P((const char *));
 162    extern char *strcpy __P((char *, const char *));
 163    extern char *strncpy __P((char *, const char *, unsigned long));
 164    extern char *strcat __P((char *, const char *));
 165    extern char *strncat __P((char *, const char *, unsigned long));
 166    extern int strcmp __P((const char *, const char *));
 167    extern int strncmp __P((const char *, const char *, unsigned long));
 168    extern int system __P((const char *));
 169    extern unsigned long strlen __P((const char *));
 170    extern void *malloc __P((unsigned long));
 171    extern void *realloc __P((void *, unsigned long));
 172    extern void exit __P((int));
 173    extern void free __P((void *));
 174    extern void *memmove __P((void *, const void *, unsigned long));
 175 #  define EXIT_SUCCESS  0
 176 #  define EXIT_FAILURE  1
 177 # endif
 178 #endif /* !WINDOWSNT */
 179
 180 #ifdef HAVE_UNISTD_H
 181 # include <unistd.h>
 182 #else
 183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 184     extern char *getcwd (char *buf, size_t size);
 185 # endif
 186 #endif /* HAVE_UNISTD_H */
 187
 188 #include <stdio.h>
 189 #include <ctype.h>
 190 #include <errno.h>
 191 #ifndef errno
 192   extern int errno;
 193 #endif
 194 #include <sys/types.h>
 195 #include <sys/stat.h>
 196
 197 #include <assert.h>
 198 #ifdef NDEBUG
 199 # undef  assert                 /* some systems have a buggy assert.h */
 200 # define assert(x) ((void) 0)
 201 #endif
 202
 203 #if !defined (S_ISREG) && defined (S_IFREG)
 204 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 205 #endif
 206
 207 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 208 # define NO_LONG_OPTIONS TRUE
 209 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 210   extern char *optarg;
 211   extern int optind, opterr;
 212 #else
 213 # define NO_LONG_OPTIONS FALSE
 214 # include <getopt.h>
 215 #endif /* NO_LONG_OPTIONS */
 216
 217 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 218 # ifdef __CYGWIN__              /* compiling on Cygwin */
 219                              !!! NOTICE !!!
 220  the regex.h distributed with Cygwin is not compatible with etags, alas!
 221 If you want regular expression support, you should delete this notice and
 222               arrange to use the GNU regex.h and regex.c.
 223 # endif
 224 #endif
 225 #include <regex.h>
 226
 227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 228  Leave it undefined to make the program "etags", which makes emacs-style
 229  tag tables and tags typedefs, #defines and struct/union/enum by default. */
     /* Either way CTAGS ends up defined as TRUE or FALSE, so the rest of
        the file can test it with `#if CTAGS'.  */
 230 #ifdef CTAGS
 231 # undef  CTAGS
 232 # define CTAGS TRUE
 233 #else
 234 # define CTAGS FALSE
 235 #endif
 236
     /* String equality predicates: each yields nonzero when the two
        strings compare equal.  streq and strneq use strcmp and strncmp;
        strcaseeq and strncaseeq delegate to etags_strcasecmp and
        etags_strncasecmp.  The `n' variants pass N through as the
        comparison length.
        When assert is active every macro checks that BOTH arguments are
        non-null: a single null pointer must never reach the comparison
        function.  S and T are then evaluated twice, so pass expressions
        without side effects.  */
 237 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 238 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 239 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 240 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 241
 242 #define CHARS 256               /* 2^8: one entry per value of an 8-bit char */
 243 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
     /* CHAR reduces X to the range 0..CHARS-1, so a char holding a
        negative value still gives a valid index into the tables below
        and a valid argument for the <ctype.h> calls.  */
 244 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 245 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 246 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 247 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 248 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 249
     /* <ctype.h> wrappers that always pass the masked value.  */
 250 #define ISALNUM(c)      isalnum (CHAR(c))
 251 #define ISALPHA(c)      isalpha (CHAR(c))
 252 #define ISDIGIT(c)      isdigit (CHAR(c))
 253 #define ISLOWER(c)      islower (CHAR(c))
 254
 255 #define lowcase(c)      tolower (CHAR(c))
 256 #define upcase(c)       toupper (CHAR(c))
 257
 258
 259 /*
 260  *      xnew, xrnew -- allocate, reallocate storage
 261  *
 262  * SYNOPSIS:    Type *xnew (int n, Type);
 263  *              void xrnew (OldPointer, int n, Type);
 264  */
     /* xnew yields room for N objects of type Type.  xrnew resizes OP to
        N objects and assigns the new address back to OP, so OP must be
        an lvalue.  The byte count is the plain product
        N * sizeof (Type); no overflow check is made here.  With DEBUG
        the requests go to trace_malloc/trace_realloc together with the
        file name and line of the call; otherwise they go to
        xmalloc/xrealloc.  */
 265 #if DEBUG
 266 # include "chkmalloc.h"
 267 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 268                                                   (n) * sizeof (Type)))
 269 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 270                                         (char *) (op), (n) * sizeof (Type)))
 271 #else
 272 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 273 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 274                                         (char *) (op), (n) * sizeof (Type)))
 275 #endif
 276
 277 #define bool int                /* booleans are plain ints in this file */
 278
 279 typedef void Lang_function __P((FILE *));
     /* Type of a language parse function (see `function' in `language'):
        it takes the input stream and returns nothing.  */
 280
 281 typedef struct                  /* a file name suffix and its decompressor */
 282 {
 283   char *suffix;                 /* file name suffix for this compressor */
 284   char *command;                /* takes one arg and decompresses to stdout */
 285 } compressor;
 286
 287 typedef struct                  /* description of one supported language */
 288 {
 289   char *name;                   /* language name */
 290   char *help;                   /* detailed help for the language */
 291   Lang_function *function;      /* parse function */
 292   char **suffixes;              /* name suffixes of this language's files */
 293   char **filenames;             /* names of this language's files */
 294   char **interpreters;          /* interpreters for this language */
 295   bool metasource;              /* source used to generate other sources */
 296 } language;
 297
 298 typedef struct fdesc            /* record kept for one input file */
 299 {
 300   struct fdesc *next;           /* for the linked list */
 301   char *infname;                /* uncompressed input file name */
 302   char *infabsname;             /* absolute uncompressed input file name */
 303   char *infabsdir;              /* absolute dir of input file */
 304   char *taggedfname;            /* file name to write in tagfile */
 305   language *lang;               /* language of file */
 306   char *prop;                   /* file properties to write in tagfile */
 307   bool usecharno;               /* etags tags shall contain char number */
 308   bool written;                 /* entry written in the tags file */
 309 } fdesc;
 310
 311 typedef struct node_st          /* one tag, stored as a binary tree node */
 312 {                               /* sorting structure */
 313   struct node_st *left, *right; /* left and right sons */
 314   fdesc *fdp;                   /* description of file to whom tag belongs */
 315   char *name;                   /* tag name */
 316   char *regex;                  /* search regexp */
 317   bool valid;                   /* write this tag on the tag file */
 318   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 319   bool been_warned;             /* warning already given for duplicated tag */
 320   int lno;                      /* line number tag is on */
 321   long cno;                     /* character number line starts on */
 322 } node;
 323
 324 /*
 325  * A `linebuffer' is a structure which holds a line of text.
 326  * `readline_internal' reads a line from a stream into a linebuffer
 327  * and works regardless of the length of the line.
 328  * SIZE is the size of BUFFER, LEN is the length of the string in
 329  * BUFFER after readline reads it.
      * Note that SIZE is declared long while LEN is declared int.
 330  */
 331 typedef struct
 332 {
 333   long size;                    /* size of `buffer' */
 334   int len;                      /* length of the string held in `buffer' */
 335   char *buffer;                 /* the text itself */
 336 } linebuffer;
 337
 338 /* Used to support mixing of --lang and file names. */
     /* Each `argument' records one item together with its kind
        (arg_type) and the language associated with it.  */
 339 typedef struct
 340 {
 341   enum {
 342     at_language,                /* a language specification */
 343     at_regexp,                  /* a regular expression */
 344     at_filename,                /* a file name */
 345     at_stdin,                   /* read from stdin here */
 346     at_end                      /* stop parsing the list */
 347   } arg_type;                   /* argument type */
 348   language *lang;               /* language associated with the argument */
 349   char *what;                   /* the argument itself */
 350 } argument;
 351
 352 /* Structure defining a regular expression. */
     /* Entries are chained through p_next.  The compiled pattern is held
        through a pointer, the match registers are stored in place.  */
 353 typedef struct regexp
 354 {
 355   struct regexp *p_next;        /* pointer to next in list */
 356   language *lang;               /* if set, use only for this language */
 357   char *pattern;                /* the regexp pattern */
 358   char *name;                   /* tag name */
 359   struct re_pattern_buffer *pat; /* the compiled pattern */
 360   struct re_registers regs;     /* re registers */
 361   bool error_signaled;          /* already signaled for this regexp */
 362   bool force_explicit_name;     /* do not allow implicit tag name */
 363   bool ignore_case;             /* ignore case when matching */
 364   bool multi_line;              /* do a multi-line match on the whole file */
 365 } regexp;
 366
 367
 368 /* Many compilers barf on this:
 369         Lang_function Ada_funcs;
 370    so let's write it this way */
 371 static void Ada_funcs __P((FILE *));
 372 static void Asm_labels __P((FILE *));
 373 static void C_entries __P((int c_ext, FILE *));
     /* C_entries takes an extra c_ext argument, so unlike its neighbours
        it does not have the Lang_function signature.  */
 374 static void default_C_entries __P((FILE *));
 375 static void plain_C_entries __P((FILE *));
 376 static void Cjava_entries __P((FILE *));
 377 static void Cobol_paragraphs __P((FILE *));
 378 static void Cplusplus_entries __P((FILE *));
 379 static void Cstar_entries __P((FILE *));
 380 static void Erlang_functions __P((FILE *));
 381 static void Forth_words __P((FILE *));
 382 static void Fortran_functions __P((FILE *));
 383 static void HTML_labels __P((FILE *));
 384 static void Lisp_functions __P((FILE *));
 385 static void Lua_functions __P((FILE *));
 386 static void Makefile_targets __P((FILE *));
 387 static void Pascal_functions __P((FILE *));
 388 static void Perl_functions __P((FILE *));
 389 static void PHP_functions __P((FILE *));
 390 static void PS_functions __P((FILE *));
 391 static void Prolog_functions __P((FILE *));
 392 static void Python_functions __P((FILE *));
 393 static void Scheme_functions __P((FILE *));
 394 static void TeX_commands __P((FILE *));
 395 static void Texinfo_nodes __P((FILE *));
 396 static void Yacc_entries __P((FILE *));
 397 static void just_read_file __P((FILE *));
 398
 399 static void print_language_names __P((void));
 400 static void print_version __P((void));
 401 static void print_help __P((argument *));
 402 int main __P((int, char **));
 403
 404 static compressor *get_compressor_from_suffix __P((char *, char **));
 405 static language *get_language_from_langname __P((const char *));
 406 static language *get_language_from_interpreter __P((char *));
 407 static language *get_language_from_filename __P((char *, bool));
 408 static void readline __P((linebuffer *, FILE *));
 409 static long readline_internal __P((linebuffer *, FILE *));
 410 static bool nocase_tail __P((char *));
 411 static void get_tag __P((char *, char **));
 412
 413 static void analyse_regex __P((char *));
 414 static void free_regexps __P((void));
 415 static void regex_tag_multiline __P((void));
 416 static void error __P((const char *, const char *));
 417 static void suggest_asking_for_help __P((void));
 418 void fatal __P((char *, char *));
     /* Apart from main, fatal is the only function in this list that is
        declared without `static'.  */
 419 static void pfatal __P((char *));
 420 static void add_node __P((node *, node **));
 421
 422 static void init __P((void));
 423 static void process_file_name __P((char *, language *));
 424 static void process_file __P((FILE *, char *, language *));
 425 static void find_entries __P((FILE *));
 426 static void free_tree __P((node *));
 427 static void free_fdesc __P((fdesc *));
 428 static void pfnote __P((char *, bool, char *, int, int, long));
 429 static void make_tag __P((char *, int, bool, char *, int, int, long));
 430 static void invalidate_nodes __P((fdesc *, node **));
 431 static void put_entries __P((node *));
 432
 433 static char *concat __P((char *, char *, char *));
 434 static char *skip_spaces __P((char *));
 435 static char *skip_non_spaces __P((char *));
 436 static char *savenstr __P((char *, int));
 437 static char *savestr __P((char *));
 438 static char *etags_strchr __P((const char *, int));
 439 static char *etags_strrchr __P((const char *, int));
 440 static int etags_strcasecmp __P((const char *, const char *));
 441 static int etags_strncasecmp __P((const char *, const char *, int));
 442 static char *etags_getcwd __P((void));
 443 static char *relative_filename __P((char *, char *));
 444 static char *absolute_filename __P((char *, char *));
 445 static char *absolute_dirname __P((char *, char *));
 446 static bool filename_is_absolute __P((char *f));
 447 static void canonicalize_filename __P((char *));
 448 static void linebuffer_init __P((linebuffer *));
 449 static void linebuffer_setlen __P((linebuffer *, int));
     /* NOTE(review): the allocation wrappers take their size as unsigned
        int, so a request larger than UINT_MAX bytes cannot be expressed
        -- confirm whether a wider type is wanted.  */
 450 static PTR xmalloc __P((unsigned int));
 451 static PTR xrealloc __P((char *, unsigned int));
 452
 453 \f
 454 static char searchar = '/';     /* use /.../ searches */
 455
 456 static char *tagfile;           /* output file */
 457 static char *progname;          /* name this program was invoked with */
 458 static char *cwd;               /* current working directory */
 459 static char *tagfiledir;        /* directory of tagfile */
 460 static FILE *tagf;              /* ioptr for tags file */
 461
 462 static fdesc *fdhead;           /* head of file description list */
 463 static fdesc *curfdp;           /* current file description */
 464 static int lineno;              /* line number of current line */
 465 static long charno;             /* current character number */
 466 static long linecharno;         /* charno of start of current line */
 467 static char *dbp;               /* pointer to start of current tag */
 468
     /* NOTE(review): presumably the marker used where no character
        number applies; it is an int while charno and cno are long --
        confirm at the places that use it.  */
 469 static const int invalidcharno = -1;
 470
 471 static node *nodehead;          /* the head of the binary tree of tags */
 472 static node *last_node;         /* the last node created */
 473
 474 static linebuffer lb;           /* the current line */
 475 static linebuffer filebuf;      /* a buffer containing the whole file */
 476 static linebuffer token_name;   /* a buffer containing a tag name */
 477
 478 /* boolean "functions" (see init)       */
     /* The five tables have CHARS entries each and are read through the
        iswhite, notinname, intoken, begtoken and endtoken macros, which
        index them with CHAR(c).  The strings that follow list the
        characters belonging to each class.  */
 479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 480 static char
 481   /* white chars */
 482   *white = " \f\t\n\r\v",
 483   /* not in a name */
 484   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 485   /* token ending chars */
 486   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 487   /* token starting chars */
 488   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 489   /* valid in-token chars */
 490   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 491
     /* Option flags.  Several of them (packages_only, declarations,
        no_line_directive, no_duplicates, members, globals) are named as
        flag variables in the longopts table, so getopt_long stores into
        them directly.  */
 492 static bool append_to_tagfile;  /* -a: append to tags */
 493 /* The next five default to TRUE in C and derived languages.  */
 494 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 495 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 496                                 /* 0 struct/enum/union decls, and C++ */
 497                                 /* member functions. */
 498 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 499                                 /* constants and variables. */
 500                                 /* -D: opposite of -d.  Default under ctags. */
 501 static bool globals;            /* create tags for global variables */
 502 static bool members;            /* create tags for C member variables */
 503 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 504 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 505 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 506 static bool update;             /* -u: update tags */
 507 static bool vgrind_style;       /* -v: create vgrind style index output */
 508 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 509 static bool cxref_style;        /* -x: create cxref style output */
 510 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 511 static bool ignoreindent;       /* -I: ignore indentation in C */
 512 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 513
 514 /* STDIN is defined in LynxOS system headers */
 515 #ifdef STDIN
 516 # undef STDIN
 517 #endif
 518
     /* 0x1001 is larger than any single-character option code, so it
        cannot be confused with one of the option letters.  */
 519 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 520 static bool parsing_stdin;      /* --parse-stdin used */
 521
 522 static regexp *p_head;          /* list of all regexps */
 523 static bool need_filebuf;       /* some regexes are multi-line */
 524
 525 static struct option longopts[] =
 526 {
     /* Long option table for getopt_long.  An entry whose third field is
        a variable address makes getopt_long store the fourth field into
        that variable; this relies on `bool' being defined as int.  The
        other entries make getopt_long return the fourth field.  The
        table ends with the { NULL } entry.  */
 527   { "append",             no_argument,       NULL,               'a'   },
 528   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 529   { "c++",                no_argument,       NULL,               'C'   },
 530   { "declarations",       no_argument,       &declarations,      TRUE  },
 531   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 532   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
     /* NOTE(review): "help" is listed twice, once with 'h' and once with
        'H'.  Confirm that the second entry is intended; two entries
        with the same name may make abbreviations such as --hel
        ambiguous.  */
 533   { "help",               no_argument,       NULL,               'h'   },
 534   { "help",               no_argument,       NULL,               'H'   },
 535   { "ignore-indentation", no_argument,       NULL,               'I'   },
 536   { "language",           required_argument, NULL,               'l'   },
 537   { "members",            no_argument,       &members,           TRUE  },
 538   { "no-members",         no_argument,       &members,           FALSE },
 539   { "output",             required_argument, NULL,               'o'   },
 540   { "regex",              required_argument, NULL,               'r'   },
 541   { "no-regex",           no_argument,       NULL,               'R'   },
 542   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 543   { "parse-stdin",        required_argument, NULL,               STDIN },
 544   { "version",            no_argument,       NULL,               'V'   },
 545
 546 #if CTAGS /* Ctags options */
 547   { "backward-search",    no_argument,       NULL,               'B'   },
 548   { "cxref",              no_argument,       NULL,               'x'   },
 549   { "defines",            no_argument,       NULL,               'd'   },
 550   { "globals",            no_argument,       &globals,           TRUE  },
 551   { "typedefs",           no_argument,       NULL,               't'   },
 552   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 553   { "update",             no_argument,       NULL,               'u'   },
 554   { "vgrind",             no_argument,       NULL,               'v'   },
 555   { "no-warn",            no_argument,       NULL,               'w'   },
 556
 557 #else /* Etags options */
 558   { "no-defines",         no_argument,       NULL,               'D'   },
 559   { "no-globals",         no_argument,       &globals,           FALSE },
 560   { "include",            required_argument, NULL,               'i'   },
 561 #endif
 562   { NULL }
 563 };
 564
 565 static compressor compressors[] =
 566 {
     /* Each entry pairs a file name suffix (written without the dot)
        with the command used for it.  The list ends with the { NULL }
        entry.  */
 567   { "z", "gzip -d -c"},
 568   { "Z", "gzip -d -c"},
 569   { "gz", "gzip -d -c"},
 570   { "GZ", "gzip -d -c"},
 571   { "bz2", "bzip2 -d -c" },
 572   { NULL }
 573 };
 574
 575 /*
 576  * Language stuff.
 577  */
 578
 579 /* Ada code */
     /* Suffix lists are NULL-terminated and give the suffix without the
        dot.  Help texts are single string literals continued across
        lines with backslash-newline.  */
 580 static char *Ada_suffixes [] =
 581   { "ads", "adb", "ada", NULL };
 582 static char Ada_help [] =
 583 "In Ada code, functions, procedures, packages, tasks and types are\n\
 584 tags.  Use the `--packages-only' option to create tags for\n\
 585 packages only.\n\
 586 Ada tag names have suffixes indicating the type of entity:\n\
 587         Entity type:    Qualifier:\n\
 588         ------------    ----------\n\
 589         function        /f\n\
 590         procedure       /p\n\
 591         package spec    /s\n\
 592         package body    /b\n\
 593         type            /t\n\
 594         task            /k\n\
 595 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 596 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 597 will just search for any tag `bidule'.";
 598
 599 /* Assembly code */
 600 static char *Asm_suffixes [] =
 601   { "a",        /* Unix assembler */
 602     "asm", /* Microcontroller assembly */
 603     "def", /* BSO/Tasking definition includes  */
 604     "inc", /* Microcontroller include files */
 605     "ins", /* Microcontroller include files */
 606     "s", "sa", /* Unix assembler */
 607     "S",   /* cpp-processed Unix assembler */
 608     "src", /* BSO/Tasking C compiler output */
 609     NULL
 610   };
 611 static char Asm_help [] =
 612 "In assembler code, labels appearing at the beginning of a line,\n\
 613 followed by a colon, are tags.";
 614
 615
 616 /* Note that .c and .h can be considered C++, if the --c++ flag was
 617    given, or if the `class' or `template' keywords are met inside the file.
 618    That is why default_C_entries is called for these. */
     /* Two versions of default_C_help follow; `#if CTAGS' selects the one
        that matches the option names of the program being built.  */
 619 static char *default_C_suffixes [] =
 620   { "c", "h", NULL };
 621 #if CTAGS                               /* C help for Ctags */
 622 static char default_C_help [] =
 623 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 624 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 625 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 626 Use --globals to tag global variables.\n\
 627 You can tag function declarations and external variables by\n\
 628 using `--declarations', and struct members by using `--members'.";
 629 #else                                   /* C help for Etags */
 630 static char default_C_help [] =
 631 "In C code, any C function or typedef is a tag, and so are\n\
 632 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 633 definitions and `enum' constants are tags unless you specify\n\
 634 `--no-defines'.  Global variables are tags unless you specify\n\
 635 `--no-globals' and so are struct members unless you specify\n\
 636 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 637 `--no-members' can make the tags table file much smaller.\n\
 638 You can tag function declarations and external variables by\n\
 639 using `--declarations'.";
 640 #endif  /* C help for Ctags and Etags */
 641
     /* C++ suffixes; the list also carries "M" and "pdb", explained by
        the comments next to them.  */
 642 static char *Cplusplus_suffixes [] =
 643   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 644     "M",                        /* Objective C++ */
 645     "pdb",                      /* Postscript with C syntax */
 646     NULL };
 647 static char Cplusplus_help [] =
 648 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 649 --help --lang=c --lang=c++ for full help.)\n\
 650 In addition to C tags, member functions are also recognized.  Member\n\
 651 variables are recognized unless you use the `--no-members' option.\n\
 652 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 653 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 654 `operator+'.";
 655
 656 static char *Cjava_suffixes [] =
 657   { "java", NULL };
 658 static char Cjava_help [] =
 659 "In Java code, all the tags constructs of C and C++ code are\n\
 660 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 661
 662
 663 static char *Cobol_suffixes [] =
 664   { "COB", "cob", NULL };
 665 static char Cobol_help [] =
 666 "In Cobol code, tags are paragraph names; that is, any word\n\
 667 starting in column 8 and followed by a period.";
 668
     /* C*: only a suffix list appears here; no help text accompanies it
        in this part of the file.  */
 669 static char *Cstar_suffixes [] =
 670   { "cs", "hs", NULL };
 671
 672 static char *Erlang_suffixes [] =
 673   { "erl", "hrl", NULL };
 674 static char Erlang_help [] =
 675 "In Erlang code, the tags are the functions, records and macros\n\
 676 defined in the file.";
 677
     /* Forth: NULL-terminated suffix list (suffixes without the dot) and
        help text.  The suffix list is file-local, like the suffix list
        of every other language in this table.  */
 678 static char *Forth_suffixes [] =
 679   { "fth", "tok", NULL };
 680 static char Forth_help [] =
 681 "In Forth code, tags are words defined by `:',\n\
 682 constant, code, create, defer, value, variable, buffer:, field.";
 683
 684 static char *Fortran_suffixes [] =
 685   { "F", "f", "f90", "for", NULL };
 686 static char Fortran_help [] =
 687 "In Fortran code, functions, subroutines and block data are tags.";
 688
 689 static char *HTML_suffixes [] =
 690   { "htm", "html", "shtml", NULL };
 691 static char HTML_help [] =
 692 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 693 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 694 occurrences of `id='.";
 695
 696 static char *Lisp_suffixes [] =
 697   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 698 static char Lisp_help [] =
 699 "In Lisp code, any function defined with `defun', any variable\n\
 700 defined with `defvar' or `defconst', and in general the first\n\
 701 argument of any expression that starts with `(def' in column zero\n\
 702 is a tag.";
 703
 704 static char *Lua_suffixes [] =
 705   { "lua", "LUA", NULL };
 706 static char Lua_help [] =
 707 "In Lua scripts, all functions are tags.";
 708
     /* Makefiles are listed by complete file name rather than by
        suffix.  */
 709 static char *Makefile_filenames [] =
 710   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 711 static char Makefile_help [] =
 712 "In makefiles, targets are tags; additionally, variables are tags\n\
 713 unless you specify `--no-globals'.";
 714
 715 static char *Objc_suffixes [] =
 716   { "lm",                       /* Objective lex file */
 717     "m",                        /* Objective C file */
 718      NULL };
 719 static char Objc_help [] =
 720 "In Objective C code, tags include Objective C definitions for classes,\n\
 721 class categories, methods and protocols.  Tags for variables and\n\
 722 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 723 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 724
 725 static char *Pascal_suffixes [] =
 726   { "p", "pas", NULL };
 727 static char Pascal_help [] =
 728 "In Pascal code, the tags are the functions and procedures defined\n\
 729 in the file.";
 730 /* " // this is for working around an Emacs highlighting bug... */
 731
 732 static char *Perl_suffixes [] =
 733   { "pl", "pm", NULL };
 734 static char *Perl_interpreters [] =
 735   { "perl", "@PERL@", NULL };
 736 static char Perl_help [] =
 737 "In Perl code, the tags are the packages, subroutines and variables\n\
 738 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 739 `--globals' if you want to tag global variables.  Tags for\n\
 740 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 741 defined in the default package is `main::SUB'.";
 742
 743 static char *PHP_suffixes [] =
 744   { "php", "php3", "php4", NULL };
 745 static char PHP_help [] =
 746 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 747 the `--no-members' option, vars are tags too.";
 748
 749 static char *plain_C_suffixes [] =
 750   { "pc",                       /* Pro*C file */
 751      NULL };
 752
 753 static char *PS_suffixes [] =
 754   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 755 static char PS_help [] =
 756 "In PostScript code, the tags are the functions.";
 757
 758 static char *Prolog_suffixes [] =
 759   { "prolog", NULL };
 760 static char Prolog_help [] =
 761 "In Prolog code, tags are predicates and rules at the beginning of\n\
 762 line.";
 763
 764 static char *Python_suffixes [] =
 765   { "py", NULL };
 766 static char Python_help [] =
 767 "In Python code, `def' or `class' at the beginning of a line\n\
 768 generate a tag.";
 769
 770 /* Can't do the `SCM' or `scm' prefix with a version number. */
 771 static char *Scheme_suffixes [] =
 772   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 773 static char Scheme_help [] =
 774 "In Scheme code, tags include anything defined with `def' or with a\n\
 775 construct whose name starts with `def'.  They also include\n\
 776 variables set with `set!' at top level in the file.";
 777
 778 static char *TeX_suffixes [] =
 779   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 780 static char TeX_help [] =
 781 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 782 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 783 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 784 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 785 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 786 \n\
 787 Other commands can be specified by setting the environment variable\n\
 788 `TEXTAGS' to a colon-separated list like, for example,\n\
 789      TEXTAGS=\"mycommand:myothercommand\".";
 790
 791
 792 static char *Texinfo_suffixes [] =
 793   { "texi", "texinfo", "txi", NULL };
 794 static char Texinfo_help [] =
 795 "for texinfo files, lines starting with @node are tagged.";
 796
 797 static char *Yacc_suffixes [] =
 798   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 799 static char Yacc_help [] =
 800 "In Bison or Yacc input files, each rule defines as a tag the\n\
 801 nonterminal it constructs.  The portions of the file that contain\n\
 802 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 803 for full help).";
 804
 805 static char auto_help [] =
 806 "`auto' is not a real language, it indicates to use\n\
 807 a default language for files base on file name suffix and file contents.";
 808
 809 static char none_help [] =
 810 "`none' is not a real language, it indicates to only do\n\
 811 regexp processing on files.";
 812
 813 static char no_lang_help [] =
 814 "No detailed help available for this language.";
 815
 816
 817 /*
 818  * Table of languages.
 819  *
 820  * It is ok for a given function to be listed under more than one
 821  * name.  I just didn't.
 822  */
 823
 824 static language lang_names [] =
 825 {
 826   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 827   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 828   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 829   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 830   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 831   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 832   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 833   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 834   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 835   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 836   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 837   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 838   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 839   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 840   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 841   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 842   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 843   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 844   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 845   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 846   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 847   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 848   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 849   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 850   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 851   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 852   { "auto",      auto_help },                      /* default guessing scheme */
 853   { "none",      none_help,      just_read_file }, /* regexp matching only */
 854   { NULL }                /* end of list */
 855 };
 856
 857 \f
 858 static void
 859 print_language_names ()
 860 {
 861   language *lang;
 862   char **name, **ext;
 863
 864   puts ("\nThese are the currently supported languages, along with the\n\
 865 default file names and dot suffixes:");
 866   for (lang = lang_names; lang->name != NULL; lang++)
 867     {
 868       printf ("  %-*s", 10, lang->name);
 869       if (lang->filenames != NULL)
 870         for (name = lang->filenames; *name != NULL; name++)
 871           printf (" %s", *name);
 872       if (lang->suffixes != NULL)
 873         for (ext = lang->suffixes; *ext != NULL; ext++)
 874           printf (" .%s", *ext);
 875       puts ("");
 876     }
 877   puts ("where `auto' means use default language for files based on file\n\
 878 name suffix, and `none' means only do regexp processing on files.\n\
 879 If no language is specified and no matching suffix is found,\n\
 880 the first line of the file is read for a sharp-bang (#!) sequence\n\
 881 followed by the name of an interpreter.  If no such sequence is found,\n\
 882 Fortran is tried first; if no tags are found, C is tried next.\n\
 883 When parsing any C file, a \"class\" or \"template\" keyword\n\
 884 switches to C++.");
 885   puts ("Compressed files are supported using gzip and bzip2.\n\
 886 \n\
 887 For detailed help on a given language use, for example,\n\
 888 etags --help --lang=ada.");
 889 }
 890
 891 #ifndef EMACS_NAME
 892 # define EMACS_NAME "standalone"
 893 #endif
 894 #ifndef VERSION
 895 # define VERSION "17.38.1.4"
 896 #endif
 897 static void
 898 print_version ()
 899 {
 900   /* Makes it easier to update automatically. */
 901   char emacs_copyright[] = "Copyright (C) 2010 Free Software Foundation, Inc.";
 902
 903   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 904   puts (emacs_copyright);
 905   puts ("This program is distributed under the terms in ETAGS.README");
 906
 907   exit (EXIT_SUCCESS);
 908 }
 909
 910 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 911 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 912 #endif
 913
 914 static void
 915 print_help (argbuffer)
 916      argument *argbuffer;
 917 {
 918   bool help_for_lang = FALSE;
 919
 920   for (; argbuffer->arg_type != at_end; argbuffer++)
 921     if (argbuffer->arg_type == at_language)
 922       {
 923         if (help_for_lang)
 924           puts ("");
 925         puts (argbuffer->lang->help);
 926         help_for_lang = TRUE;
 927       }
 928
 929   if (help_for_lang)
 930     exit (EXIT_SUCCESS);
 931
 932   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 933 \n\
 934 These are the options accepted by %s.\n", progname, progname);
 935   if (NO_LONG_OPTIONS)
 936     puts ("WARNING: long option names do not work with this executable,\n\
 937 as it is not linked with GNU getopt.");
 938   else
 939     puts ("You may use unambiguous abbreviations for the long option names.");
 940   puts ("  A - as file name means read names from stdin (one per line).\n\
 941 Absolute names are stored in the output file as they are.\n\
 942 Relative ones are stored relative to the output file's directory.\n");
 943
 944   puts ("-a, --append\n\
 945         Append tag entries to existing tags file.");
 946
 947   puts ("--packages-only\n\
 948         For Ada files, only generate tags for packages.");
 949
 950   if (CTAGS)
 951     puts ("-B, --backward-search\n\
 952         Write the search commands for the tag entries using '?', the\n\
 953         backward-search command instead of '/', the forward-search command.");
 954
 955   /* This option is mostly obsolete, because etags can now automatically
 956      detect C++.  Retained for backward compatibility and for debugging and
 957      experimentation.  In principle, we could want to tag as C++ even
 958      before any "class" or "template" keyword.
 959   puts ("-C, --c++\n\
 960         Treat files whose name suffix defaults to C language as C++ files.");
 961   */
 962
 963   puts ("--declarations\n\
 964         In C and derived languages, create tags for function declarations,");
 965   if (CTAGS)
 966     puts ("\tand create tags for extern variables if --globals is used.");
 967   else
 968     puts
 969       ("\tand create tags for extern variables unless --no-globals is used.");
 970
 971   if (CTAGS)
 972     puts ("-d, --defines\n\
 973         Create tag entries for C #define constants and enum constants, too.");
 974   else
 975     puts ("-D, --no-defines\n\
 976         Don't create tag entries for C #define constants and enum constants.\n\
 977         This makes the tags file smaller.");
 978
 979   if (!CTAGS)
 980     puts ("-i FILE, --include=FILE\n\
 981         Include a note in tag file indicating that, when searching for\n\
 982         a tag, one should also consult the tags file FILE after\n\
 983         checking the current file.");
 984
 985   puts ("-l LANG, --language=LANG\n\
 986         Force the following files to be considered as written in the\n\
 987         named language up to the next --language=LANG option.");
 988
 989   if (CTAGS)
 990     puts ("--globals\n\
 991         Create tag entries for global variables in some languages.");
 992   else
 993     puts ("--no-globals\n\
 994         Do not create tag entries for global variables in some\n\
 995         languages.  This makes the tags file smaller.");
 996
 997   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 998     puts ("--no-line-directive\n\
 999         Ignore #line preprocessor directives in C and derived languages.");
1000
1001   if (CTAGS)
1002     puts ("--members\n\
1003         Create tag entries for members of structures in some languages.");
1004   else
1005     puts ("--no-members\n\
1006         Do not create tag entries for members of structures\n\
1007         in some languages.");
1008
1009   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1010         Make a tag for each line matching a regular expression pattern\n\
1011         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1012         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
1013         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1014         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1015   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1016         For example Tcl named tags can be created with:\n\
1017           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1018         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1019         `m' means to allow multi-line matches, `s' implies `m' and\n\
1020         causes dot to match any character, including newline.");
1021
1022   puts ("-R, --no-regex\n\
1023         Don't create tags from regexps for the following files.");
1024
1025   puts ("-I, --ignore-indentation\n\
1026         In C and C++ do not assume that a closing brace in the first\n\
1027         column is the final brace of a function or structure definition.");
1028
1029   puts ("-o FILE, --output=FILE\n\
1030         Write the tags to FILE.");
1031
1032   puts ("--parse-stdin=NAME\n\
1033         Read from standard input and record tags as belonging to file NAME.");
1034
1035   if (CTAGS)
1036     {
1037       puts ("-t, --typedefs\n\
1038         Generate tag entries for C and Ada typedefs.");
1039       puts ("-T, --typedefs-and-c++\n\
1040         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1041         and C++ member functions.");
1042     }
1043
1044   if (CTAGS)
1045     puts ("-u, --update\n\
1046         Update the tag entries for the given files, leaving tag\n\
1047         entries for other files in place.  Currently, this is\n\
1048         implemented by deleting the existing entries for the given\n\
1049         files and then rewriting the new entries at the end of the\n\
1050         tags file.  It is often faster to simply rebuild the entire\n\
1051         tag file than to use this.");
1052
1053   if (CTAGS)
1054     {
1055       puts ("-v, --vgrind\n\
1056         Print on the standard output an index of items intended for\n\
1057         human consumption, similar to the output of vgrind.  The index\n\
1058         is sorted, and gives the page number of each item.");
1059
1060       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1061         puts ("-w, --no-duplicates\n\
1062         Do not create duplicate tag entries, for compatibility with\n\
1063         traditional ctags.");
1064
1065       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066         puts ("-w, --no-warn\n\
1067         Suppress warning messages about duplicate tag entries.");
1068
1069       puts ("-x, --cxref\n\
1070         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1071         The output uses line numbers instead of page numbers, but\n\
1072         beyond that the differences are cosmetic; try both to see\n\
1073         which you like.");
1074     }
1075
1076   puts ("-V, --version\n\
1077         Print the version of the program.\n\
1078 -h, --help\n\
1079         Print this help message.\n\
1080         Followed by one or more `--language' options prints detailed\n\
1081         help about tag generation for the specified languages.");
1082
1083   print_language_names ();
1084
1085   puts ("");
1086   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1087
1088   exit (EXIT_SUCCESS);
1089 }
1090
1091 \f
1092 int
1093 main (argc, argv)
1094      int argc;
1095      char *argv[];
1096 {
1097   int i;
1098   unsigned int nincluded_files;
1099   char **included_files;
1100   argument *argbuffer;
1101   int current_arg, file_count;
1102   linebuffer filename_lb;
1103   bool help_asked = FALSE;
1104  char *optstring;
1105  int opt;
1106
1107
1108 #ifdef DOS_NT
1109   _fmode = O_BINARY;   /* all of files are treated as binary files */
1110 #endif /* DOS_NT */
1111
1112   progname = argv[0];
1113   nincluded_files = 0;
1114   included_files = xnew (argc, char *);
1115   current_arg = 0;
1116   file_count = 0;
1117
1118   /* Allocate enough no matter what happens.  Overkill, but each one
1119      is small. */
1120   argbuffer = xnew (argc, argument);
1121
1122   /*
1123    * Always find typedefs and structure tags.
1124    * Also default to find macro constants, enum constants, struct
1125    * members and global variables.  Do it for both etags and ctags.
1126    */
1127   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1128   globals = members = TRUE;
1129
1130   /* When the optstring begins with a '-' getopt_long does not rearrange the
1131      non-options arguments to be at the end, but leaves them alone. */
1132   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1133                       "ac:Cf:Il:o:r:RSVhH",
1134                       (CTAGS) ? "BxdtTuvw" : "Di:");
1135
1136   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1137     switch (opt)
1138       {
1139       case 0:
1140         /* If getopt returns 0, then it has already processed a
1141            long-named option.  We should do nothing.  */
1142         break;
1143
1144       case 1:
1145         /* This means that a file name has been seen.  Record it. */
1146         argbuffer[current_arg].arg_type = at_filename;
1147         argbuffer[current_arg].what     = optarg;
1148         ++current_arg;
1149         ++file_count;
1150         break;
1151
1152       case STDIN:
1153         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1154         argbuffer[current_arg].arg_type = at_stdin;
1155         argbuffer[current_arg].what     = optarg;
1156         ++current_arg;
1157         ++file_count;
1158         if (parsing_stdin)
1159           fatal ("cannot parse standard input more than once", (char *)NULL);
1160         parsing_stdin = TRUE;
1161         break;
1162
1163         /* Common options. */
1164       case 'a': append_to_tagfile = TRUE;       break;
1165       case 'C': cplusplus = TRUE;               break;
1166       case 'f':         /* for compatibility with old makefiles */
1167       case 'o':
1168         if (tagfile)
1169           {
1170             error ("-o option may only be given once.", (char *)NULL);
1171             suggest_asking_for_help ();
1172             /* NOTREACHED */
1173           }
1174         tagfile = optarg;
1175         break;
1176       case 'I':
1177       case 'S':         /* for backward compatibility */
1178         ignoreindent = TRUE;
1179         break;
1180       case 'l':
1181         {
1182           language *lang = get_language_from_langname (optarg);
1183           if (lang != NULL)
1184             {
1185               argbuffer[current_arg].lang = lang;
1186               argbuffer[current_arg].arg_type = at_language;
1187               ++current_arg;
1188             }
1189         }
1190         break;
1191       case 'c':
1192         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1193         optarg = concat (optarg, "i", ""); /* memory leak here */
1194         /* FALLTHRU */
1195       case 'r':
1196         argbuffer[current_arg].arg_type = at_regexp;
1197         argbuffer[current_arg].what = optarg;
1198         ++current_arg;
1199         break;
1200       case 'R':
1201         argbuffer[current_arg].arg_type = at_regexp;
1202         argbuffer[current_arg].what = NULL;
1203         ++current_arg;
1204         break;
1205       case 'V':
1206         print_version ();
1207         break;
1208       case 'h':
1209       case 'H':
1210         help_asked = TRUE;
1211         break;
1212
1213         /* Etags options */
1214       case 'D': constantypedefs = FALSE;                        break;
1215       case 'i': included_files[nincluded_files++] = optarg;     break;
1216
1217         /* Ctags options. */
1218       case 'B': searchar = '?';                                 break;
1219       case 'd': constantypedefs = TRUE;                         break;
1220       case 't': typedefs = TRUE;                                break;
1221       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1222       case 'u': update = TRUE;                                  break;
1223       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1224       case 'x': cxref_style = TRUE;                             break;
1225       case 'w': no_warnings = TRUE;                             break;
1226       default:
1227         suggest_asking_for_help ();
1228         /* NOTREACHED */
1229       }
1230
1231   /* No more options.  Store the rest of arguments. */
1232   for (; optind < argc; optind++)
1233     {
1234       argbuffer[current_arg].arg_type = at_filename;
1235       argbuffer[current_arg].what = argv[optind];
1236       ++current_arg;
1237       ++file_count;
1238     }
1239
1240   argbuffer[current_arg].arg_type = at_end;
1241
1242   if (help_asked)
1243     print_help (argbuffer);
1244     /* NOTREACHED */
1245
1246   if (nincluded_files == 0 && file_count == 0)
1247     {
1248       error ("no input files specified.", (char *)NULL);
1249       suggest_asking_for_help ();
1250       /* NOTREACHED */
1251     }
1252
1253   if (tagfile == NULL)
1254     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1255   cwd = etags_getcwd ();        /* the current working directory */
1256   if (cwd[strlen (cwd) - 1] != '/')
1257     {
1258       char *oldcwd = cwd;
1259       cwd = concat (oldcwd, "/", "");
1260       free (oldcwd);
1261     }
1262
1263   /* Compute base directory for relative file names. */
1264   if (streq (tagfile, "-")
1265       || strneq (tagfile, "/dev/", 5))
1266     tagfiledir = cwd;            /* relative file names are relative to cwd */
1267   else
1268     {
1269       canonicalize_filename (tagfile);
1270       tagfiledir = absolute_dirname (tagfile, cwd);
1271     }
1272
1273   init ();                      /* set up boolean "functions" */
1274
1275   linebuffer_init (&lb);
1276   linebuffer_init (&filename_lb);
1277   linebuffer_init (&filebuf);
1278   linebuffer_init (&token_name);
1279
1280   if (!CTAGS)
1281     {
1282       if (streq (tagfile, "-"))
1283         {
1284           tagf = stdout;
1285 #ifdef DOS_NT
1286           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1287              doesn't take effect until after `stdout' is already open). */
1288           if (!isatty (fileno (stdout)))
1289             setmode (fileno (stdout), O_BINARY);
1290 #endif /* DOS_NT */
1291         }
1292       else
1293         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1294       if (tagf == NULL)
1295         pfatal (tagfile);
1296     }
1297
1298   /*
1299    * Loop through files finding functions.
1300    */
1301   for (i = 0; i < current_arg; i++)
1302     {
1303       static language *lang;    /* non-NULL if language is forced */
1304       char *this_file;
1305
1306       switch (argbuffer[i].arg_type)
1307         {
1308         case at_language:
1309           lang = argbuffer[i].lang;
1310           break;
1311         case at_regexp:
1312           analyse_regex (argbuffer[i].what);
1313           break;
1314         case at_filename:
1315               this_file = argbuffer[i].what;
1316               /* Input file named "-" means read file names from stdin
1317                  (one per line) and use them. */
1318               if (streq (this_file, "-"))
1319                 {
1320                   if (parsing_stdin)
1321                     fatal ("cannot parse standard input AND read file names from it",
1322                            (char *)NULL);
1323                   while (readline_internal (&filename_lb, stdin) > 0)
1324                     process_file_name (filename_lb.buffer, lang);
1325                 }
1326               else
1327                 process_file_name (this_file, lang);
1328           break;
1329         case at_stdin:
1330           this_file = argbuffer[i].what;
1331           process_file (stdin, this_file, lang);
1332           break;
1333         }
1334     }
1335
1336   free_regexps ();
1337   free (lb.buffer);
1338   free (filebuf.buffer);
1339   free (token_name.buffer);
1340
1341   if (!CTAGS || cxref_style)
1342     {
1343       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1344       put_entries (nodehead);
1345       free_tree (nodehead);
1346       nodehead = NULL;
1347       if (!CTAGS)
1348         {
1349           fdesc *fdp;
1350
1351           /* Output file entries that have no tags. */
1352           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1353             if (!fdp->written)
1354               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1355
1356           while (nincluded_files-- > 0)
1357             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1358
1359           if (fclose (tagf) == EOF)
1360             pfatal (tagfile);
1361         }
1362
1363       exit (EXIT_SUCCESS);
1364     }
1365
1366   /* From here on, we are in (CTAGS && !cxref_style) */
1367   if (update)
1368     {
1369       char cmd[BUFSIZ];
1370       for (i = 0; i < current_arg; ++i)
1371         {
1372           switch (argbuffer[i].arg_type)
1373             {
1374             case at_filename:
1375             case at_stdin:
1376               break;
1377             default:
1378               continue;         /* the for loop */
1379             }
1380           sprintf (cmd,
1381                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1382                    tagfile, argbuffer[i].what, tagfile);
1383           if (system (cmd) != EXIT_SUCCESS)
1384             fatal ("failed to execute shell command", (char *)NULL);
1385         }
1386       append_to_tagfile = TRUE;
1387     }
1388
1389   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1390   if (tagf == NULL)
1391     pfatal (tagfile);
1392   put_entries (nodehead);       /* write all the tags (CTAGS) */
1393   free_tree (nodehead);
1394   nodehead = NULL;
1395   if (fclose (tagf) == EOF)
1396     pfatal (tagfile);
1397
1398   if (CTAGS)
1399     if (append_to_tagfile || update)
1400       {
1401         char cmd[2*BUFSIZ+20];
1402         /* Maybe these should be used:
1403            setenv ("LC_COLLATE", "C", 1);
1404            setenv ("LC_ALL", "C", 1); */
1405         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1406         exit (system (cmd));
1407       }
1408   return EXIT_SUCCESS;
1409 }
1410
1411
1412 /*
1413  * Return a compressor given the file name.  If EXTPTR is non-zero,
1414  * return a pointer into FILE where the compressor-specific
1415  * extension begins.  If no compressor is found, NULL is returned
1416  * and EXTPTR is not significant.
1417  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1418  */
1419 static compressor *
1420 get_compressor_from_suffix (file, extptr)
1421      char *file;
1422      char **extptr;
1423 {
1424   compressor *compr;
1425   char *slash, *suffix;
1426
1427   /* File has been processed by canonicalize_filename,
1428      so we don't need to consider backslashes on DOS_NT.  */
1429   slash = etags_strrchr (file, '/');
1430   suffix = etags_strrchr (file, '.');
1431   if (suffix == NULL || suffix < slash)
1432     return NULL;
1433   if (extptr != NULL)
1434     *extptr = suffix;
1435   suffix += 1;
1436   /* Let those poor souls who live with DOS 8+3 file name limits get
1437      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1438      Only the first do loop is run if not MSDOS */
1439   do
1440     {
1441       for (compr = compressors; compr->suffix != NULL; compr++)
1442         if (streq (compr->suffix, suffix))
1443           return compr;
1444       if (!MSDOS)
1445         break;                  /* do it only once: not really a loop */
1446       if (extptr != NULL)
1447         *extptr = ++suffix;
1448     } while (*suffix != '\0');
1449   return NULL;
1450 }
1451
1452
1453
1454 /*
1455  * Return a language given the name.
1456  */
1457 static language *
1458 get_language_from_langname (name)
1459      const char *name;
1460 {
1461   language *lang;
1462
1463   if (name == NULL)
1464     error ("empty language name", (char *)NULL);
1465   else
1466     {
1467       for (lang = lang_names; lang->name != NULL; lang++)
1468         if (streq (name, lang->name))
1469           return lang;
1470       error ("unknown language \"%s\"", name);
1471     }
1472
1473   return NULL;
1474 }
1475
1476
1477 /*
1478  * Return a language given the interpreter name.
1479  */
1480 static language *
1481 get_language_from_interpreter (interpreter)
1482      char *interpreter;
1483 {
1484   language *lang;
1485   char **iname;
1486
1487   if (interpreter == NULL)
1488     return NULL;
1489   for (lang = lang_names; lang->name != NULL; lang++)
1490     if (lang->interpreters != NULL)
1491       for (iname = lang->interpreters; *iname != NULL; iname++)
1492         if (streq (*iname, interpreter))
1493             return lang;
1494
1495   return NULL;
1496 }
1497
1498
1499
1500 /*
1501  * Return a language given the file name.
1502  */
1503 static language *
1504 get_language_from_filename (file, case_sensitive)
1505      char *file;
1506      bool case_sensitive;
1507 {
1508   language *lang;
1509   char **name, **ext, *suffix;
1510
1511   /* Try whole file name first. */
1512   for (lang = lang_names; lang->name != NULL; lang++)
1513     if (lang->filenames != NULL)
1514       for (name = lang->filenames; *name != NULL; name++)
1515         if ((case_sensitive)
1516             ? streq (*name, file)
1517             : strcaseeq (*name, file))
1518           return lang;
1519
1520   /* If not found, try suffix after last dot. */
1521   suffix = etags_strrchr (file, '.');
1522   if (suffix == NULL)
1523     return NULL;
1524   suffix += 1;
1525   for (lang = lang_names; lang->name != NULL; lang++)
1526     if (lang->suffixes != NULL)
1527       for (ext = lang->suffixes; *ext != NULL; ext++)
1528         if ((case_sensitive)
1529             ? streq (*ext, suffix)
1530             : strcaseeq (*ext, suffix))
1531           return lang;
1532   return NULL;
1533 }
1534
1535 \f
1536 /*
1537  * This routine is called on each file argument.
1538  */
1539 static void
1540 process_file_name (file, lang)
1541      char *file;
1542      language *lang;
1543 {
1544   struct stat stat_buf;
1545   FILE *inf;
1546   fdesc *fdp;
1547   compressor *compr;
1548   char *compressed_name, *uncompressed_name;
1549   char *ext, *real_name;
1550   int retval;
1551
1552   canonicalize_filename (file);
1553   if (streq (file, tagfile) && !streq (tagfile, "-"))
1554     {
1555       error ("skipping inclusion of %s in self.", file);
1556       return;
1557     }
1558   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1559     {
1560       compressed_name = NULL;
1561       real_name = uncompressed_name = savestr (file);
1562     }
1563   else
1564     {
1565       real_name = compressed_name = savestr (file);
1566       uncompressed_name = savenstr (file, ext - file);
1567     }
1568
1569   /* If the canonicalized uncompressed name
1570      has already been dealt with, skip it silently. */
1571   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1572     {
1573       assert (fdp->infname != NULL);
1574       if (streq (uncompressed_name, fdp->infname))
1575         goto cleanup;
1576     }
1577
1578   if (stat (real_name, &stat_buf) != 0)
1579     {
1580       /* Reset real_name and try with a different name. */
1581       real_name = NULL;
1582       if (compressed_name != NULL) /* try with the given suffix */
1583         {
1584           if (stat (uncompressed_name, &stat_buf) == 0)
1585             real_name = uncompressed_name;
1586         }
1587       else                      /* try all possible suffixes */
1588         {
1589           for (compr = compressors; compr->suffix != NULL; compr++)
1590             {
1591               compressed_name = concat (file, ".", compr->suffix);
1592               if (stat (compressed_name, &stat_buf) != 0)
1593                 {
1594                   if (MSDOS)
1595                     {
1596                       char *suf = compressed_name + strlen (file);
1597                       size_t suflen = strlen (compr->suffix) + 1;
1598                       for ( ; suf[1]; suf++, suflen--)
1599                         {
1600                           memmove (suf, suf + 1, suflen);
1601                           if (stat (compressed_name, &stat_buf) == 0)
1602                             {
1603                               real_name = compressed_name;
1604                               break;
1605                             }
1606                         }
1607                       if (real_name != NULL)
1608                         break;
1609                     } /* MSDOS */
1610                   free (compressed_name);
1611                   compressed_name = NULL;
1612                 }
1613               else
1614                 {
1615                   real_name = compressed_name;
1616                   break;
1617                 }
1618             }
1619         }
1620       if (real_name == NULL)
1621         {
1622           perror (file);
1623           goto cleanup;
1624         }
1625     } /* try with a different name */
1626
1627   if (!S_ISREG (stat_buf.st_mode))
1628     {
1629       error ("skipping %s: it is not a regular file.", real_name);
1630       goto cleanup;
1631     }
1632   if (real_name == compressed_name)
1633     {
1634       char *cmd = concat (compr->command, " ", real_name);
1635       inf = (FILE *) popen (cmd, "r");
1636       free (cmd);
1637     }
1638   else
1639     inf = fopen (real_name, "r");
1640   if (inf == NULL)
1641     {
1642       perror (real_name);
1643       goto cleanup;
1644     }
1645
1646   process_file (inf, uncompressed_name, lang);
1647
1648   if (real_name == compressed_name)
1649     retval = pclose (inf);
1650   else
1651     retval = fclose (inf);
1652   if (retval < 0)
1653     pfatal (file);
1654
1655  cleanup:
1656   free (compressed_name);
1657   free (uncompressed_name);
1658   last_node = NULL;
1659   curfdp = NULL;
1660   return;
1661 }
1662
1663 static void
1664 process_file (fh, fn, lang)
1665      FILE *fh;
1666      char *fn;
1667      language *lang;
1668 {
1669   static const fdesc emptyfdesc;
1670   fdesc *fdp;
1671
1672   /* Create a new input file description entry. */
1673   fdp = xnew (1, fdesc);
1674   *fdp = emptyfdesc;
1675   fdp->next = fdhead;
1676   fdp->infname = savestr (fn);
1677   fdp->lang = lang;
1678   fdp->infabsname = absolute_filename (fn, cwd);
1679   fdp->infabsdir = absolute_dirname (fn, cwd);
1680   if (filename_is_absolute (fn))
1681     {
1682       /* An absolute file name.  Canonicalize it. */
1683       fdp->taggedfname = absolute_filename (fn, NULL);
1684     }
1685   else
1686     {
1687       /* A file name relative to cwd.  Make it relative
1688          to the directory of the tags file. */
1689       fdp->taggedfname = relative_filename (fn, tagfiledir);
1690     }
1691   fdp->usecharno = TRUE;        /* use char position when making tags */
1692   fdp->prop = NULL;
1693   fdp->written = FALSE;         /* not written on tags file yet */
1694
1695   fdhead = fdp;
1696   curfdp = fdhead;              /* the current file description */
1697
1698   find_entries (fh);
1699
1700   /* If not Ctags, and if this is not metasource and if it contained no #line
1701      directives, we can write the tags and free all nodes pointing to
1702      curfdp. */
1703   if (!CTAGS
1704       && curfdp->usecharno      /* no #line directives in this file */
1705       && !curfdp->lang->metasource)
1706     {
1707       node *np, *prev;
1708
1709       /* Look for the head of the sublist relative to this file.  See add_node
1710          for the structure of the node tree. */
1711       prev = NULL;
1712       for (np = nodehead; np != NULL; prev = np, np = np->left)
1713         if (np->fdp == curfdp)
1714           break;
1715
1716       /* If we generated tags for this file, write and delete them. */
1717       if (np != NULL)
1718         {
1719           /* This is the head of the last sublist, if any.  The following
1720              instructions depend on this being true. */
1721           assert (np->left == NULL);
1722
1723           assert (fdhead == curfdp);
1724           assert (last_node->fdp == curfdp);
1725           put_entries (np);     /* write tags for file curfdp->taggedfname */
1726           free_tree (np);       /* remove the written nodes */
1727           if (prev == NULL)
1728             nodehead = NULL;    /* no nodes left */
1729           else
1730             prev->left = NULL;  /* delete the pointer to the sublist */
1731         }
1732     }
1733 }
1734
1735 /*
1736  * This routine sets up the boolean pseudo-functions which work
1737  * by setting boolean flags dependent upon the corresponding character.
1738  * Every char which is NOT in that string is not a white char.  Therefore,
1739  * all of the array "_wht" is set to FALSE, and then the elements
1740  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1741  * of a char is TRUE if it is the string "white", else FALSE.
1742  */
1743 static void
1744 init ()
1745 {
1746   register char *sp;
1747   register int i;
1748
1749   for (i = 0; i < CHARS; i++)
1750     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1751   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1752   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1753   notinname('\0') = notinname('\n');
1754   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1755   begtoken('\0') = begtoken('\n');
1756   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1757   intoken('\0') = intoken('\n');
1758   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1759   endtoken('\0') = endtoken('\n');
1760 }
1761
1762 /*
1763  * This routine opens the specified file and calls the function
1764  * which finds the function and type definitions.
1765  */
1766 static void
1767 find_entries (inf)
1768      FILE *inf;
1769 {
1770   char *cp;
1771   language *lang = curfdp->lang;
1772   Lang_function *parser = NULL;
1773
1774   /* If user specified a language, use it. */
1775   if (lang != NULL && lang->function != NULL)
1776     {
1777       parser = lang->function;
1778     }
1779
1780   /* Else try to guess the language given the file name. */
1781   if (parser == NULL)
1782     {
1783       lang = get_language_from_filename (curfdp->infname, TRUE);
1784       if (lang != NULL && lang->function != NULL)
1785         {
1786           curfdp->lang = lang;
1787           parser = lang->function;
1788         }
1789     }
1790
1791   /* Else look for sharp-bang as the first two characters. */
1792   if (parser == NULL
1793       && readline_internal (&lb, inf) > 0
1794       && lb.len >= 2
1795       && lb.buffer[0] == '#'
1796       && lb.buffer[1] == '!')
1797     {
1798       char *lp;
1799
1800       /* Set lp to point at the first char after the last slash in the
1801          line or, if no slashes, at the first nonblank.  Then set cp to
1802          the first successive blank and terminate the string. */
1803       lp = etags_strrchr (lb.buffer+2, '/');
1804       if (lp != NULL)
1805         lp += 1;
1806       else
1807         lp = skip_spaces (lb.buffer + 2);
1808       cp = skip_non_spaces (lp);
1809       *cp = '\0';
1810
1811       if (strlen (lp) > 0)
1812         {
1813           lang = get_language_from_interpreter (lp);
1814           if (lang != NULL && lang->function != NULL)
1815             {
1816               curfdp->lang = lang;
1817               parser = lang->function;
1818             }
1819         }
1820     }
1821
1822   /* We rewind here, even if inf may be a pipe.  We fail if the
1823      length of the first line is longer than the pipe block size,
1824      which is unlikely. */
1825   rewind (inf);
1826
1827   /* Else try to guess the language given the case insensitive file name. */
1828   if (parser == NULL)
1829     {
1830       lang = get_language_from_filename (curfdp->infname, FALSE);
1831       if (lang != NULL && lang->function != NULL)
1832         {
1833           curfdp->lang = lang;
1834           parser = lang->function;
1835         }
1836     }
1837
1838   /* Else try Fortran or C. */
1839   if (parser == NULL)
1840     {
1841       node *old_last_node = last_node;
1842
1843       curfdp->lang = get_language_from_langname ("fortran");
1844       find_entries (inf);
1845
1846       if (old_last_node == last_node)
1847         /* No Fortran entries found.  Try C. */
1848         {
1849           /* We do not tag if rewind fails.
1850              Only the file name will be recorded in the tags file. */
1851           rewind (inf);
1852           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1853           find_entries (inf);
1854         }
1855       return;
1856     }
1857
1858   if (!no_line_directive
1859       && curfdp->lang != NULL && curfdp->lang->metasource)
1860     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1861        file, or anyway we parsed a file that is automatically generated from
1862        this one.  If this is the case, the bingo.c file contained #line
1863        directives that generated tags pointing to this file.  Let's delete
1864        them all before parsing this file, which is the real source. */
1865     {
1866       fdesc **fdpp = &fdhead;
1867       while (*fdpp != NULL)
1868         if (*fdpp != curfdp
1869             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1870           /* We found one of those!  We must delete both the file description
1871              and all tags referring to it. */
1872           {
1873             fdesc *badfdp = *fdpp;
1874
1875             /* Delete the tags referring to badfdp->taggedfname
1876                that were obtained from badfdp->infname. */
1877             invalidate_nodes (badfdp, &nodehead);
1878
1879             *fdpp = badfdp->next; /* remove the bad description from the list */
1880             free_fdesc (badfdp);
1881           }
1882         else
1883           fdpp = &(*fdpp)->next; /* advance the list pointer */
1884     }
1885
1886   assert (parser != NULL);
1887
1888   /* Generic initialisations before reading from file. */
1889   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1890
1891   /* Generic initialisations before parsing file with readline. */
1892   lineno = 0;                  /* reset global line number */
1893   charno = 0;                  /* reset global char number */
1894   linecharno = 0;              /* reset global char number of line start */
1895
1896   parser (inf);
1897
1898   regex_tag_multiline ();
1899 }
1900
1901 \f
1902 /*
1903  * Check whether an implicitly named tag should be created,
1904  * then call `pfnote'.
1905  * NAME is a string that is internally copied by this function.
1906  *
1907  * TAGS format specification
1908  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1909  * The following is explained in some more detail in etc/ETAGS.EBNF.
1910  *
1911  * make_tag creates tags with "implicit tag names" (unnamed tags)
1912  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1913  *  1. NAME does not contain any of the characters in NONAM;
1914  *  2. LINESTART contains name as either a rightmost, or rightmost but
1915  *     one character, substring;
1916  *  3. the character, if any, immediately before NAME in LINESTART must
1917  *     be a character in NONAM;
1918  *  4. the character, if any, immediately after NAME in LINESTART must
1919  *     also be a character in NONAM.
1920  *
1921  * The implementation uses the notinname() macro, which recognises the
1922  * characters stored in the string `nonam'.
1923  * etags.el needs to use the same characters that are in NONAM.
1924  */
1925 static void
1926 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1927      char *name;                /* tag name, or NULL if unnamed */
1928      int namelen;               /* tag length */
1929      bool is_func;              /* tag is a function */
1930      char *linestart;           /* start of the line where tag is */
1931      int linelen;               /* length of the line where tag is */
1932      int lno;                   /* line number */
1933      long cno;                  /* character number */
1934 {
1935   bool named = (name != NULL && namelen > 0);
1936
1937   if (!CTAGS && named)          /* maybe set named to false */
1938     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1939        such that etags.el can guess a name from it. */
1940     {
1941       int i;
1942       register char *cp = name;
1943
1944       for (i = 0; i < namelen; i++)
1945         if (notinname (*cp++))
1946           break;
1947       if (i == namelen)                         /* rule #1 */
1948         {
1949           cp = linestart + linelen - namelen;
1950           if (notinname (linestart[linelen-1]))
1951             cp -= 1;                            /* rule #4 */
1952           if (cp >= linestart                   /* rule #2 */
1953               && (cp == linestart
1954                   || notinname (cp[-1]))        /* rule #3 */
1955               && strneq (name, cp, namelen))    /* rule #2 */
1956             named = FALSE;      /* use implicit tag name */
1957         }
1958     }
1959
1960   if (named)
1961     name = savenstr (name, namelen);
1962   else
1963     name = NULL;
1964   pfnote (name, is_func, linestart, linelen, lno, cno);
1965 }
1966
1967 /* Record a tag. */
1968 static void
1969 pfnote (name, is_func, linestart, linelen, lno, cno)
1970      char *name;                /* tag name, or NULL if unnamed */
1971      bool is_func;              /* tag is a function */
1972      char *linestart;           /* start of the line where tag is */
1973      int linelen;               /* length of the line where tag is */
1974      int lno;                   /* line number */
1975      long cno;                  /* character number */
1976 {
1977   register node *np;
1978
1979   assert (name == NULL || name[0] != '\0');
1980   if (CTAGS && name == NULL)
1981     return;
1982
1983   np = xnew (1, node);
1984
1985   /* If ctags mode, change name "main" to M<thisfilename>. */
1986   if (CTAGS && !cxref_style && streq (name, "main"))
1987     {
1988       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1989       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1990       fp = etags_strrchr (np->name, '.');
1991       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1992         fp[0] = '\0';
1993     }
1994   else
1995     np->name = name;
1996   np->valid = TRUE;
1997   np->been_warned = FALSE;
1998   np->fdp = curfdp;
1999   np->is_func = is_func;
2000   np->lno = lno;
2001   if (np->fdp->usecharno)
2002     /* Our char numbers are 0-base, because of C language tradition?
2003        ctags compatibility?  old versions compatibility?   I don't know.
2004        Anyway, since emacs's are 1-base we expect etags.el to take care
2005        of the difference.  If we wanted to have 1-based numbers, we would
2006        uncomment the +1 below. */
2007     np->cno = cno /* + 1 */ ;
2008   else
2009     np->cno = invalidcharno;
2010   np->left = np->right = NULL;
2011   if (CTAGS && !cxref_style)
2012     {
2013       if (strlen (linestart) < 50)
2014         np->regex = concat (linestart, "$", "");
2015       else
2016         np->regex = savenstr (linestart, 50);
2017     }
2018   else
2019     np->regex = savenstr (linestart, linelen);
2020
2021   add_node (np, &nodehead);
2022 }
2023
2024 /*
2025  * free_tree ()
2026  *      recurse on left children, iterate on right children.
2027  */
2028 static void
2029 free_tree (np)
2030      register node *np;
2031 {
2032   while (np)
2033     {
2034       register node *node_right = np->right;
2035       free_tree (np->left);
2036       free (np->name);
2037       free (np->regex);
2038       free (np);
2039       np = node_right;
2040     }
2041 }
2042
2043 /*
2044  * free_fdesc ()
2045  *      delete a file description
2046  */
2047 static void
2048 free_fdesc (fdp)
2049      register fdesc *fdp;
2050 {
2051   free (fdp->infname);
2052   free (fdp->infabsname);
2053   free (fdp->infabsdir);
2054   free (fdp->taggedfname);
2055   free (fdp->prop);
2056   free (fdp);
2057 }
2058
2059 /*
2060  * add_node ()
2061  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2062  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2063  *      balancing.
2064  *
2065  *      add_node is the only function allowed to add nodes, so it can
2066  *      maintain state.
2067  */
2068 static void
2069 add_node (np, cur_node_p)
2070      node *np, **cur_node_p;
2071 {
2072   register int dif;
2073   register node *cur_node = *cur_node_p;
2074
2075   if (cur_node == NULL)
2076     {
2077       *cur_node_p = np;
2078       last_node = np;
2079       return;
2080     }
2081
2082   if (!CTAGS)
2083     /* Etags Mode */
2084     {
2085       /* For each file name, tags are in a linked sublist on the right
2086          pointer.  The first tags of different files are a linked list
2087          on the left pointer.  last_node points to the end of the last
2088          used sublist. */
2089       if (last_node != NULL && last_node->fdp == np->fdp)
2090         {
2091           /* Let's use the same sublist as the last added node. */
2092           assert (last_node->right == NULL);
2093           last_node->right = np;
2094           last_node = np;
2095         }
2096       else if (cur_node->fdp == np->fdp)
2097         {
2098           /* Scanning the list we found the head of a sublist which is
2099              good for us.  Let's scan this sublist. */
2100           add_node (np, &cur_node->right);
2101         }
2102       else
2103         /* The head of this sublist is not good for us.  Let's try the
2104            next one. */
2105         add_node (np, &cur_node->left);
2106     } /* if ETAGS mode */
2107
2108   else
2109     {
2110       /* Ctags Mode */
2111       dif = strcmp (np->name, cur_node->name);
2112
2113       /*
2114        * If this tag name matches an existing one, then
2115        * do not add the node, but maybe print a warning.
2116        */
2117       if (no_duplicates && !dif)
2118         {
2119           if (np->fdp == cur_node->fdp)
2120             {
2121               if (!no_warnings)
2122                 {
2123                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2124                            np->fdp->infname, lineno, np->name);
2125                   fprintf (stderr, "Second entry ignored\n");
2126                 }
2127             }
2128           else if (!cur_node->been_warned && !no_warnings)
2129             {
2130               fprintf
2131                 (stderr,
2132                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2133                  np->fdp->infname, cur_node->fdp->infname, np->name);
2134               cur_node->been_warned = TRUE;
2135             }
2136           return;
2137         }
2138
2139       /* Actually add the node */
2140       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2141     } /* if CTAGS mode */
2142 }
2143
2144 /*
2145  * invalidate_nodes ()
2146  *      Scan the node tree and invalidate all nodes pointing to the
2147  *      given file description (CTAGS case) or free them (ETAGS case).
2148  */
2149 static void
2150 invalidate_nodes (badfdp, npp)
2151      fdesc *badfdp;
2152      node **npp;
2153 {
2154   node *np = *npp;
2155
2156   if (np == NULL)
2157     return;
2158
2159   if (CTAGS)
2160     {
2161       if (np->left != NULL)
2162         invalidate_nodes (badfdp, &np->left);
2163       if (np->fdp == badfdp)
2164         np->valid = FALSE;
2165       if (np->right != NULL)
2166         invalidate_nodes (badfdp, &np->right);
2167     }
2168   else
2169     {
2170       assert (np->fdp != NULL);
2171       if (np->fdp == badfdp)
2172         {
2173           *npp = np->left;      /* detach the sublist from the list */
2174           np->left = NULL;      /* isolate it */
2175           free_tree (np);       /* free it */
2176           invalidate_nodes (badfdp, npp);
2177         }
2178       else
2179         invalidate_nodes (badfdp, &np->left);
2180     }
2181 }
2182
2183 \f
2184 static int total_size_of_entries __P((node *));
2185 static int number_len __P((long));
2186
/* Number of decimal digits needed to print NUM, which is expected to
   be non-negative.  The result is 1 for any NUM smaller than 10. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to begin with, one more for each division by ten that
     still leaves something to divide.  */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2197
2198 /*
2199  * Return total number of characters that put_entries will output for
2200  * the nodes in the linked list at the right of the specified node.
2201  * This count is irrelevant with etags.el since emacs 19.34 at least,
2202  * but is still supplied for backward compatibility.
2203  */
2204 static int
2205 total_size_of_entries (np)
2206      register node *np;
2207 {
2208   register int total = 0;
2209
2210   for (; np != NULL; np = np->right)
2211     if (np->valid)
2212       {
2213         total += strlen (np->regex) + 1;                /* pat\177 */
2214         if (np->name != NULL)
2215           total += strlen (np->name) + 1;               /* name\001 */
2216         total += number_len ((long) np->lno) + 1;       /* lno, */
2217         if (np->cno != invalidcharno)                   /* cno */
2218           total += number_len (np->cno);
2219         total += 1;                                     /* newline */
2220       }
2221
2222   return total;
2223 }
2224
2225 static void
2226 put_entries (np)
2227      register node *np;
2228 {
2229   register char *sp;
2230   static fdesc *fdp = NULL;
2231
2232   if (np == NULL)
2233     return;
2234
2235   /* Output subentries that precede this one */
2236   if (CTAGS)
2237     put_entries (np->left);
2238
2239   /* Output this entry */
2240   if (np->valid)
2241     {
2242       if (!CTAGS)
2243         {
2244           /* Etags mode */
2245           if (fdp != np->fdp)
2246             {
2247               fdp = np->fdp;
2248               fprintf (tagf, "\f\n%s,%d\n",
2249                        fdp->taggedfname, total_size_of_entries (np));
2250               fdp->written = TRUE;
2251             }
2252           fputs (np->regex, tagf);
2253           fputc ('\177', tagf);
2254           if (np->name != NULL)
2255             {
2256               fputs (np->name, tagf);
2257               fputc ('\001', tagf);
2258             }
2259           fprintf (tagf, "%d,", np->lno);
2260           if (np->cno != invalidcharno)
2261             fprintf (tagf, "%ld", np->cno);
2262           fputs ("\n", tagf);
2263         }
2264       else
2265         {
2266           /* Ctags mode */
2267           if (np->name == NULL)
2268             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2269
2270           if (cxref_style)
2271             {
2272               if (vgrind_style)
2273                 fprintf (stdout, "%s %s %d\n",
2274                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2275               else
2276                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2277                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2278             }
2279           else
2280             {
2281               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2282
2283               if (np->is_func)
2284                 {               /* function or #define macro with args */
2285                   putc (searchar, tagf);
2286                   putc ('^', tagf);
2287
2288                   for (sp = np->regex; *sp; sp++)
2289                     {
2290                       if (*sp == '\\' || *sp == searchar)
2291                         putc ('\\', tagf);
2292                       putc (*sp, tagf);
2293                     }
2294                   putc (searchar, tagf);
2295                 }
2296               else
2297                 {               /* anything else; text pattern inadequate */
2298                   fprintf (tagf, "%d", np->lno);
2299                 }
2300               putc ('\n', tagf);
2301             }
2302         }
2303     } /* if this node contains a valid tag */
2304
2305   /* Output subentries that follow this one */
2306   put_entries (np->right);
2307   if (!CTAGS)
2308     put_entries (np->left);
2309 }
2310
2311 \f
/* C extensions.
   These flags also appear in the c_ext field of the keyword table
   below.  The values overlap on purpose: C_STAR (0x3) and C_JAVA (0x5)
   both include the C_PLPL bit (0x1), and C_EXT (0xfff) covers all of
   them.  That overlap is presumably why the keyword table spells
   (C_JAVA & ~C_PLPL) for the words that belong to Java only.
   NOTE(review): how c_ext is tested against these flags is decided by
   code outside this section -- confirm there.  */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2320
/*
 * The C symbol tables.
 *
 * enum sym_type classifies the words of the keyword table given to
 * gperf below; the words quoted next to each value are the ones that
 * the table maps to it.
 */
enum sym_type
{
  /* Not used by any word of the table; presumably what a word that is
     not a keyword gets.  */
  st_none,
  /* @interface and @protocol; @implementation; @end.  */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* DEFUN, SYSCALL, ENTRY, PSEUDO.  */
  st_C_gnumacro,
  /* if, for, while, switch, return, import, package, friend;
     __attribute__ and GTY.  */
  st_C_ignore, st_C_attribute,
  /* extends, implements.  */
  st_C_javastruct,
  /* operator.  */
  st_C_operator,
  /* class; template.  */
  st_C_class, st_C_template,
  /* struct, union, namespace, domain, interface; extern; enum;
     define and undef; typedef.  */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2335
2336 static unsigned int hash __P((const char *, unsigned int));
2337 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2338 static enum sym_type C_symtype __P((char *, int, int));
2339
2340 /* Feed stuff between (but not including) %[ and %] lines to:
2341      gperf -m 5
2342 %[
2343 %compare-strncmp
2344 %enum
2345 %struct-type
2346 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2347 %%
2348 if,             0,                      st_C_ignore
2349 for,            0,                      st_C_ignore
2350 while,          0,                      st_C_ignore
2351 switch,         0,                      st_C_ignore
2352 return,         0,                      st_C_ignore
2353 __attribute__,  0,                      st_C_attribute
2354 GTY,            0,                      st_C_attribute
2355 @interface,     0,                      st_C_objprot
2356 @protocol,      0,                      st_C_objprot
2357 @implementation,0,                      st_C_objimpl
2358 @end,           0,                      st_C_objend
2359 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2360 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2361 friend,         C_PLPL,                 st_C_ignore
2362 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2363 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2364 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2365 class,          0,                      st_C_class
2366 namespace,      C_PLPL,                 st_C_struct
2367 domain,         C_STAR,                 st_C_struct
2368 union,          0,                      st_C_struct
2369 struct,         0,                      st_C_struct
2370 extern,         0,                      st_C_extern
2371 enum,           0,                      st_C_enum
2372 typedef,        0,                      st_C_typedef
2373 define,         0,                      st_C_define
2374 undef,          0,                      st_C_define
2375 operator,       C_PLPL,                 st_C_operator
2376 template,       0,                      st_C_template
2377 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2378 DEFUN,          0,                      st_C_gnumacro
2379 SYSCALL,        0,                      st_C_gnumacro
2380 ENTRY,          0,                      st_C_gnumacro
2381 PSEUDO,         0,                      st_C_gnumacro
2382 # These are defined inside C functions, so currently they are not met.
2383 # EXFUN used in glibc, DEFVAR_* in emacs.
2384 #EXFUN,         0,                      st_C_gnumacro
2385 #DEFVAR_,       0,                      st_C_gnumacro
2386 %]
2387 and replace lines between %< and %> with its output, then:
2388  - remove the #if characterset check
2389  - make in_word_set static and not inline. */
2390 /*%<*/
2391 /* C code produced by gperf version 3.0.1 */
2392 /* Command-line: gperf -m 5  */
2393 /* Computed positions: -k'2-3' */
2394
2395 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2396 /* maximum key range = 33, duplicates = 0 */
2397
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function for the C keyword table, derived from gperf output
   (computed positions: -k'2-3').  The value is LEN plus the
   association values of the second character of STR and, unless LEN
   is 2, of its third character too.  STR[1] is always read, and
   STR[2] is read for any LEN other than 2, so callers are expected to
   pass words of two characters at least.  */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* A word of exactly two characters has no third one to add.  */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2452
2453 static struct C_stab_entry *
2454 in_word_set (str, len)
2455      register const char *str;
2456      register unsigned int len;
2457 {
       /* Keyword lookup: return the wordlist entry whose name is exactly the
          first LEN characters of STR, or 0 when there is no such entry.
          The candidate entry is selected by hash (str, len).  */
2458   enum
2459     {
2460       TOTAL_KEYWORDS = 33,
2461       MIN_WORD_LENGTH = 2,
2462       MAX_WORD_LENGTH = 15,
2463       MIN_HASH_VALUE = 2,
2464       MAX_HASH_VALUE = 34
2465     };
2466
       /* Indexed by hash value, so the rows must not be reordered by hand.
          The two empty rows occupy the indices below MIN_HASH_VALUE.  Each
          row holds the keyword, a language mask (0 in most rows; C_symtype
          reads it as c_ext) and the st_C_* code C_symtype returns.  */
2467   static struct C_stab_entry wordlist[] =
2468     {
2469       {""}, {""},
2470       {"if",            0,                      st_C_ignore},
2471       {"GTY",           0,                      st_C_attribute},
2472       {"@end",          0,                      st_C_objend},
2473       {"union",         0,                      st_C_struct},
2474       {"define",                0,                      st_C_define},
2475       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2476       {"template",      0,                      st_C_template},
2477       {"operator",      C_PLPL,                 st_C_operator},
2478       {"@interface",    0,                      st_C_objprot},
2479       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2480       {"friend",                C_PLPL,                 st_C_ignore},
2481       {"typedef",       0,                      st_C_typedef},
2482       {"return",                0,                      st_C_ignore},
2483       {"@implementation",0,                     st_C_objimpl},
2484       {"@protocol",     0,                      st_C_objprot},
2485       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2486       {"extern",                0,                      st_C_extern},
2487       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2488       {"struct",                0,                      st_C_struct},
2489       {"domain",                C_STAR,                 st_C_struct},
2490       {"switch",                0,                      st_C_ignore},
2491       {"enum",          0,                      st_C_enum},
2492       {"for",           0,                      st_C_ignore},
2493       {"namespace",     C_PLPL,                 st_C_struct},
2494       {"class",         0,                      st_C_class},
2495       {"while",         0,                      st_C_ignore},
2496       {"undef",         0,                      st_C_define},
2497       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2498       {"__attribute__", 0,                      st_C_attribute},
2499       {"SYSCALL",       0,                      st_C_gnumacro},
2500       {"ENTRY",         0,                      st_C_gnumacro},
2501       {"PSEUDO",                0,                      st_C_gnumacro},
2502       {"DEFUN",         0,                      st_C_gnumacro}
2503     };
2504
       /* Lengths outside the range of keyword lengths cannot match, and are
          rejected before hash () gets to index STR.  */
2505   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2506     {
2507       register int key = hash (str, len);
2508
2509       if (key <= MAX_HASH_VALUE && key >= 0)
2510         {
2511           register const char *s = wordlist[key].name;
2512
               /* Compare the first character, then the remaining LEN - 1, and
                  require the table name to end there as well, so that a
                  keyword that merely begins with the token does not match.  */
2513           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2514             return &wordlist[key];
2515         }
2516     }
2517   return 0;
2518 }
2519 /*%>*/
2520
2521 static enum sym_type
2522 C_symtype (str, len, c_ext)
2523      char *str;
2524      int len;
2525      int c_ext;
2526 {
2527   register struct C_stab_entry *se = in_word_set (str, len);
2528
2529   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2530     return st_none;
2531   return se->type;
2532 }
2533
2534 \f
2535 /*
2536  * Ignoring __attribute__ ((list)).  consider_token sets this flag when
      * it meets a token whose type is st_C_attribute; while the flag is set,
      * C_entries does not pass tokens to consider_token.
2537  */
2538 static bool inattribute;        /* looking at an __attribute__ construct */
2539
2540 /*
2541  * C functions and variables are recognized using a simple
2542  * finite automaton.  fvdef is its state variable.
      * The two fdefun* states are entered through any keyword of type
      * st_C_gnumacro (DEFUN, ENTRY, SYSCALL, PSEUDO), not through DEFUN only.
2543  */
2544 static enum
2545 {
2546   fvnone,                       /* nothing seen */
2547   fdefunkey,                    /* GNU macro keyword (DEFUN etc.) seen */
2548   fdefunname,                   /* name after the GNU macro keyword seen */
2549   foperator,                    /* func: operator keyword seen (cplpl) */
2550   fvnameseen,                   /* function or variable name seen */
2551   fstartlist,                   /* func: just after open parenthesis */
2552   finlist,                      /* func: in parameter list */
2553   flistseen,                    /* func: after parameter list */
2554   fignore,                      /* func: before open brace */
2555   vignore                       /* var-like: ignore until ';' */
2556 } fvdef;
2557
2558 static bool fvextern;           /* func or var: extern keyword seen */
2559
2560 /*
2561  * typedefs are recognized using a simple finite automaton.
2562  * typdef is its state variable.  consider_token leaves tnone for
      * tkeyseen only when the `typedefs' option is set.
2563  */
2564 static enum
2565 {
2566   tnone,                        /* nothing seen */
2567   tkeyseen,                     /* typedef keyword seen */
2568   ttypeseen,                    /* defined type seen */
2569   tinbody,                      /* inside typedef body */
2570   tend,                         /* just before typedef tag */
2571   tignore                       /* junk after typedef tag */
2572 } typdef;
2573
2574 /*
2575  * struct-like structures (enum, struct and union) are recognized
2576  * using another simple finite automaton.  `structdef' is its state
2577  * variable.  The keyword table also gives the types st_C_struct and
      * st_C_class to class, namespace, interface and domain, so those
      * keywords drive this automaton too.
2578  */
2579 static enum
2580 {
2581   snone,                        /* nothing seen yet,
2582                                    or in struct body if bracelev > 0 */
2583   skeyseen,                     /* struct-like keyword seen */
2584   stagseen,                     /* struct-like tag seen */
2585   scolonseen                    /* colon seen after struct-like tag; also
                                        entered by consider_token on a Java
                                        `extends' or `implements' */
2586 } structdef;
2587
2588 /*
2589  * When objdef is different from onone, objtag is the name of the class.
      * consider_token assigns it the result of savenstr in the oprotocol and
      * oimplementation states; the previous value is never freed.
2590  */
2591 static char *objtag = "<uninited>";
2592
2593 /*
2594  * Yet another little state machine to deal with preprocessor lines.
      * C_entries enters dsharpseen, consider_token performs the later
      * transitions, and the CNL macro returns to dnone at the end of a line.
2595  */
2596 static enum
2597 {
2598   dnone,                        /* nothing seen */
2599   dsharpseen,                   /* '#' seen as first char on line */
2600   ddefineseen,                  /* '#' and 'define' seen */
2601   dignorerest                   /* ignore rest of line */
2602 } definedef;
2603
2604 /*
2605  * State machine for Objective C protocols and implementations.
2606  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2607  */
2608 static enum
2609 {
2610   onone,                        /* nothing seen */
2611   oprotocol,                    /* @interface or @protocol seen */
2612   oimplementation,              /* @implementation seen */
2613   otagseen,                     /* class name seen */
2614   oparenseen,                   /* parenthesis before category seen */
2615   ocatseen,                     /* category name seen */
2616   oinbody,                      /* in @implementation body */
2617   omethodsign,                  /* in @implementation body, after +/- */
2618   omethodtag,                   /* after method name */
2619   omethodcolon,                 /* after method colon */
2620   omethodparm,                  /* after method parameter */
2621   oignore                       /* wait for @end */
2622 } objdef;
2623
2624
2625 /*
2626  * Use this structure to keep info about the token read, and how it
2627  * should be tagged.  Used by the make_C_tag function to build a tag.
2628  */
2629 static struct tok
2630 {
2631   char *line;                   /* string containing the token */
2632   int offset;                   /* where the token starts in LINE */
2633   int length;                   /* token length */
2634   /*
2635     The previous members can be used to pass strings around for generic
2636     purposes.  The following ones specifically refer to creating tags.  In this
2637     case the token contained here is the pattern that will be used to create a
2638     tag.
2639   */
2640   bool valid;                   /* FALSE means do not create a tag; the
2641                                    token should be invalidated whenever a
2642                                    state machine is reset prematurely */
2643   bool named;                   /* create a named tag */
2644   int lineno;                   /* source line number of tag */
2645   long linepos;                 /* source char number of tag */
2646 } token;                        /* latest token read */
2647
2648 /*
2649  * Variables and functions for dealing with nested structures.
2650  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2651  */
2652 static void pushclass_above __P((int, char *, int));
2653 static void popclass_above __P((int));
2654 static void write_classname __P((linebuffer *, char *qualifier));
2655
2656 static struct {
2657   char **cname;                 /* nested class names; an entry is NULL
                                        when it was pushed without a name */
2658   int *bracelev;                /* nested class brace level */
2659   int nl;                       /* class nesting level (elements used) */
2660   int size;                     /* allocated length of both arrays */
2661 } cstack;                       /* stack for nested declaration tags */
2662 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2663 #define nestlev         (cstack.nl)
2664 /* After struct keyword or in struct body, not inside a nested function.
        The expansion refers to a variable named `bracelev', which must be
        in scope wherever the macro is used. */
2665 #define instruct        (structdef == snone && nestlev > 0                      \
2666                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2667
2668 static void
2669 pushclass_above (bracelev, str, len)
2670      int bracelev;
2671      char *str;
2672      int len;
2673 {
2674   int nl;
2675
2676   popclass_above (bracelev);
2677   nl = cstack.nl;
2678   if (nl >= cstack.size)
2679     {
2680       int size = cstack.size *= 2;
2681       xrnew (cstack.cname, size, char *);
2682       xrnew (cstack.bracelev, size, int);
2683     }
2684   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2685   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2686   cstack.bracelev[nl] = bracelev;
2687   cstack.nl = nl + 1;
2688 }
2689
2690 static void
2691 popclass_above (bracelev)
2692      int bracelev;
2693 {
2694   int nl;
2695
2696   for (nl = cstack.nl - 1;
2697        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2698        nl--)
2699     {
2700       free (cstack.cname[nl]);
2701       cstack.nl = nl;
2702     }
2703 }
2704
2705 static void
2706 write_classname (cn, qualifier)
2707      linebuffer *cn;
2708      char *qualifier;
2709 {
2710   int i, len;
2711   int qlen = strlen (qualifier);
2712
2713   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2714     {
2715       len = 0;
2716       cn->len = 0;
2717       cn->buffer[0] = '\0';
2718     }
2719   else
2720     {
2721       len = strlen (cstack.cname[0]);
2722       linebuffer_setlen (cn, len);
2723       strcpy (cn->buffer, cstack.cname[0]);
2724     }
2725   for (i = 1; i < cstack.nl; i++)
2726     {
2727       char *s;
2728       int slen;
2729
2730       s = cstack.cname[i];
2731       if (s == NULL)
2732         continue;
2733       slen = strlen (s);
2734       len += slen + qlen;
2735       linebuffer_setlen (cn, len);
2736       strncat (cn->buffer, qualifier, qlen);
2737       strncat (cn->buffer, s, slen);
2738     }
2739 }
2740
2741 \f
2742 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2743 static void make_C_tag __P((bool));
2744
2745 /*
2746  * consider_token ()
2747  *      checks to see if the current token is at the start of a
2748  *      function or variable, or corresponds to a typedef, or
2749  *      is a struct/union/enum tag, or #define, or an enum constant.
2750  *
      *      Returns TRUE when the token is a candidate for a tag, FALSE
      *      when it only advanced the state machines or is to be ignored.
      *
2751  *      *IS_FUNC_OR_VAR gets TRUE if the token is a function, a variable
2752  *      or a #define macro with args.  C_EXTP points to which language we
      *      are looking at; the mask it points to is updated when C++ is
      *      detected automatically.
2753  *
2754  * Globals
2755  *      fvdef                   IN OUT
2756  *      structdef               IN OUT
2757  *      definedef               IN OUT
2758  *      typdef                  IN OUT
2759  *      objdef                  IN OUT
      *      fvextern                OUT
      *      inattribute             OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2760  */
2761
2762 static bool
2763 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2764      register char *str;        /* IN: token pointer */
2765      register int len;          /* IN: token length */
2766      register int c;            /* IN: first char after the token */
2767      int *c_extp;               /* IN, OUT: C extensions mask */
2768      int bracelev;              /* IN: brace level */
2769      int parlev;                /* IN: parenthesis level */
2770      bool *is_func_or_var;      /* OUT: function or variable found */
2771 {
2772   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2773      structtype is the type of the preceding struct-like keyword, and
2774      structbracelev is the brace level where it has been seen. */
2775   static enum sym_type structtype;
2776   static int structbracelev;
2777   static enum sym_type toktype;
2778
2779
2780   toktype = C_symtype (str, len, *c_extp);
2781
2782   /*
2783    * Skip __attribute__
2784    */
2785   if (toktype == st_C_attribute)
2786     {
2787       inattribute = TRUE;
2788       return FALSE;
2789      }
2790
2791    /*
2792     * Advance the definedef state machine.
2793     */
2794    switch (definedef)
2795      {
2796      case dnone:
2797        /* We're not on a preprocessor line. */
2798        if (toktype == st_C_gnumacro)
2799          {
2800            fvdef = fdefunkey;
2801            return FALSE;
2802          }
2803        break;
2804      case dsharpseen:
            /* First token after the '#'.  Both `define' and `undef' have
               the type st_C_define in the keyword table, so either one
               leads to ddefineseen.  */
2805        if (toktype == st_C_define)
2806          {
2807            definedef = ddefineseen;
2808          }
2809        else
2810          {
2811            definedef = dignorerest;
2812          }
2813        return FALSE;
2814      case ddefineseen:
2815        /*
2816         * Make a tag for any macro, unless it is a constant
2817         * and constantypedefs is FALSE.
2818         */
2819        definedef = dignorerest;
2820        *is_func_or_var = (c == '(');
2821        if (!*is_func_or_var && !constantypedefs)
2822          return FALSE;
2823        else
2824          return TRUE;
2825      case dignorerest:
2826        return FALSE;
2827      default:
2828        error ("internal error: definedef value.", (char *)NULL);
2829      }
2830
2831    /*
2832     * Now typedefs
2833     */
2834    switch (typdef)
2835      {
2836      case tnone:
2837        if (toktype == st_C_typedef)
2838          {
2839            if (typedefs)
2840              typdef = tkeyseen;
2841            fvextern = FALSE;
2842            fvdef = fvnone;
2843            return FALSE;
2844          }
2845        break;
2846      case tkeyseen:
            /* The token after `typedef': a plain name or a struct-like
               keyword starts the type being defined.  Other token types
               leave the state unchanged.  */
2847        switch (toktype)
2848          {
2849          case st_none:
2850          case st_C_class:
2851          case st_C_struct:
2852          case st_C_enum:
2853            typdef = ttypeseen;
2854          }
2855        break;
2856      case ttypeseen:
2857        if (structdef == snone && fvdef == fvnone)
2858          {
2859            fvdef = fvnameseen;
2860            return TRUE;
2861          }
2862        break;
2863      case tend:
            /* Just before the typedef tag: anything but a struct-like
               keyword is accepted.  */
2864        switch (toktype)
2865          {
2866          case st_C_class:
2867          case st_C_struct:
2868          case st_C_enum:
2869            return FALSE;
2870          }
2871        return TRUE;
2872      }
2873
        /* Struct-like keywords.  */
2874    switch (toktype)
2875      {
2876      case st_C_javastruct:
            /* Java `extends' or `implements' after a class tag is handled
               like the colon after a C++ class tag.  */
2877        if (structdef == stagseen)
2878          structdef = scolonseen;
2879        return FALSE;
2880      case st_C_template:
2881      case st_C_class:
2882        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2883            && bracelev == 0
2884            && definedef == dnone && structdef == snone
2885            && typdef == tnone && fvdef == fvnone)
2886          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2887        if (toktype == st_C_template)
2888          break;
2889        /* FALLTHRU */
2890      case st_C_struct:
2891      case st_C_enum:
2892        if (parlev == 0
2893            && fvdef != vignore
2894            && (typdef == tkeyseen
2895                || (typedefs_or_cplusplus && structdef == snone)))
2896          {
2897            structdef = skeyseen;
2898            structtype = toktype;
2899            structbracelev = bracelev;
2900            if (fvdef == fvnameseen)
2901              fvdef = fvnone;
2902          }
2903        return FALSE;
2904      }
2905
        /* The token that follows a struct-like keyword is the tag.  */
2906    if (structdef == skeyseen)
2907      {
2908        structdef = stagseen;
2909        return TRUE;
2910      }
2911
2912    if (typdef != tnone)
2913      definedef = dnone;
2914
2915    /* Detect Objective C constructs. */
2916    switch (objdef)
2917      {
2918      case onone:
2919        switch (toktype)
2920          {
2921          case st_C_objprot:
2922            objdef = oprotocol;
2923            return FALSE;
2924          case st_C_objimpl:
2925            objdef = oimplementation;
2926            return FALSE;
2927          }
2928        break;
2929      case oimplementation:
2930        /* Save the class tag for functions or variables defined inside. */
2931        objtag = savenstr (str, len);
2932        objdef = oinbody;
2933        return FALSE;
2934      case oprotocol:
2935        /* Save the class tag for categories. */
2936        objtag = savenstr (str, len);
2937        objdef = otagseen;
2938        *is_func_or_var = TRUE;
2939        return TRUE;
2940      case oparenseen:
2941        objdef = ocatseen;
2942        *is_func_or_var = TRUE;
2943        return TRUE;
2944      case oinbody:
2945        break;
2946      case omethodsign:
            /* First part of a method name: start token_name afresh.  */
2947        if (parlev == 0)
2948          {
2949            fvdef = fvnone;
2950            objdef = omethodtag;
2951            linebuffer_setlen (&token_name, len);
2952            strncpy (token_name.buffer, str, len);
2953            token_name.buffer[len] = '\0';
2954            return TRUE;
2955          }
2956        return FALSE;
2957      case omethodcolon:
2958        if (parlev == 0)
2959          objdef = omethodparm;
2960        return FALSE;
2961      case omethodparm:
            /* A further part of a method name: append it to token_name.  */
2962        if (parlev == 0)
2963          {
2964            fvdef = fvnone;
2965            objdef = omethodtag;
2966            linebuffer_setlen (&token_name, token_name.len + len);
2967            strncat (token_name.buffer, str, len);
2968            return TRUE;
2969          }
2970        return FALSE;
2971      case oignore:
2972        if (toktype == st_C_objend)
2973          {
2974            /* Memory leakage here: the string pointed by objtag is
2975               never released, because many tests would be needed to
2976               avoid breaking on incorrect input code.  The amount of
2977               memory leaked here is the sum of the lengths of the
2978               class tags.
2979            free (objtag); */
2980            objdef = onone;
2981          }
2982        return FALSE;
2983      }
2984
2985    /* A function, variable or enum constant? */
2986    switch (toktype)
2987      {
2988      case st_C_extern:
2989        fvextern = TRUE;
2990        switch  (fvdef)
2991          {
2992          case finlist:
2993          case flistseen:
2994          case fignore:
2995          case vignore:
2996            break;
2997          default:
2998            fvdef = fvnone;
2999          }
3000        return FALSE;
3001      case st_C_ignore:
3002        fvextern = FALSE;
3003        fvdef = vignore;
3004        return FALSE;
3005      case st_C_operator:
3006        fvdef = foperator;
3007        *is_func_or_var = TRUE;
3008        return TRUE;
3009      case st_none:
3010        if (constantypedefs
3011            && structdef == snone
3012            && structtype == st_C_enum && bracelev > structbracelev)
3013          return TRUE;           /* enum constant */
3014        switch (fvdef)
3015          {
3016          case fdefunkey:
3017            if (bracelev > 0)
3018              break;
3019            fvdef = fdefunname;  /* GNU macro */
3020            *is_func_or_var = TRUE;
3021            return TRUE;
3022          case fvnone:
3023            switch (typdef)
3024              {
3025              case ttypeseen:
3026                return FALSE;
3027              case tnone:
                    /* The tokens `asm' and `__asm__' are not names: ignore
                       what follows as for a variable.  */
3028                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3029                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3030                  {
3031                    fvdef = vignore;
3032                    return FALSE;
3033                  }
3034                break;
3035              }
3036           /* FALLTHRU */
3037           case fvnameseen:
               /* A token ending in `::operator' starts an operator name and
                  is taken as evidence of C++.  */
3038           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3039             {
3040               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3041                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3042               fvdef = foperator;
3043               *is_func_or_var = TRUE;
3044               return TRUE;
3045             }
3046           if (bracelev > 0 && !instruct)
3047             break;
3048           fvdef = fvnameseen;   /* function or variable */
3049           *is_func_or_var = TRUE;
3050           return TRUE;
3051         }
3052       break;
3053     }
3054
3055   return FALSE;
3056 }
3057
3058 \f
3059 /*
3060  * C_entries often keeps pointers to tokens or lines which are older than
3061  * the line currently read.  By keeping two line buffers, and switching
3062  * them at end of line, it is possible to use those pointers.
3063  */
3064 static struct
3065 {
3066   long linepos;
3067   linebuffer lb;
3068 } lbs[2];
3069
     /* The macros below expand to references to variables that must be in
        scope where they are used: curndx, newndx, c_ext, charno, inf, lp,
        quotednl and savetoken.  */
3070 #define current_lb_is_new (newndx == curndx)
3071 #define switch_line_buffers() (curndx = 1 - curndx)
3072
3073 #define curlb (lbs[curndx].lb)
3074 #define newlb (lbs[newndx].lb)
3075 #define curlinepos (lbs[curndx].linepos)
3076 #define newlinepos (lbs[newndx].linepos)
3077
     /* Tests on the language mask c_ext.  */
3078 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3079 #define cplpl (c_ext & C_PLPL)
3080 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3081
     /* Go to the next line: store charno as the position of the current
        buffer, read a line into that buffer, restart scanning at its
        beginning, clear quotednl and make the current buffer the new one.
        definedef is left as it is.  */
3082 #define CNL_SAVE_DEFINEDEF()                                            \
3083 do {                                                                    \
3084   curlinepos = charno;                                                  \
3085   readline (&curlb, inf);                                               \
3086   lp = curlb.buffer;                                                    \
3087   quotednl = FALSE;                                                     \
3088   newndx = curndx;                                                      \
3089 } while (0)
3090
     /* As CNL_SAVE_DEFINEDEF, then put back the token kept in savetoken,
        if it is valid, and reset definedef to dnone.  */
3091 #define CNL()                                                           \
3092 do {                                                                    \
3093   CNL_SAVE_DEFINEDEF();                                                 \
3094   if (savetoken.valid)                                                  \
3095     {                                                                   \
3096       token = savetoken;                                                \
3097       savetoken.valid = FALSE;                                          \
3098     }                                                                   \
3099   definedef = dnone;                                                    \
3100 } while (0)
3101
3102
3103 static void
3104 make_C_tag (isfun)
3105      bool isfun;
3106 {
3107   /* This function is never called when token.valid is FALSE, but
3108      we must protect against invalid input or internal errors. */
3109   if (token.valid)
3110     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3111               token.offset+token.length+1, token.lineno, token.linepos);
3112   else if (DEBUG)
3113     {                             /* this branch is optimised away if !DEBUG */
3114       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3115                 token_name.len + 17, isfun, token.line,
3116                 token.offset+token.length+1, token.lineno, token.linepos);
3117       error ("INVALID TOKEN", NULL);
3118     }
3119
3120   token.valid = FALSE;
3121 }
3122
3123
3124 /*
3125  * C_entries ()
3126  *      This routine finds functions, variables, typedefs,
3127  *      #define's, enum constants and struct/union/enum definitions in
3128  *      C syntax and adds them to the list.
3129  */
3130 static void
3131 C_entries (c_ext, inf)
3132      int c_ext;                 /* extension of C */
3133      FILE *inf;                 /* input file */
3134 {
3135   register char c;              /* latest char read; '\0' for end of line */
3136   register char *lp;            /* pointer one beyond the character `c' */
3137   int curndx, newndx;           /* indices for current and new lb */
3138   register int tokoff;          /* offset in line of start of current token */
3139   register int toklen;          /* length of current token */
3140   char *qualifier;              /* string used to qualify names */
3141   int qlen;                     /* length of qualifier */
3142   int bracelev;                 /* current brace level */
3143   int bracketlev;               /* current bracket level */
3144   int parlev;                   /* current parenthesis level */
3145   int attrparlev;               /* __attribute__ parenthesis level */
3146   int templatelev;              /* current template level */
3147   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3148   bool incomm, inquote, inchar, quotednl, midtoken;
3149   bool yacc_rules;              /* in the rules part of a yacc file */
3150   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3151
3152
3153   linebuffer_init (&lbs[0].lb);
3154   linebuffer_init (&lbs[1].lb);
3155   if (cstack.size == 0)
3156     {
3157       cstack.size = (DEBUG) ? 1 : 4;
3158       cstack.nl = 0;
3159       cstack.cname = xnew (cstack.size, char *);
3160       cstack.bracelev = xnew (cstack.size, int);
3161     }
3162
3163   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3164   curndx = newndx = 0;
3165   lp = curlb.buffer;
3166   *lp = 0;
3167
3168   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3169   structdef = snone; definedef = dnone; objdef = onone;
3170   yacc_rules = FALSE;
3171   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3172   token.valid = savetoken.valid = FALSE;
3173   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3174   if (cjava)
3175     { qualifier = "."; qlen = 1; }
3176   else
3177     { qualifier = "::"; qlen = 2; }
3178
3179
3180   while (!feof (inf))
3181     {
3182       c = *lp++;
3183       if (c == '\\')
3184         {
3185           /* If we are at the end of the line, the next character is a
3186              '\0'; do not skip it, because it is what tells us
3187              to read the next line.  */
3188           if (*lp == '\0')
3189             {
3190               quotednl = TRUE;
3191               continue;
3192             }
3193           lp++;
3194           c = ' ';
3195         }
3196       else if (incomm)
3197         {
3198           switch (c)
3199             {
3200             case '*':
3201               if (*lp == '/')
3202                 {
3203                   c = *lp++;
3204                   incomm = FALSE;
3205                 }
3206               break;
3207             case '\0':
3208               /* Newlines inside comments do not end macro definitions in
3209                  traditional cpp. */
3210               CNL_SAVE_DEFINEDEF ();
3211               break;
3212             }
3213           continue;
3214         }
3215       else if (inquote)
3216         {
3217           switch (c)
3218             {
3219             case '"':
3220               inquote = FALSE;
3221               break;
3222             case '\0':
3223               /* Newlines inside strings do not end macro definitions
3224                  in traditional cpp, even though compilers don't
3225                  usually accept them. */
3226               CNL_SAVE_DEFINEDEF ();
3227               break;
3228             }
3229           continue;
3230         }
3231       else if (inchar)
3232         {
3233           switch (c)
3234             {
3235             case '\0':
3236               /* Hmmm, something went wrong. */
3237               CNL ();
3238               /* FALLTHRU */
3239             case '\'':
3240               inchar = FALSE;
3241               break;
3242             }
3243           continue;
3244         }
3245       else if (bracketlev > 0)
3246         {
3247           switch (c)
3248             {
3249             case ']':
3250               if (--bracketlev > 0)
3251                 continue;
3252               break;
3253             case '\0':
3254               CNL_SAVE_DEFINEDEF ();
3255               break;
3256             }
3257           continue;
3258         }
3259       else switch (c)
3260         {
3261         case '"':
3262           inquote = TRUE;
3263           if (inattribute)
3264             break;
3265           switch (fvdef)
3266             {
3267             case fdefunkey:
3268             case fstartlist:
3269             case finlist:
3270             case fignore:
3271             case vignore:
3272               break;
3273             default:
3274               fvextern = FALSE;
3275               fvdef = fvnone;
3276             }
3277           continue;
3278         case '\'':
3279           inchar = TRUE;
3280           if (inattribute)
3281             break;
3282           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3283             {
3284               fvextern = FALSE;
3285               fvdef = fvnone;
3286             }
3287           continue;
3288         case '/':
3289           if (*lp == '*')
3290             {
3291               incomm = TRUE;
3292               lp++;
3293               c = ' ';
3294             }
3295           else if (/* cplpl && */ *lp == '/')
3296             {
3297               c = '\0';
3298             }
3299           break;
3300         case '%':
3301           if ((c_ext & YACC) && *lp == '%')
3302             {
3303               /* Entering or exiting rules section in yacc file. */
3304               lp++;
3305               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3306               typdef = tnone; structdef = snone;
3307               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3308               bracelev = 0;
3309               yacc_rules = !yacc_rules;
3310               continue;
3311             }
3312           else
3313             break;
3314         case '#':
3315           if (definedef == dnone)
3316             {
3317               char *cp;
3318               bool cpptoken = TRUE;
3319
3320               /* Look back on this line.  If all blanks, or nonblanks
3321                  followed by an end of comment, this is a preprocessor
3322                  token. */
3323               for (cp = newlb.buffer; cp < lp-1; cp++)
3324                 if (!iswhite (*cp))
3325                   {
3326                     if (*cp == '*' && *(cp+1) == '/')
3327                       {
3328                         cp++;
3329                         cpptoken = TRUE;
3330                       }
3331                     else
3332                       cpptoken = FALSE;
3333                   }
3334               if (cpptoken)
3335                 definedef = dsharpseen;
3336             } /* if (definedef == dnone) */
3337           continue;
3338         case '[':
3339           bracketlev++;
3340             continue;
3341         } /* switch (c) */
3342
3343
3344       /* Consider token only if some involved conditions are satisfied. */
3345       if (typdef != tignore
3346           && definedef != dignorerest
3347           && fvdef != finlist
3348           && templatelev == 0
3349           && (definedef != dnone
3350               || structdef != scolonseen)
3351           && !inattribute)
3352         {
3353           if (midtoken)
3354             {
3355               if (endtoken (c))
3356                 {
3357                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3358                     /* This handles :: in the middle,
3359                        but not at the beginning of an identifier.
3360                        Also, space-separated :: is not recognised. */
3361                     {
3362                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3363                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3364                       lp += 2;
3365                       toklen += 2;
3366                       c = lp[-1];
3367                       goto still_in_token;
3368                     }
3369                   else
3370                     {
3371                       bool funorvar = FALSE;
3372
3373                       if (yacc_rules
3374                           || consider_token (newlb.buffer + tokoff, toklen, c,
3375                                              &c_ext, bracelev, parlev,
3376                                              &funorvar))
3377                         {
3378                           if (fvdef == foperator)
3379                             {
3380                               char *oldlp = lp;
3381                               lp = skip_spaces (lp-1);
3382                               if (*lp != '\0')
3383                                 lp += 1;
3384                               while (*lp != '\0'
3385                                      && !iswhite (*lp) && *lp != '(')
3386                                 lp += 1;
3387                               c = *lp++;
3388                               toklen += lp - oldlp;
3389                             }
3390                           token.named = FALSE;
3391                           if (!plainc
3392                               && nestlev > 0 && definedef == dnone)
3393                             /* in struct body */
3394                             {
3395                               write_classname (&token_name, qualifier);
3396                               linebuffer_setlen (&token_name,
3397                                                  token_name.len+qlen+toklen);
3398                               strcat (token_name.buffer, qualifier);
3399                               strncat (token_name.buffer,
3400                                        newlb.buffer + tokoff, toklen);
3401                               token.named = TRUE;
3402                             }
3403                           else if (objdef == ocatseen)
3404                             /* Objective C category */
3405                             {
3406                               int len = strlen (objtag) + 2 + toklen;
3407                               linebuffer_setlen (&token_name, len);
3408                               strcpy (token_name.buffer, objtag);
3409                               strcat (token_name.buffer, "(");
3410                               strncat (token_name.buffer,
3411                                        newlb.buffer + tokoff, toklen);
3412                               strcat (token_name.buffer, ")");
3413                               token.named = TRUE;
3414                             }
3415                           else if (objdef == omethodtag
3416                                    || objdef == omethodparm)
3417                             /* Objective C method */
3418                             {
3419                               token.named = TRUE;
3420                             }
3421                           else if (fvdef == fdefunname)
3422                             /* GNU DEFUN and similar macros */
3423                             {
3424                               bool defun = (newlb.buffer[tokoff] == 'F');
3425                               int off = tokoff;
3426                               int len = toklen;
3427
3428                               /* Rewrite the tag so that emacs lisp DEFUNs
3429                                  can be found by their elisp name */
3430                               if (defun)
3431                                 {
3432                                   off += 1;
3433                                   len -= 1;
3434                                 }
3435                               linebuffer_setlen (&token_name, len);
3436                               strncpy (token_name.buffer,
3437                                        newlb.buffer + off, len);
3438                               token_name.buffer[len] = '\0';
3439                               if (defun)
3440                                 while (--len >= 0)
3441                                   if (token_name.buffer[len] == '_')
3442                                     token_name.buffer[len] = '-';
3443                               token.named = defun;
3444                             }
3445                           else
3446                             {
3447                               linebuffer_setlen (&token_name, toklen);
3448                               strncpy (token_name.buffer,
3449                                        newlb.buffer + tokoff, toklen);
3450                               token_name.buffer[toklen] = '\0';
3451                               /* Name macros and members. */
3452                               token.named = (structdef == stagseen
3453                                              || typdef == ttypeseen
3454                                              || typdef == tend
3455                                              || (funorvar
3456                                                  && definedef == dignorerest)
3457                                              || (funorvar
3458                                                  && definedef == dnone
3459                                                  && structdef == snone
3460                                                  && bracelev > 0));
3461                             }
3462                           token.lineno = lineno;
3463                           token.offset = tokoff;
3464                           token.length = toklen;
3465                           token.line = newlb.buffer;
3466                           token.linepos = newlinepos;
3467                           token.valid = TRUE;
3468
3469                           if (definedef == dnone
3470                               && (fvdef == fvnameseen
3471                                   || fvdef == foperator
3472                                   || structdef == stagseen
3473                                   || typdef == tend
3474                                   || typdef == ttypeseen
3475                                   || objdef != onone))
3476                             {
3477                               if (current_lb_is_new)
3478                                 switch_line_buffers ();
3479                             }
3480                           else if (definedef != dnone
3481                                    || fvdef == fdefunname
3482                                    || instruct)
3483                             make_C_tag (funorvar);
3484                         }
3485                       else /* not yacc and consider_token failed */
3486                         {
3487                           if (inattribute && fvdef == fignore)
3488                             {
3489                               /* We have just met __attribute__ after a
3490                                  function parameter list: do not tag the
3491                                  function again. */
3492                               fvdef = fvnone;
3493                             }
3494                         }
3495                       midtoken = FALSE;
3496                     }
3497                 } /* if (endtoken (c)) */
3498               else if (intoken (c))
3499                 still_in_token:
3500                 {
3501                   toklen++;
3502                   continue;
3503                 }
3504             } /* if (midtoken) */
3505           else if (begtoken (c))
3506             {
3507               switch (definedef)
3508                 {
3509                 case dnone:
3510                   switch (fvdef)
3511                     {
3512                     case fstartlist:
3513                       /* This prevents tagging fb in
3514                          void (__attribute__((noreturn)) *fb) (void);
3515                          Fixing this is not easy and not very important. */
3516                       fvdef = finlist;
3517                       continue;
3518                     case flistseen:
3519                       if (plainc || declarations)
3520                         {
3521                           make_C_tag (TRUE); /* a function */
3522                           fvdef = fignore;
3523                         }
3524                       break;
3525                     }
3526                   if (structdef == stagseen && !cjava)
3527                     {
3528                       popclass_above (bracelev);
3529                       structdef = snone;
3530                     }
3531                   break;
3532                 case dsharpseen:
3533                   savetoken = token;
3534                   break;
3535                 }
3536               if (!yacc_rules || lp == newlb.buffer + 1)
3537                 {
3538                   tokoff = lp - 1 - newlb.buffer;
3539                   toklen = 1;
3540                   midtoken = TRUE;
3541                 }
3542               continue;
3543             } /* if (begtoken) */
3544         } /* if must look at token */
3545
3546
3547       /* Detect end of line, colon, comma, semicolon and various braces
3548          after having handled a token.*/
3549       switch (c)
3550         {
3551         case ':':
3552           if (inattribute)
3553             break;
3554           if (yacc_rules && token.offset == 0 && token.valid)
3555             {
3556               make_C_tag (FALSE); /* a yacc function */
3557               break;
3558             }
3559           if (definedef != dnone)
3560             break;
3561           switch (objdef)
3562             {
3563             case  otagseen:
3564               objdef = oignore;
3565               make_C_tag (TRUE); /* an Objective C class */
3566               break;
3567             case omethodtag:
3568             case omethodparm:
3569               objdef = omethodcolon;
3570               linebuffer_setlen (&token_name, token_name.len + 1);
3571               strcat (token_name.buffer, ":");
3572               break;
3573             }
3574           if (structdef == stagseen)
3575             {
3576               structdef = scolonseen;
3577               break;
3578             }
3579           /* Should be useless, but may be work as a safety net. */
3580           if (cplpl && fvdef == flistseen)
3581             {
3582               make_C_tag (TRUE); /* a function */
3583               fvdef = fignore;
3584               break;
3585             }
3586           break;
3587         case ';':
3588           if (definedef != dnone || inattribute)
3589             break;
3590           switch (typdef)
3591             {
3592             case tend:
3593             case ttypeseen:
3594               make_C_tag (FALSE); /* a typedef */
3595               typdef = tnone;
3596               fvdef = fvnone;
3597               break;
3598             case tnone:
3599             case tinbody:
3600             case tignore:
3601               switch (fvdef)
3602                 {
3603                 case fignore:
3604                   if (typdef == tignore || cplpl)
3605                     fvdef = fvnone;
3606                   break;
3607                 case fvnameseen:
3608                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3609                       || (members && instruct))
3610                     make_C_tag (FALSE); /* a variable */
3611                   fvextern = FALSE;
3612                   fvdef = fvnone;
3613                   token.valid = FALSE;
3614                   break;
3615                 case flistseen:
3616                   if ((declarations
3617                        && (cplpl || !instruct)
3618                        && (typdef == tnone || (typdef != tignore && instruct)))
3619                       || (members
3620                           && plainc && instruct))
3621                     make_C_tag (TRUE);  /* a function */
3622                   /* FALLTHRU */
3623                 default:
3624                   fvextern = FALSE;
3625                   fvdef = fvnone;
3626                   if (declarations
3627                        && cplpl && structdef == stagseen)
3628                     make_C_tag (FALSE); /* forward declaration */
3629                   else
3630                     token.valid = FALSE;
3631                 } /* switch (fvdef) */
3632               /* FALLTHRU */
3633             default:
3634               if (!instruct)
3635                 typdef = tnone;
3636             }
3637           if (structdef == stagseen)
3638             structdef = snone;
3639           break;
3640         case ',':
3641           if (definedef != dnone || inattribute)
3642             break;
3643           switch (objdef)
3644             {
3645             case omethodtag:
3646             case omethodparm:
3647               make_C_tag (TRUE); /* an Objective C method */
3648               objdef = oinbody;
3649               break;
3650             }
3651           switch (fvdef)
3652             {
3653             case fdefunkey:
3654             case foperator:
3655             case fstartlist:
3656             case finlist:
3657             case fignore:
3658             case vignore:
3659               break;
3660             case fdefunname:
3661               fvdef = fignore;
3662               break;
3663             case fvnameseen:
3664               if (parlev == 0
3665                   && ((globals
3666                        && bracelev == 0
3667                        && templatelev == 0
3668                        && (!fvextern || declarations))
3669                       || (members && instruct)))
3670                   make_C_tag (FALSE); /* a variable */
3671               break;
3672             case flistseen:
3673               if ((declarations && typdef == tnone && !instruct)
3674                   || (members && typdef != tignore && instruct))
3675                 {
3676                   make_C_tag (TRUE); /* a function */
3677                   fvdef = fvnameseen;
3678                 }
3679               else if (!declarations)
3680                 fvdef = fvnone;
3681               token.valid = FALSE;
3682               break;
3683             default:
3684               fvdef = fvnone;
3685             }
3686           if (structdef == stagseen)
3687             structdef = snone;
3688           break;
3689         case ']':
3690           if (definedef != dnone || inattribute)
3691             break;
3692           if (structdef == stagseen)
3693             structdef = snone;
3694           switch (typdef)
3695             {
3696             case ttypeseen:
3697             case tend:
3698               typdef = tignore;
3699               make_C_tag (FALSE);       /* a typedef */
3700               break;
3701             case tnone:
3702             case tinbody:
3703               switch (fvdef)
3704                 {
3705                 case foperator:
3706                 case finlist:
3707                 case fignore:
3708                 case vignore:
3709                   break;
3710                 case fvnameseen:
3711                   if ((members && bracelev == 1)
3712                       || (globals && bracelev == 0
3713                           && (!fvextern || declarations)))
3714                     make_C_tag (FALSE); /* a variable */
3715                   /* FALLTHRU */
3716                 default:
3717                   fvdef = fvnone;
3718                 }
3719               break;
3720             }
3721           break;
3722         case '(':
3723           if (inattribute)
3724             {
3725               attrparlev++;
3726               break;
3727             }
3728           if (definedef != dnone)
3729             break;
3730           if (objdef == otagseen && parlev == 0)
3731             objdef = oparenseen;
3732           switch (fvdef)
3733             {
3734             case fvnameseen:
3735               if (typdef == ttypeseen
3736                   && *lp != '*'
3737                   && !instruct)
3738                 {
3739                   /* This handles constructs like:
3740                      typedef void OperatorFun (int fun); */
3741                   make_C_tag (FALSE);
3742                   typdef = tignore;
3743                   fvdef = fignore;
3744                   break;
3745                 }
3746               /* FALLTHRU */
3747             case foperator:
3748               fvdef = fstartlist;
3749               break;
3750             case flistseen:
3751               fvdef = finlist;
3752               break;
3753             }
3754           parlev++;
3755           break;
3756         case ')':
3757           if (inattribute)
3758             {
3759               if (--attrparlev == 0)
3760                 inattribute = FALSE;
3761               break;
3762             }
3763           if (definedef != dnone)
3764             break;
3765           if (objdef == ocatseen && parlev == 1)
3766             {
3767               make_C_tag (TRUE); /* an Objective C category */
3768               objdef = oignore;
3769             }
3770           if (--parlev == 0)
3771             {
3772               switch (fvdef)
3773                 {
3774                 case fstartlist:
3775                 case finlist:
3776                   fvdef = flistseen;
3777                   break;
3778                 }
3779               if (!instruct
3780                   && (typdef == tend
3781                       || typdef == ttypeseen))
3782                 {
3783                   typdef = tignore;
3784                   make_C_tag (FALSE); /* a typedef */
3785                 }
3786             }
3787           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3788             parlev = 0;
3789           break;
3790         case '{':
3791           if (definedef != dnone)
3792             break;
3793           if (typdef == ttypeseen)
3794             {
3795               /* Whenever typdef is set to tinbody (currently only
3796                  here), typdefbracelev should be set to bracelev. */
3797               typdef = tinbody;
3798               typdefbracelev = bracelev;
3799             }
3800           switch (fvdef)
3801             {
3802             case flistseen:
3803               make_C_tag (TRUE);    /* a function */
3804               /* FALLTHRU */
3805             case fignore:
3806               fvdef = fvnone;
3807               break;
3808             case fvnone:
3809               switch (objdef)
3810                 {
3811                 case otagseen:
3812                   make_C_tag (TRUE); /* an Objective C class */
3813                   objdef = oignore;
3814                   break;
3815                 case omethodtag:
3816                 case omethodparm:
3817                   make_C_tag (TRUE); /* an Objective C method */
3818                   objdef = oinbody;
3819                   break;
3820                 default:
3821                   /* Neutralize `extern "C" {' grot. */
3822                   if (bracelev == 0 && structdef == snone && nestlev == 0
3823                       && typdef == tnone)
3824                     bracelev = -1;
3825                 }
3826               break;
3827             }
3828           switch (structdef)
3829             {
3830             case skeyseen:         /* unnamed struct */
3831               pushclass_above (bracelev, NULL, 0);
3832               structdef = snone;
3833               break;
3834             case stagseen:         /* named struct or enum */
3835             case scolonseen:       /* a class */
3836               pushclass_above (bracelev,token.line+token.offset, token.length);
3837               structdef = snone;
3838               make_C_tag (FALSE);  /* a struct or enum */
3839               break;
3840             }
3841           bracelev += 1;
3842           break;
3843         case '*':
3844           if (definedef != dnone)
3845             break;
3846           if (fvdef == fstartlist)
3847             {
3848               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3849               token.valid = FALSE;
3850             }
3851           break;
3852         case '}':
3853           if (definedef != dnone)
3854             break;
3855           bracelev -= 1;
3856           if (!ignoreindent && lp == newlb.buffer + 1)
3857             {
3858               if (bracelev != 0)
3859                 token.valid = FALSE; /* unexpected value, token unreliable */
3860               bracelev = 0;     /* reset brace level if first column */
3861               parlev = 0;       /* also reset paren level, just in case... */
3862             }
3863           else if (bracelev < 0)
3864             {
3865               token.valid = FALSE; /* something gone amiss, token unreliable */
3866               bracelev = 0;
3867             }
3868           if (bracelev == 0 && fvdef == vignore)
3869             fvdef = fvnone;             /* end of function */
3870           popclass_above (bracelev);
3871           structdef = snone;
3872           /* Only if typdef == tinbody is typdefbracelev significant. */
3873           if (typdef == tinbody && bracelev <= typdefbracelev)
3874             {
3875               assert (bracelev == typdefbracelev);
3876               typdef = tend;
3877             }
3878           break;
3879         case '=':
3880           if (definedef != dnone)
3881             break;
3882           switch (fvdef)
3883             {
3884             case foperator:
3885             case finlist:
3886             case fignore:
3887             case vignore:
3888               break;
3889             case fvnameseen:
3890               if ((members && bracelev == 1)
3891                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3892                 make_C_tag (FALSE); /* a variable */
3893               /* FALLTHRU */
3894             default:
3895               fvdef = vignore;
3896             }
3897           break;
3898         case '<':
3899           if (cplpl
3900               && (structdef == stagseen || fvdef == fvnameseen))
3901             {
3902               templatelev++;
3903               break;
3904             }
3905           goto resetfvdef;
3906         case '>':
3907           if (templatelev > 0)
3908             {
3909               templatelev--;
3910               break;
3911             }
3912           goto resetfvdef;
3913         case '+':
3914         case '-':
3915           if (objdef == oinbody && bracelev == 0)
3916             {
3917               objdef = omethodsign;
3918               break;
3919             }
3920           /* FALLTHRU */
3921         resetfvdef:
3922         case '#': case '~': case '&': case '%': case '/':
3923         case '|': case '^': case '!': case '.': case '?':
3924           if (definedef != dnone)
3925             break;
3926           /* These surely cannot follow a function tag in C. */
3927           switch (fvdef)
3928             {
3929             case foperator:
3930             case finlist:
3931             case fignore:
3932             case vignore:
3933               break;
3934             default:
3935               fvdef = fvnone;
3936             }
3937           break;
3938         case '\0':
3939           if (objdef == otagseen)
3940             {
3941               make_C_tag (TRUE); /* an Objective C class */
3942               objdef = oignore;
3943             }
3944           /* If a macro spans multiple lines don't reset its state. */
3945           if (quotednl)
3946             CNL_SAVE_DEFINEDEF ();
3947           else
3948             CNL ();
3949           break;
3950         } /* switch (c) */
3951
3952     } /* while not eof */
3953
3954   free (lbs[0].lb.buffer);
3955   free (lbs[1].lb.buffer);
3956 }
3957
3958 /*
3959  * Process either a C++ file or a C file depending on the setting
3960  * of a global flag.
3961  */
3962 static void
3963 default_C_entries (inf)
3964      FILE *inf;
3965 {
3966   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3967 }
3968
/* Tag INF as plain C: C_entries is run with no extension flag set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;

  C_entries (c_ext, inf);
}
3976
3977 /* Always do C++. */
3978 static void
3979 Cplusplus_entries (inf)
3980      FILE *inf;
3981 {
3982   C_entries (C_PLPL, inf);
3983 }
3984
3985 /* Always do Java. */
3986 static void
3987 Cjava_entries (inf)
3988      FILE *inf;
3989 {
3990   C_entries (C_JAVA, inf);
3991 }
3992
3993 /* Always do C*. */
3994 static void
3995 Cstar_entries (inf)
3996      FILE *inf;
3997 {
3998   C_entries (C_STAR, inf);
3999 }
4000
4001 /* Always do Yacc. */
4002 static void
4003 Yacc_entries (inf)
4004      FILE *inf;
4005 {
4006   C_entries (YACC, inf);
4007 }
4008
4009 \f
/* Useful macros. */

/* Loop header: the statement that follows it is executed once for each
   line read from FILE_POINTER.  Before every iteration, and only while
   end of file has not been reached, the next line is read into
   LINE_BUFFER and CHAR_POINTER is set to the start of its text.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* nothing to initialize */             \
       !feof (file_pointer)     /* stop at end of file */               \
       &&                       /* otherwise prepare the iteration */   \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);               /* and always enter the body */         \
      )
4019
/* Test whether CP points at the keyword KW, which must be a string
   literal, and whether the character right after the keyword satisfies
   notinname.  When both hold, CP is reassigned to the result of
   skip_spaces applied just past the keyword.  CP is evaluated several
   times, so it must be a side-effect-free lvalue.  */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), TRUE)   /* syntax error if KW is not a literal */   \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* KW is at CP */       \
   && notinname ((cp)[sizeof (kw) - 1])         /* KW ends there */     \
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1))) /* move past it */
4025
/* Variant of LOOKING_AT that compares with strncaseeq.  It does not
   apply notinname to the character after KW and does not skip spaces:
   on a match CP is simply advanced past the keyword.  KW must be a
   string literal.  */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), TRUE)   /* syntax error if KW is not a literal */   \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* KW is at CP */       \
   && ((cp) += sizeof (kw) - 1))                /* step past KW */
4031
4032 /*
4033  * Read a file, but do no processing.  This is used to do regexp
4034  * matching on files that have no language defined.
4035  */
4036 static void
4037 just_read_file (inf)
4038      FILE *inf;
4039 {
4040   register char *dummy;
4041
4042   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4043     continue;
4044 }
4045
4046 \f
4047 /* Fortran parsing */
4048
4049 static void F_takeprec __P((void));
4050 static void F_getit __P((FILE *));
4051
4052 static void
4053 F_takeprec ()
4054 {
4055   dbp = skip_spaces (dbp);
4056   if (*dbp != '*')
4057     return;
4058   dbp++;
4059   dbp = skip_spaces (dbp);
4060   if (strneq (dbp, "(*)", 3))
4061     {
4062       dbp += 3;
4063       return;
4064     }
4065   if (!ISDIGIT (*dbp))
4066     {
4067       --dbp;                    /* force failure */
4068       return;
4069     }
4070   do
4071     dbp++;
4072   while (ISDIGIT (*dbp));
4073 }
4074
4075 static void
4076 F_getit (inf)
4077      FILE *inf;
4078 {
4079   register char *cp;
4080
4081   dbp = skip_spaces (dbp);
4082   if (*dbp == '\0')
4083     {
4084       readline (&lb, inf);
4085       dbp = lb.buffer;
4086       if (dbp[5] != '&')
4087         return;
4088       dbp += 6;
4089       dbp = skip_spaces (dbp);
4090     }
4091   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4092     return;
4093   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4094     continue;
4095   make_tag (dbp, cp-dbp, TRUE,
4096             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4097 }
4098
4099
4100 static void
4101 Fortran_functions (inf)
4102      FILE *inf;
4103 {
4104   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4105     {
4106       if (*dbp == '%')
4107         dbp++;                  /* Ratfor escape to fortran */
4108       dbp = skip_spaces (dbp);
4109       if (*dbp == '\0')
4110         continue;
4111       switch (lowcase (*dbp))
4112         {
4113         case 'i':
4114           if (nocase_tail ("integer"))
4115             F_takeprec ();
4116           break;
4117         case 'r':
4118           if (nocase_tail ("real"))
4119             F_takeprec ();
4120           break;
4121         case 'l':
4122           if (nocase_tail ("logical"))
4123             F_takeprec ();
4124           break;
4125         case 'c':
4126           if (nocase_tail ("complex") || nocase_tail ("character"))
4127             F_takeprec ();
4128           break;
4129         case 'd':
4130           if (nocase_tail ("double"))
4131             {
4132               dbp = skip_spaces (dbp);
4133               if (*dbp == '\0')
4134                 continue;
4135               if (nocase_tail ("precision"))
4136                 break;
4137               continue;
4138             }
4139           break;
4140         }
4141       dbp = skip_spaces (dbp);
4142       if (*dbp == '\0')
4143         continue;
4144       switch (lowcase (*dbp))
4145         {
4146         case 'f':
4147           if (nocase_tail ("function"))
4148             F_getit (inf);
4149           continue;
4150         case 's':
4151           if (nocase_tail ("subroutine"))
4152             F_getit (inf);
4153           continue;
4154         case 'e':
4155           if (nocase_tail ("entry"))
4156             F_getit (inf);
4157           continue;
4158         case 'b':
4159           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4160             {
4161               dbp = skip_spaces (dbp);
4162               if (*dbp == '\0') /* assume un-named */
4163                 make_tag ("blockdata", 9, TRUE,
4164                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4165               else
4166                 F_getit (inf);  /* look for name */
4167             }
4168           continue;
4169         }
4170     }
4171 }
4172
4173 \f
4174 /*
4175  * Ada parsing
4176  * Original code by
4177  * Philippe Waroquiers (1998)
4178  */
4179
4180 static void Ada_getit __P((FILE *, char *));
4181
4182 /* Once we are positioned after an "interesting" keyword, let's get
4183    the real tag value necessary. */
4184 static void
4185 Ada_getit (inf, name_qualifier)
4186      FILE *inf;
4187      char *name_qualifier;
4188 {
4189   register char *cp;
4190   char *name;
4191   char c;
4192
4193   while (!feof (inf))
4194     {
4195       dbp = skip_spaces (dbp);
4196       if (*dbp == '\0'
4197           || (dbp[0] == '-' && dbp[1] == '-'))
4198         {
4199           readline (&lb, inf);
4200           dbp = lb.buffer;
4201         }
4202       switch (lowcase(*dbp))
4203         {
4204         case 'b':
4205           if (nocase_tail ("body"))
4206             {
4207               /* Skipping body of   procedure body   or   package body or ....
4208                  resetting qualifier to body instead of spec. */
4209               name_qualifier = "/b";
4210               continue;
4211             }
4212           break;
4213         case 't':
4214           /* Skipping type of   task type   or   protected type ... */
4215           if (nocase_tail ("type"))
4216             continue;
4217           break;
4218         }
4219       if (*dbp == '"')
4220         {
4221           dbp += 1;
4222           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4223             continue;
4224         }
4225       else
4226         {
4227           dbp = skip_spaces (dbp);
4228           for (cp = dbp;
4229                (*cp != '\0'
4230                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4231                cp++)
4232             continue;
4233           if (cp == dbp)
4234             return;
4235         }
4236       c = *cp;
4237       *cp = '\0';
4238       name = concat (dbp, name_qualifier, "");
4239       *cp = c;
4240       make_tag (name, strlen (name), TRUE,
4241                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4242       free (name);
4243       if (c == '"')
4244         dbp = cp + 1;
4245       return;
4246     }
4247 }
4248
4249 static void
4250 Ada_funcs (inf)
4251      FILE *inf;
4252 {
4253   bool inquote = FALSE;
4254   bool skip_till_semicolumn = FALSE;
4255
4256   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4257     {
4258       while (*dbp != '\0')
4259         {
4260           /* Skip a string i.e. "abcd". */
4261           if (inquote || (*dbp == '"'))
4262             {
4263               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4264               if (dbp != NULL)
4265                 {
4266                   inquote = FALSE;
4267                   dbp += 1;
4268                   continue;     /* advance char */
4269                 }
4270               else
4271                 {
4272                   inquote = TRUE;
4273                   break;        /* advance line */
4274                 }
4275             }
4276
4277           /* Skip comments. */
4278           if (dbp[0] == '-' && dbp[1] == '-')
4279             break;              /* advance line */
4280
4281           /* Skip character enclosed in single quote i.e. 'a'
4282              and skip single quote starting an attribute i.e. 'Image. */
4283           if (*dbp == '\'')
4284             {
4285               dbp++ ;
4286               if (*dbp != '\0')
4287                 dbp++;
4288               continue;
4289             }
4290
4291           if (skip_till_semicolumn)
4292             {
4293               if (*dbp == ';')
4294                 skip_till_semicolumn = FALSE;
4295               dbp++;
4296               continue;         /* advance char */
4297             }
4298
4299           /* Search for beginning of a token.  */
4300           if (!begtoken (*dbp))
4301             {
4302               dbp++;
4303               continue;         /* advance char */
4304             }
4305
4306           /* We are at the beginning of a token. */
4307           switch (lowcase(*dbp))
4308             {
4309             case 'f':
4310               if (!packages_only && nocase_tail ("function"))
4311                 Ada_getit (inf, "/f");
4312               else
4313                 break;          /* from switch */
4314               continue;         /* advance char */
4315             case 'p':
4316               if (!packages_only && nocase_tail ("procedure"))
4317                 Ada_getit (inf, "/p");
4318               else if (nocase_tail ("package"))
4319                 Ada_getit (inf, "/s");
4320               else if (nocase_tail ("protected")) /* protected type */
4321                 Ada_getit (inf, "/t");
4322               else
4323                 break;          /* from switch */
4324               continue;         /* advance char */
4325
4326             case 'u':
4327               if (typedefs && !packages_only && nocase_tail ("use"))
4328                 {
4329                   /* when tagging types, avoid tagging  use type Pack.Typename;
4330                      for this, we will skip everything till a ; */
4331                   skip_till_semicolumn = TRUE;
4332                   continue;     /* advance char */
4333                 }
4334
4335             case 't':
4336               if (!packages_only && nocase_tail ("task"))
4337                 Ada_getit (inf, "/k");
4338               else if (typedefs && !packages_only && nocase_tail ("type"))
4339                 {
4340                   Ada_getit (inf, "/t");
4341                   while (*dbp != '\0')
4342                     dbp += 1;
4343                 }
4344               else
4345                 break;          /* from switch */
4346               continue;         /* advance char */
4347             }
4348
4349           /* Look for the end of the token. */
4350           while (!endtoken (*dbp))
4351             dbp++;
4352
4353         } /* advance char */
4354     } /* advance line */
4355 }
4356
4357 \f
4358 /*
4359  * Unix and microcontroller assembly tag handling
4360  * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4361  * Idea by Bob Weiner, Motorola Inc. (1994)
4362  */
4363 static void
4364 Asm_labels (inf)
4365      FILE *inf;
4366 {
4367   register char *cp;
4368
4369   LOOP_ON_INPUT_LINES (inf, lb, cp)
4370     {
4371       /* If first char is alphabetic or one of [_.$], test for colon
4372          following identifier. */
4373       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4374         {
4375           /* Read past label. */
4376           cp++;
4377           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4378             cp++;
4379           if (*cp == ':' || iswhite (*cp))
4380             /* Found end of label, so copy it and add it to the table. */
4381             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4382                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4383         }
4384     }
4385 }
4386
4387 \f
4388 /*
4389  * Perl support
4390  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4391  * Perl variable names: /^(my|local).../
4392  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4393  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4394  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4395  */
4396 static void
4397 Perl_functions (inf)
4398      FILE *inf;
4399 {
4400   char *package = savestr ("main"); /* current package name */
4401   register char *cp;
4402
4403   LOOP_ON_INPUT_LINES (inf, lb, cp)
4404     {
4405       cp = skip_spaces (cp);
4406
4407       if (LOOKING_AT (cp, "package"))
4408         {
4409           free (package);
4410           get_tag (cp, &package);
4411         }
4412       else if (LOOKING_AT (cp, "sub"))
4413         {
4414           char *pos;
4415           char *sp = cp;
4416
4417           while (!notinname (*cp))
4418             cp++;
4419           if (cp == sp)
4420             continue;           /* nothing found */
4421           if ((pos = etags_strchr (sp, ':')) != NULL
4422               && pos < cp && pos[1] == ':')
4423             /* The name is already qualified. */
4424             make_tag (sp, cp - sp, TRUE,
4425                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4426           else
4427             /* Qualify it. */
4428             {
4429               char savechar, *name;
4430
4431               savechar = *cp;
4432               *cp = '\0';
4433               name = concat (package, "::", sp);
4434               *cp = savechar;
4435               make_tag (name, strlen(name), TRUE,
4436                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4437               free (name);
4438             }
4439         }
4440        else if (globals)        /* only if we are tagging global vars */
4441         {
4442           /* Skip a qualifier, if any. */
4443           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4444           /* After "my" or "local", but before any following paren or space. */
4445           char *varstart = cp;
4446
4447           if (qual              /* should this be removed?  If yes, how? */
4448               && (*cp == '$' || *cp == '@' || *cp == '%'))
4449             {
4450               varstart += 1;
4451               do
4452                 cp++;
4453               while (ISALNUM (*cp) || *cp == '_');
4454             }
4455           else if (qual)
4456             {
4457               /* Should be examining a variable list at this point;
4458                  could insist on seeing an open parenthesis. */
4459               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4460                 cp++;
4461             }
4462           else
4463             continue;
4464
4465           make_tag (varstart, cp - varstart, FALSE,
4466                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4467         }
4468     }
4469   free (package);
4470 }
4471
4472
4473 /*
4474  * Python support
4475  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4476  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4477  * More ideas by seb bacon <seb@jamkit.com> (2002)
4478  */
4479 static void
4480 Python_functions (inf)
4481      FILE *inf;
4482 {
4483   register char *cp;
4484
4485   LOOP_ON_INPUT_LINES (inf, lb, cp)
4486     {
4487       cp = skip_spaces (cp);
4488       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4489         {
4490           char *name = cp;
4491           while (!notinname (*cp) && *cp != ':')
4492             cp++;
4493           make_tag (name, cp - name, TRUE,
4494                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4495         }
4496     }
4497 }
4498
4499 \f
4500 /*
4501  * PHP support
4502  * Look for:
4503  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4504  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4505  *  - /^[ \t]*define\(\"[^\"]+/
4506  * Only with --members:
4507  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4508  * Idea by Diez B. Roggisch (2001)
4509  */
4510 static void
4511 PHP_functions (inf)
4512      FILE *inf;
4513 {
4514   register char *cp, *name;
4515   bool search_identifier = FALSE;
4516
4517   LOOP_ON_INPUT_LINES (inf, lb, cp)
4518     {
4519       cp = skip_spaces (cp);
4520       name = cp;
4521       if (search_identifier
4522           && *cp != '\0')
4523         {
4524           while (!notinname (*cp))
4525             cp++;
4526           make_tag (name, cp - name, TRUE,
4527                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4528           search_identifier = FALSE;
4529         }
4530       else if (LOOKING_AT (cp, "function"))
4531         {
4532           if(*cp == '&')
4533             cp = skip_spaces (cp+1);
4534           if(*cp != '\0')
4535             {
4536               name = cp;
4537               while (!notinname (*cp))
4538                 cp++;
4539               make_tag (name, cp - name, TRUE,
4540                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4541             }
4542           else
4543             search_identifier = TRUE;
4544         }
4545       else if (LOOKING_AT (cp, "class"))
4546         {
4547           if (*cp != '\0')
4548             {
4549               name = cp;
4550               while (*cp != '\0' && !iswhite (*cp))
4551                 cp++;
4552               make_tag (name, cp - name, FALSE,
4553                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4554             }
4555           else
4556             search_identifier = TRUE;
4557         }
4558       else if (strneq (cp, "define", 6)
4559                && (cp = skip_spaces (cp+6))
4560                && *cp++ == '('
4561                && (*cp == '"' || *cp == '\''))
4562         {
4563           char quote = *cp++;
4564           name = cp;
4565           while (*cp != quote && *cp != '\0')
4566             cp++;
4567           make_tag (name, cp - name, FALSE,
4568                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4569         }
4570       else if (members
4571                && LOOKING_AT (cp, "var")
4572                && *cp == '$')
4573         {
4574           name = cp;
4575           while (!notinname(*cp))
4576             cp++;
4577           make_tag (name, cp - name, FALSE,
4578                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4579         }
4580     }
4581 }
4582
4583 \f
4584 /*
4585  * Cobol tag functions
4586  * We could look for anything that could be a paragraph name.
4587  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4588  * Idea by Corny de Souza (1993)
4589  */
4590 static void
4591 Cobol_paragraphs (inf)
4592      FILE *inf;
4593 {
4594   register char *bp, *ep;
4595
4596   LOOP_ON_INPUT_LINES (inf, lb, bp)
4597     {
4598       if (lb.len < 9)
4599         continue;
4600       bp += 8;
4601
4602       /* If eoln, compiler option or comment ignore whole line. */
4603       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4604         continue;
4605
4606       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4607         continue;
4608       if (*ep++ == '.')
4609         make_tag (bp, ep - bp, TRUE,
4610                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4611     }
4612 }
4613
4614 \f
4615 /*
4616  * Makefile support
4617  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4618  */
4619 static void
4620 Makefile_targets (inf)
4621      FILE *inf;
4622 {
4623   register char *bp;
4624
4625   LOOP_ON_INPUT_LINES (inf, lb, bp)
4626     {
4627       if (*bp == '\t' || *bp == '#')
4628         continue;
4629       while (*bp != '\0' && *bp != '=' && *bp != ':')
4630         bp++;
4631       if (*bp == ':' || (globals && *bp == '='))
4632         {
4633           /* We should detect if there is more than one tag, but we do not.
4634              We just skip initial and final spaces. */
4635           char * namestart = skip_spaces (lb.buffer);
4636           while (--bp > namestart)
4637             if (!notinname (*bp))
4638               break;
4639           make_tag (namestart, bp - namestart + 1, TRUE,
4640                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4641         }
4642     }
4643 }
4644
4645 \f
4646 /*
4647  * Pascal parsing
4648  * Original code by Mosur K. Mohan (1989)
4649  *
4650  *  Locates tags for procedures & functions.  Doesn't do any type- or
4651  *  var-definitions.  It does look for the keyword "extern" or
4652  *  "forward" immediately following the procedure statement; if found,
4653  *  the tag is skipped.
4654  */
4655 static void
4656 Pascal_functions (inf)
4657      FILE *inf;
4658 {
4659   linebuffer tline;             /* mostly copied from C_entries */
4660   long save_lcno;
4661   int save_lineno, namelen, taglen;
4662   char c, *name;
4663
4664   bool                          /* each of these flags is TRUE if: */
4665     incomment,                  /* point is inside a comment */
4666     inquote,                    /* point is inside '..' string */
4667     get_tagname,                /* point is after PROCEDURE/FUNCTION
4668                                    keyword, so next item = potential tag */
4669     found_tag,                  /* point is after a potential tag */
4670     inparms,                    /* point is within parameter-list */
4671     verify_tag;                 /* point has passed the parm-list, so the
4672                                    next token will determine whether this
4673                                    is a FORWARD/EXTERN to be ignored, or
4674                                    whether it is a real tag */
4675
4676   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4677   name = NULL;                  /* keep compiler quiet */
4678   dbp = lb.buffer;
4679   *dbp = '\0';
4680   linebuffer_init (&tline);
4681
4682   incomment = inquote = FALSE;
4683   found_tag = FALSE;            /* have a proc name; check if extern */
4684   get_tagname = FALSE;          /* found "procedure" keyword         */
4685   inparms = FALSE;              /* found '(' after "proc"            */
4686   verify_tag = FALSE;           /* check if "extern" is ahead        */
4687
4688
4689   while (!feof (inf))           /* long main loop to get next char */
4690     {
4691       c = *dbp++;
4692       if (c == '\0')            /* if end of line */
4693         {
4694           readline (&lb, inf);
4695           dbp = lb.buffer;
4696           if (*dbp == '\0')
4697             continue;
4698           if (!((found_tag && verify_tag)
4699                 || get_tagname))
4700             c = *dbp++;         /* only if don't need *dbp pointing
4701                                    to the beginning of the name of
4702                                    the procedure or function */
4703         }
4704       if (incomment)
4705         {
4706           if (c == '}')         /* within { } comments */
4707             incomment = FALSE;
4708           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4709             {
4710               dbp++;
4711               incomment = FALSE;
4712             }
4713           continue;
4714         }
4715       else if (inquote)
4716         {
4717           if (c == '\'')
4718             inquote = FALSE;
4719           continue;
4720         }
4721       else
4722         switch (c)
4723           {
4724           case '\'':
4725             inquote = TRUE;     /* found first quote */
4726             continue;
4727           case '{':             /* found open { comment */
4728             incomment = TRUE;
4729             continue;
4730           case '(':
4731             if (*dbp == '*')    /* found open (* comment */
4732               {
4733                 incomment = TRUE;
4734                 dbp++;
4735               }
4736             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4737               inparms = TRUE;
4738             continue;
4739           case ')':             /* end of parms list */
4740             if (inparms)
4741               inparms = FALSE;
4742             continue;
4743           case ';':
4744             if (found_tag && !inparms) /* end of proc or fn stmt */
4745               {
4746                 verify_tag = TRUE;
4747                 break;
4748               }
4749             continue;
4750           }
4751       if (found_tag && verify_tag && (*dbp != ' '))
4752         {
4753           /* Check if this is an "extern" declaration. */
4754           if (*dbp == '\0')
4755             continue;
4756           if (lowcase (*dbp == 'e'))
4757             {
4758               if (nocase_tail ("extern")) /* superfluous, really! */
4759                 {
4760                   found_tag = FALSE;
4761                   verify_tag = FALSE;
4762                 }
4763             }
4764           else if (lowcase (*dbp) == 'f')
4765             {
4766               if (nocase_tail ("forward")) /* check for forward reference */
4767                 {
4768                   found_tag = FALSE;
4769                   verify_tag = FALSE;
4770                 }
4771             }
4772           if (found_tag && verify_tag) /* not external proc, so make tag */
4773             {
4774               found_tag = FALSE;
4775               verify_tag = FALSE;
4776               make_tag (name, namelen, TRUE,
4777                         tline.buffer, taglen, save_lineno, save_lcno);
4778               continue;
4779             }
4780         }
4781       if (get_tagname)          /* grab name of proc or fn */
4782         {
4783           char *cp;
4784
4785           if (*dbp == '\0')
4786             continue;
4787
4788           /* Find block name. */
4789           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4790             continue;
4791
4792           /* Save all values for later tagging. */
4793           linebuffer_setlen (&tline, lb.len);
4794           strcpy (tline.buffer, lb.buffer);
4795           save_lineno = lineno;
4796           save_lcno = linecharno;
4797           name = tline.buffer + (dbp - lb.buffer);
4798           namelen = cp - dbp;
4799           taglen = cp - lb.buffer + 1;
4800
4801           dbp = cp;             /* set dbp to e-o-token */
4802           get_tagname = FALSE;
4803           found_tag = TRUE;
4804           continue;
4805
4806           /* And proceed to check for "extern". */
4807         }
4808       else if (!incomment && !inquote && !found_tag)
4809         {
4810           /* Check for proc/fn keywords. */
4811           switch (lowcase (c))
4812             {
4813             case 'p':
4814               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4815                 get_tagname = TRUE;
4816               continue;
4817             case 'f':
4818               if (nocase_tail ("unction"))
4819                 get_tagname = TRUE;
4820               continue;
4821             }
4822         }
4823     } /* while not eof */
4824
4825   free (tline.buffer);
4826 }
4827
4828 \f
4829 /*
4830  * Lisp tag functions
4831  *  look for (def or (DEF, quote or QUOTE
4832  */
4833
4834 static void L_getit __P((void));
4835
4836 static void
4837 L_getit ()
4838 {
4839   if (*dbp == '\'')             /* Skip prefix quote */
4840     dbp++;
4841   else if (*dbp == '(')
4842   {
4843     dbp++;
4844     /* Try to skip "(quote " */
4845     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4846       /* Ok, then skip "(" before name in (defstruct (foo)) */
4847       dbp = skip_spaces (dbp);
4848   }
4849   get_tag (dbp, NULL);
4850 }
4851
4852 static void
4853 Lisp_functions (inf)
4854      FILE *inf;
4855 {
4856   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4857     {
4858       if (dbp[0] != '(')
4859         continue;
4860
4861       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4862         {
4863           dbp = skip_non_spaces (dbp);
4864           dbp = skip_spaces (dbp);
4865           L_getit ();
4866         }
4867       else
4868         {
4869           /* Check for (foo::defmumble name-defined ... */
4870           do
4871             dbp++;
4872           while (!notinname (*dbp) && *dbp != ':');
4873           if (*dbp == ':')
4874             {
4875               do
4876                 dbp++;
4877               while (*dbp == ':');
4878
4879               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4880                 {
4881                   dbp = skip_non_spaces (dbp);
4882                   dbp = skip_spaces (dbp);
4883                   L_getit ();
4884                 }
4885             }
4886         }
4887     }
4888 }
4889
4890 \f
4891 /*
4892  * Lua script language parsing
4893  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4894  *
4895  *  "function" and "local function" are tags if they start at column 1.
4896  */
4897 static void
4898 Lua_functions (inf)
4899      FILE *inf;
4900 {
4901   register char *bp;
4902
4903   LOOP_ON_INPUT_LINES (inf, lb, bp)
4904     {
4905       if (bp[0] != 'f' && bp[0] != 'l')
4906         continue;
4907
4908       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4909
4910       if (LOOKING_AT (bp, "function"))
4911         get_tag (bp, NULL);
4912     }
4913 }
4914
4915 \f
4916 /*
4917  * Postscript tags
4918  * Just look for lines where the first character is '/'
4919  * Also look at "defineps" for PSWrap
4920  * Ideas by:
4921  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4922  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4923  */
4924 static void
4925 PS_functions (inf)
4926      FILE *inf;
4927 {
4928   register char *bp, *ep;
4929
4930   LOOP_ON_INPUT_LINES (inf, lb, bp)
4931     {
4932       if (bp[0] == '/')
4933         {
4934           for (ep = bp+1;
4935                *ep != '\0' && *ep != ' ' && *ep != '{';
4936                ep++)
4937             continue;
4938           make_tag (bp, ep - bp, TRUE,
4939                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4940         }
4941       else if (LOOKING_AT (bp, "defineps"))
4942         get_tag (bp, NULL);
4943     }
4944 }
4945
4946 \f
4947 /*
4948  * Forth tags
4949  * Ignore anything after \ followed by space or in ( )
4950  * Look for words defined by :
4951  * Look for constant, code, create, defer, value, and variable
4952  * OBP extensions:  Look for buffer:, field,
4953  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4954  */
4955 static void
4956 Forth_words (inf)
4957      FILE *inf;
4958 {
4959   register char *bp;
4960
4961   LOOP_ON_INPUT_LINES (inf, lb, bp)
4962     while ((bp = skip_spaces (bp))[0] != '\0')
4963       if (bp[0] == '\\' && iswhite(bp[1]))
4964         break;                  /* read next line */
4965       else if (bp[0] == '(' && iswhite(bp[1]))
4966         do                      /* skip to ) or eol */
4967           bp++;
4968         while (*bp != ')' && *bp != '\0');
4969       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4970                || LOOKING_AT_NOCASE (bp, "constant")
4971                || LOOKING_AT_NOCASE (bp, "code")
4972                || LOOKING_AT_NOCASE (bp, "create")
4973                || LOOKING_AT_NOCASE (bp, "defer")
4974                || LOOKING_AT_NOCASE (bp, "value")
4975                || LOOKING_AT_NOCASE (bp, "variable")
4976                || LOOKING_AT_NOCASE (bp, "buffer:")
4977                || LOOKING_AT_NOCASE (bp, "field"))
4978         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4979       else
4980         bp = skip_non_spaces (bp);
4981 }
4982
4983 \f
4984 /*
4985  * Scheme tag functions
4986  * look for (def... xyzzy
4987  *          (def... (xyzzy
4988  *          (def ... ((...(xyzzy ....
4989  *          (set! xyzzy
4990  * Original code by Ken Haase (1985?)
4991  */
4992 static void
4993 Scheme_functions (inf)
4994      FILE *inf;
4995 {
4996   register char *bp;
4997
4998   LOOP_ON_INPUT_LINES (inf, lb, bp)
4999     {
5000       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5001         {
5002           bp = skip_non_spaces (bp+4);
5003           /* Skip over open parens and white space */
5004           while (notinname (*bp))
5005             bp++;
5006           get_tag (bp, NULL);
5007         }
5008       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5009         get_tag (bp, NULL);
5010     }
5011 }
5012
5013 \f
5014 /* Find tags in TeX and LaTeX input files.  */
5015
5016 /* TEX_toktab is a table of TeX control sequences that define tags.
5017  * Each entry records one such control sequence.
5018  *
5019  * Original code from who knows whom.
5020  * Ideas by:
5021  *   Stefan Monnier (2002)
5022  */
5023
5024 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5025
5026 /* Default set of control sequences to put into TEX_toktab.
5027    The value of environment var TEXTAGS is prepended to this.  */
5028 static char *TEX_defenv = "\
5029 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5030 :part:appendix:entry:index:def\
5031 :newcommand:renewcommand:newenvironment:renewenvironment";
5032
5033 static void TEX_mode __P((FILE *));
5034 static void TEX_decode_env __P((char *, char *));
5035
5036 static char TEX_esc = '\\';
5037 static char TEX_opgrp = '{';
5038 static char TEX_clgrp = '}';
5039
5040 /*
5041  * TeX/LaTeX scanning loop.
5042  */
5043 static void
5044 TeX_commands (inf)
5045      FILE *inf;
5046 {
5047   char *cp;
5048   linebuffer *key;
5049
5050   /* Select either \ or ! as escape character.  */
5051   TEX_mode (inf);
5052
5053   /* Initialize token table once from environment. */
5054   if (TEX_toktab == NULL)
5055     TEX_decode_env ("TEXTAGS", TEX_defenv);
5056
5057   LOOP_ON_INPUT_LINES (inf, lb, cp)
5058     {
5059       /* Look at each TEX keyword in line. */
5060       for (;;)
5061         {
5062           /* Look for a TEX escape. */
5063           while (*cp++ != TEX_esc)
5064             if (cp[-1] == '\0' || cp[-1] == '%')
5065               goto tex_next_line;
5066
5067           for (key = TEX_toktab; key->buffer != NULL; key++)
5068             if (strneq (cp, key->buffer, key->len))
5069               {
5070                 register char *p;
5071                 int namelen, linelen;
5072                 bool opgrp = FALSE;
5073
5074                 cp = skip_spaces (cp + key->len);
5075                 if (*cp == TEX_opgrp)
5076                   {
5077                     opgrp = TRUE;
5078                     cp++;
5079                   }
5080                 for (p = cp;
5081                      (!iswhite (*p) && *p != '#' &&
5082                       *p != TEX_opgrp && *p != TEX_clgrp);
5083                      p++)
5084                   continue;
5085                 namelen = p - cp;
5086                 linelen = lb.len;
5087                 if (!opgrp || *p == TEX_clgrp)
5088                   {
5089                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5090                       p++;
5091                     linelen = p - lb.buffer + 1;
5092                   }
5093                 make_tag (cp, namelen, TRUE,
5094                           lb.buffer, linelen, lineno, linecharno);
5095                 goto tex_next_line; /* We only tag a line once */
5096               }
5097         }
5098     tex_next_line:
5099       ;
5100     }
5101 }
5102
5103 #define TEX_LESC '\\'
5104 #define TEX_SESC '!'
5105
5106 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5107    chars accordingly. */
5108 static void
5109 TEX_mode (inf)
5110      FILE *inf;
5111 {
5112   int c;
5113
5114   while ((c = getc (inf)) != EOF)
5115     {
5116       /* Skip to next line if we hit the TeX comment char. */
5117       if (c == '%')
5118         while (c != '\n' && c != EOF)
5119           c = getc (inf);
5120       else if (c == TEX_LESC || c == TEX_SESC )
5121         break;
5122     }
5123
5124   if (c == TEX_LESC)
5125     {
5126       TEX_esc = TEX_LESC;
5127       TEX_opgrp = '{';
5128       TEX_clgrp = '}';
5129     }
5130   else
5131     {
5132       TEX_esc = TEX_SESC;
5133       TEX_opgrp = '<';
5134       TEX_clgrp = '>';
5135     }
5136   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5137      No attempt is made to correct the situation. */
5138   rewind (inf);
5139 }
5140
5141 /* Read environment and prepend it to the default string.
5142    Build token table. */
5143 static void
5144 TEX_decode_env (evarname, defenv)
5145      char *evarname;
5146      char *defenv;
5147 {
5148   register char *env, *p;
5149   int i, len;
5150
5151   /* Append default string to environment. */
5152   env = getenv (evarname);
5153   if (!env)
5154     env = defenv;
5155   else
5156     {
5157       char *oldenv = env;
5158       env = concat (oldenv, defenv, "");
5159     }
5160
5161   /* Allocate a token table */
5162   for (len = 1, p = env; p;)
5163     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5164       len++;
5165   TEX_toktab = xnew (len, linebuffer);
5166
5167   /* Unpack environment string into token table. Be careful about */
5168   /* zero-length strings (leading ':', "::" and trailing ':') */
5169   for (i = 0; *env != '\0';)
5170     {
5171       p = etags_strchr (env, ':');
5172       if (!p)                   /* End of environment string. */
5173         p = env + strlen (env);
5174       if (p - env > 0)
5175         {                       /* Only non-zero strings. */
5176           TEX_toktab[i].buffer = savenstr (env, p - env);
5177           TEX_toktab[i].len = p - env;
5178           i++;
5179         }
5180       if (*p)
5181         env = p + 1;
5182       else
5183         {
5184           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5185           TEX_toktab[i].len = 0;
5186           break;
5187         }
5188     }
5189 }
5190
5191 \f
5192 /* Texinfo support.  Dave Love, Mar. 2000.  */
5193 static void
5194 Texinfo_nodes (inf)
5195      FILE * inf;
5196 {
5197   char *cp, *start;
5198   LOOP_ON_INPUT_LINES (inf, lb, cp)
5199     if (LOOKING_AT (cp, "@node"))
5200       {
5201         start = cp;
5202         while (*cp != '\0' && *cp != ',')
5203           cp++;
5204         make_tag (start, cp - start, TRUE,
5205                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5206       }
5207 }
5208
5209 \f
/*
 * HTML support.
 * Contents of <title>, <h1>, <h2>, <h3> are tags.
 * Contents of <a name=xxx> are tags with name xxx.
 * The value of an id= attribute in any HTML tag is used as a name
 * in the same way.
 *
 * The scanner is a small state machine driven by four flags; at most
 * one of SKIPTAG, INTAG and GETNEXT is acted upon per pass of the
 * inner loop, in that order of priority.  The state survives across
 * input lines, so an HTML tag may span several lines.
 *
 * Francesco Potortì, 2002.
 */
static void
HTML_labels (inf)
     FILE * inf;
{
  bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
  bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
  bool intag = FALSE;           /* inside an html tag, looking for ID= */
  bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
  char *end;


  linebuffer_setlen (&token_name, 0); /* no name in buffer */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    for (;;)                    /* loop on the same line */
      {
        if (skiptag)            /* skip HTML tag */
          {
            while (*dbp != '\0' && *dbp != '>')
              dbp++;
            if (*dbp == '>')
              {
                dbp += 1;
                skiptag = FALSE;
                continue;       /* look on the same line */
              }
            break;              /* go to next line */
          }

        else if (intag) /* look for "name=" or "id=" */
          {
            /* Advance to the next character that could start one of the
               attribute names, or to the end of the HTML tag or line. */
            while (*dbp != '\0' && *dbp != '>'
                   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '>')
              {
                dbp += 1;
                intag = FALSE;
                continue;       /* look on the same line */
              }
            /* "name=" only counts inside an anchor; "id=" counts in any
               HTML tag. */
            if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
                || LOOKING_AT_NOCASE (dbp, "id="))
              {
                bool quoted = (dbp[0] == '"');

                /* Find the end of the attribute value: the closing
                   double quote if quoted, else the end of the token. */
                if (quoted)
                  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
                    continue;
                else
                  for (end = dbp; *end != '\0' && intoken (*end); end++)
                    continue;
                /* Remember the value in token_name for the next tag. */
                linebuffer_setlen (&token_name, end - dbp);
                strncpy (token_name.buffer, dbp, end - dbp);
                token_name.buffer[end - dbp] = '\0';

                dbp = end;
                intag = FALSE;  /* we found what we looked for */
                skiptag = TRUE; /* skip to the end of the tag */
                getnext = TRUE; /* then grab the text */
                continue;       /* look on the same line */
              }
            dbp += 1;
          }

        else if (getnext)       /* grab next tokens and tag them */
          {
            dbp = skip_spaces (dbp);
            if (*dbp == '\0')
              break;            /* go to next line */
            if (*dbp == '<')
              {
                /* Another HTML tag comes before any text: scan it for
                   attributes first; GETNEXT stays set. */
                intag = TRUE;
                inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
                continue;       /* look on the same line */
              }

            /* The text up to the next '<' or the end of the line is
               what gets tagged, under the name saved in token_name. */
            for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
              continue;
            make_tag (token_name.buffer, token_name.len, TRUE,
                      dbp, end - dbp, lineno, linecharno);
            linebuffer_setlen (&token_name, 0); /* no name in buffer */
            getnext = FALSE;
            break;              /* go to next line */
          }

        else                    /* look for an interesting HTML tag */
          {
            while (*dbp != '\0' && *dbp != '<')
              dbp++;
            if (*dbp == '\0')
              break;            /* go to next line */
            intag = TRUE;
            if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
              {
                inanchor = TRUE;
                continue;       /* look on the same line */
              }
            else if (LOOKING_AT_NOCASE (dbp, "<title>")
                     || LOOKING_AT_NOCASE (dbp, "<h1>")
                     || LOOKING_AT_NOCASE (dbp, "<h2>")
                     || LOOKING_AT_NOCASE (dbp, "<h3>"))
              {
                intag = FALSE;
                getnext = TRUE;
                continue;       /* look on the same line */
              }
            /* Any other HTML tag: step over the '<' and, with INTAG
               set, scan its attributes on the next pass. */
            dbp += 1;
          }
      }
}
5329
5330 \f
5331 /*
5332  * Prolog support
5333  *
5334  * Assumes that the predicate or rule starts at column 0.
5335  * Only the first clause of a predicate or rule is added.
5336  * Original code by Sunichirou Sugou (1989)
5337  * Rewritten by Anders Lindgren (1996)
5338  */
5339 static int prolog_pr __P((char *, char *));
5340 static void prolog_skip_comment __P((linebuffer *, FILE *));
5341 static int prolog_atom __P((char *, int));
5342
5343 static void
5344 Prolog_functions (inf)
5345      FILE *inf;
5346 {
5347   char *cp, *last;
5348   int len;
5349   int allocated;
5350
5351   allocated = 0;
5352   len = 0;
5353   last = NULL;
5354
5355   LOOP_ON_INPUT_LINES (inf, lb, cp)
5356     {
5357       if (cp[0] == '\0')        /* Empty line */
5358         continue;
5359       else if (iswhite (cp[0])) /* Not a predicate */
5360         continue;
5361       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5362         prolog_skip_comment (&lb, inf);
5363       else if ((len = prolog_pr (cp, last)) > 0)
5364         {
5365           /* Predicate or rule.  Store the function name so that we
5366              only generate a tag for the first clause.  */
5367           if (last == NULL)
5368             last = xnew(len + 1, char);
5369           else if (len + 1 > allocated)
5370             xrnew (last, len + 1, char);
5371           allocated = len + 1;
5372           strncpy (last, cp, len);
5373           last[len] = '\0';
5374         }
5375     }
5376   free (last);
5377 }
5378
5379
5380 static void
5381 prolog_skip_comment (plb, inf)
5382      linebuffer *plb;
5383      FILE *inf;
5384 {
5385   char *cp;
5386
5387   do
5388     {
5389       for (cp = plb->buffer; *cp != '\0'; cp++)
5390         if (cp[0] == '*' && cp[1] == '/')
5391           return;
5392       readline (plb, inf);
5393     }
5394   while (!feof(inf));
5395 }
5396
5397 /*
5398  * A predicate or rule definition is added if it matches:
5399  *     <beginning of line><Prolog Atom><whitespace>(
5400  * or  <beginning of line><Prolog Atom><whitespace>:-
5401  *
5402  * It is added to the tags database if it doesn't match the
5403  * name of the previous clause header.
5404  *
5405  * Return the size of the name of the predicate or rule, or 0 if no
5406  * header was found.
5407  */
5408 static int
5409 prolog_pr (s, last)
5410      char *s;
5411      char *last;                /* Name of last clause. */
5412 {
5413   int pos;
5414   int len;
5415
5416   pos = prolog_atom (s, 0);
5417   if (pos < 1)
5418     return 0;
5419
5420   len = pos;
5421   pos = skip_spaces (s + pos) - s;
5422
5423   if ((s[pos] == '.'
5424        || (s[pos] == '(' && (pos += 1))
5425        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5426       && (last == NULL          /* save only the first clause */
5427           || len != (int)strlen (last)
5428           || !strneq (s, last, len)))
5429         {
5430           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5431           return len;
5432         }
5433   else
5434     return 0;
5435 }
5436
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  A quoted atom that does not end on
 *   the same line is an error.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (s[pos] == '\'')
    {
      /* The atom is quoted: scan for the closing quote. */
      for (pos++;;)
        switch (s[pos])
          {
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return -1;

          case '\\':
            if (s[pos + 1] == '\0')
              return -1;
            pos += 2;           /* backslash and the quoted character */
            break;

          case '\'':
            if (s[pos + 1] != '\'')
              return pos + 1 - start; /* closing quote included */
            pos += 2;           /* A double quote */
            break;

          default:
            pos++;
            break;
          }
    }

  if (!ISLOWER (s[pos]) && s[pos] != '_')
    return -1;

  /* The atom is unquoted. */
  do
    pos++;
  while (ISALNUM (s[pos]) || s[pos] == '_');

  return pos - start;
}
5495
5496 \f
5497 /*
5498  * Support for Erlang
5499  *
5500  * Generates tags for functions, defines, and records.
5501  * Assumes that Erlang functions start at column 0.
5502  * Original code by Anders Lindgren (1996)
5503  */
5504 static int erlang_func __P((char *, char *));
5505 static void erlang_attribute __P((char *));
5506 static int erlang_atom __P((char *));
5507
5508 static void
5509 Erlang_functions (inf)
5510      FILE *inf;
5511 {
5512   char *cp, *last;
5513   int len;
5514   int allocated;
5515
5516   allocated = 0;
5517   len = 0;
5518   last = NULL;
5519
5520   LOOP_ON_INPUT_LINES (inf, lb, cp)
5521     {
5522       if (cp[0] == '\0')        /* Empty line */
5523         continue;
5524       else if (iswhite (cp[0])) /* Not function nor attribute */
5525         continue;
5526       else if (cp[0] == '%')    /* comment */
5527         continue;
5528       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5529         continue;
5530       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5531         {
5532           erlang_attribute (cp);
5533           if (last != NULL)
5534             {
5535               free (last);
5536               last = NULL;
5537             }
5538         }
5539       else if ((len = erlang_func (cp, last)) > 0)
5540         {
5541           /*
5542            * Function.  Store the function name so that we only
5543            * generates a tag for the first clause.
5544            */
5545           if (last == NULL)
5546             last = xnew (len + 1, char);
5547           else if (len + 1 > allocated)
5548             xrnew (last, len + 1, char);
5549           allocated = len + 1;
5550           strncpy (last, cp, len);
5551           last[len] = '\0';
5552         }
5553     }
5554   free (last);
5555 }
5556
5557
5558 /*
5559  * A function definition is added if it matches:
5560  *     <beginning of line><Erlang Atom><whitespace>(
5561  *
5562  * It is added to the tags database if it doesn't match the
5563  * name of the previous clause header.
5564  *
5565  * Return the size of the name of the function, or 0 if no function
5566  * was found.
5567  */
5568 static int
5569 erlang_func (s, last)
5570      char *s;
5571      char *last;                /* Name of last clause. */
5572 {
5573   int pos;
5574   int len;
5575
5576   pos = erlang_atom (s);
5577   if (pos < 1)
5578     return 0;
5579
5580   len = pos;
5581   pos = skip_spaces (s + pos) - s;
5582
5583   /* Save only the first clause. */
5584   if (s[pos++] == '('
5585       && (last == NULL
5586           || len != (int)strlen (last)
5587           || !strneq (s, last, len)))
5588         {
5589           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5590           return len;
5591         }
5592
5593   return 0;
5594 }
5595
5596
5597 /*
5598  * Handle attributes.  Currently, tags are generated for defines
5599  * and records.
5600  *
5601  * They are on the form:
5602  * -define(foo, bar).
5603  * -define(Foo(M, N), M+N).
5604  * -record(graph, {vtab = notable, cyclic = true}).
5605  */
5606 static void
5607 erlang_attribute (s)
5608      char *s;
5609 {
5610   char *cp = s;
5611
5612   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5613       && *cp++ == '(')
5614     {
5615       int len = erlang_atom (skip_spaces (cp));
5616       if (len > 0)
5617         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5618     }
5619   return;
5620 }
5621
5622
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted atom starts with a letter or an underscore and goes on
 * with alphanumeric characters and underscores.  A quoted atom is
 * enclosed in single quotes; a backslash inside it quotes the next
 * character.
 *
 * Return the number of bytes consumed (closing quote included), or 0
 * if S does not start with an atom or if a quoted atom does not end
 * on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (s[0] == '\'')
    {
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;            /* look at the quoted character */
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  if (!ISALPHA (s[0]) && s[0] != '_')
    return 0;

  /* The atom is unquoted. */
  for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
    continue;
  return pos;
}
5651
5652 \f
5653 static char *scan_separators __P((char *));
5654 static void add_regex __P((char *, language *));
5655 static char *substitute __P((char *, char *, struct re_registers *));
5656
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters.  Any other quoted character is copied
 * together with its backslash.  Works in place.  Null terminates
 * name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.  An empty NAME has no separator at all and
 * also yields NULL.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* Nothing to scan: do not step past the terminating null. */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character.  A backslash at the very
             end of the string is dropped.  */
          if (*++name == '\0')
            break;
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5716
5717 /* Look at the argument of --regex or --no-regex and do the right
5718    thing.  Same for each line of a regexp file. */
5719 static void
5720 analyse_regex (regex_arg)
5721      char *regex_arg;
5722 {
5723   if (regex_arg == NULL)
5724     {
5725       free_regexps ();          /* --no-regex: remove existing regexps */
5726       return;
5727     }
5728
5729   /* A real --regexp option or a line in a regexp file. */
5730   switch (regex_arg[0])
5731     {
5732       /* Comments in regexp file or null arg to --regex. */
5733     case '\0':
5734     case ' ':
5735     case '\t':
5736       break;
5737
5738       /* Read a regex file.  This is recursive and may result in a
5739          loop, which will stop when the file descriptors are exhausted. */
5740     case '@':
5741       {
5742         FILE *regexfp;
5743         linebuffer regexbuf;
5744         char *regexfile = regex_arg + 1;
5745
5746         /* regexfile is a file containing regexps, one per line. */
5747         regexfp = fopen (regexfile, "r");
5748         if (regexfp == NULL)
5749           {
5750             pfatal (regexfile);
5751             return;
5752           }
5753         linebuffer_init (&regexbuf);
5754         while (readline_internal (&regexbuf, regexfp) > 0)
5755           analyse_regex (regexbuf.buffer);
5756         free (regexbuf.buffer);
5757         fclose (regexfp);
5758       }
5759       break;
5760
5761       /* Regexp to be used for a specific language only. */
5762     case '{':
5763       {
5764         language *lang;
5765         char *lang_name = regex_arg + 1;
5766         char *cp;
5767
5768         for (cp = lang_name; *cp != '}'; cp++)
5769           if (*cp == '\0')
5770             {
5771               error ("unterminated language name in regex: %s", regex_arg);
5772               return;
5773             }
5774         *cp++ = '\0';
5775         lang = get_language_from_langname (lang_name);
5776         if (lang == NULL)
5777           return;
5778         add_regex (cp, lang);
5779       }
5780       break;
5781
5782       /* Regexp to be used for any language. */
5783     default:
5784       add_regex (regex_arg, NULL);
5785       break;
5786     }
5787 }
5788
5789 /* Separate the regexp pattern, compile it,
5790    and care for optional name and modifiers. */
5791 static void
5792 add_regex (regexp_pattern, lang)
5793      char *regexp_pattern;
5794      language *lang;
5795 {
5796   static struct re_pattern_buffer zeropattern;
5797   char sep, *pat, *name, *modifiers;
5798   const char *err;
5799   struct re_pattern_buffer *patbuf;
5800   regexp *rp;
5801   bool
5802     force_explicit_name = TRUE, /* do not use implicit tag names */
5803     ignore_case = FALSE,        /* case is significant */
5804     multi_line = FALSE,         /* matches are done one line at a time */
5805     single_line = FALSE;        /* dot does not match newline */
5806
5807
5808   if (strlen(regexp_pattern) < 3)
5809     {
5810       error ("null regexp", (char *)NULL);
5811       return;
5812     }
5813   sep = regexp_pattern[0];
5814   name = scan_separators (regexp_pattern);
5815   if (name == NULL)
5816     {
5817       error ("%s: unterminated regexp", regexp_pattern);
5818       return;
5819     }
5820   if (name[1] == sep)
5821     {
5822       error ("null name for regexp \"%s\"", regexp_pattern);
5823       return;
5824     }
5825   modifiers = scan_separators (name);
5826   if (modifiers == NULL)        /* no terminating separator --> no name */
5827     {
5828       modifiers = name;
5829       name = "";
5830     }
5831   else
5832     modifiers += 1;             /* skip separator */
5833
5834   /* Parse regex modifiers. */
5835   for (; modifiers[0] != '\0'; modifiers++)
5836     switch (modifiers[0])
5837       {
5838       case 'N':
5839         if (modifiers == name)
5840           error ("forcing explicit tag name but no name, ignoring", NULL);
5841         force_explicit_name = TRUE;
5842         break;
5843       case 'i':
5844         ignore_case = TRUE;
5845         break;
5846       case 's':
5847         single_line = TRUE;
5848         /* FALLTHRU */
5849       case 'm':
5850         multi_line = TRUE;
5851         need_filebuf = TRUE;
5852         break;
5853       default:
5854         {
5855           char wrongmod [2];
5856           wrongmod[0] = modifiers[0];
5857           wrongmod[1] = '\0';
5858           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5859         }
5860         break;
5861       }
5862
5863   patbuf = xnew (1, struct re_pattern_buffer);
5864   *patbuf = zeropattern;
5865   if (ignore_case)
5866     {
5867       static char lc_trans[CHARS];
5868       int i;
5869       for (i = 0; i < CHARS; i++)
5870         lc_trans[i] = lowcase (i);
5871       patbuf->translate = lc_trans;     /* translation table to fold case  */
5872     }
5873
5874   if (multi_line)
5875     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5876   else
5877     pat = regexp_pattern;
5878
5879   if (single_line)
5880     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5881   else
5882     re_set_syntax (RE_SYNTAX_EMACS);
5883
5884   err = re_compile_pattern (pat, strlen (pat), patbuf);
5885   if (multi_line)
5886     free (pat);
5887   if (err != NULL)
5888     {
5889       error ("%s while compiling pattern", err);
5890       return;
5891     }
5892
5893   rp = p_head;
5894   p_head = xnew (1, regexp);
5895   p_head->pattern = savestr (regexp_pattern);
5896   p_head->p_next = rp;
5897   p_head->lang = lang;
5898   p_head->pat = patbuf;
5899   p_head->name = savestr (name);
5900   p_head->error_signaled = FALSE;
5901   p_head->force_explicit_name = force_explicit_name;
5902   p_head->ignore_case = ignore_case;
5903   p_head->multi_line = multi_line;
5904 }
5905
5906 /*
5907  * Do the substitutions indicated by the regular expression and
5908  * arguments.
5909  */
5910 static char *
5911 substitute (in, out, regs)
5912      char *in, *out;
5913      struct re_registers *regs;
5914 {
5915   char *result, *t;
5916   int size, dig, diglen;
5917
5918   result = NULL;
5919   size = strlen (out);
5920
5921   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5922   if (out[size - 1] == '\\')
5923     fatal ("pattern error in \"%s\"", out);
5924   for (t = etags_strchr (out, '\\');
5925        t != NULL;
5926        t = etags_strchr (t + 2, '\\'))
5927     if (ISDIGIT (t[1]))
5928       {
5929         dig = t[1] - '0';
5930         diglen = regs->end[dig] - regs->start[dig];
5931         size += diglen - 2;
5932       }
5933     else
5934       size -= 1;
5935
5936   /* Allocate space and do the substitutions. */
5937   assert (size >= 0);
5938   result = xnew (size + 1, char);
5939
5940   for (t = result; *out != '\0'; out++)
5941     if (*out == '\\' && ISDIGIT (*++out))
5942       {
5943         dig = *out - '0';
5944         diglen = regs->end[dig] - regs->start[dig];
5945         strncpy (t, in + regs->start[dig], diglen);
5946         t += diglen;
5947       }
5948     else
5949       *t++ = *out;
5950   *t = '\0';
5951
5952   assert (t <= result + size);
5953   assert (t - result == (int)strlen (result));
5954
5955   return result;
5956 }
5957
5958 /* Deallocate all regexps. */
5959 static void
5960 free_regexps ()
5961 {
5962   regexp *rp;
5963   while (p_head != NULL)
5964     {
5965       rp = p_head->p_next;
5966       free (p_head->pattern);
5967       free (p_head->name);
5968       free (p_head);
5969       p_head = rp;
5970     }
5971   return;
5972 }
5973
/*
 * Reads the whole file as a single string from `filebuf' and looks for
 * multi-line regular expressions, creating tags on matches.
 * readline already dealt with normal regexps.
 *
 * For each multi-line regexp that applies to the current language the
 * global position counters (lineno, charno, linecharno) are reset and
 * the buffer is searched repeatedly, each search starting at charno,
 * which is advanced to the end of every match.
 *
 * Idea by Ben Wing <ben@666.com> (2002).
 */
static void
regex_tag_multiline ()
{
  char *buffer = filebuf.buffer;
  regexp *rp;
  char *name;

  for (rp = p_head; rp != NULL; rp = rp->p_next)
    {
      int match = 0;

      if (!rp->multi_line)
        continue;               /* skip normal regexps */

      /* Generic initialisations before parsing file from memory. */
      lineno = 1;               /* reset global line number */
      charno = 0;               /* reset global char number */
      linecharno = 0;           /* reset global char number of line start */

      /* Only use generic regexps or those for the current language. */
      if (rp->lang != NULL && rp->lang != curfdp->lang)
        continue;

      /* A negative MATCH (no match, error, or the -3 set below) ends
         the search for this regexp. */
      while (match >= 0 && match < filebuf.len)
        {
          match = re_search (rp->pat, buffer, filebuf.len, charno,
                             filebuf.len - match, &rp->regs);
          switch (match)
            {
            case -2:
              /* Some error. */
              if (!rp->error_signaled)
                {
                  error ("regexp stack overflow while matching \"%s\"",
                         rp->pattern);
                  rp->error_signaled = TRUE;
                }
              break;
            case -1:
              /* No match. */
              break;
            default:
              /* An empty match would not advance charno, so the same
                 search would be repeated forever. */
              if (match == rp->regs.end[0])
                {
                  if (!rp->error_signaled)
                    {
                      error ("regexp matches the empty string: \"%s\"",
                             rp->pattern);
                      rp->error_signaled = TRUE;
                    }
                  match = -3;   /* exit from while loop */
                  break;
                }

              /* Match occurred.  Construct a tag. */
              /* Advance charno to the end of the match, keeping lineno
                 and linecharno up to date on the way. */
              while (charno < rp->regs.end[0])
                if (buffer[charno++] == '\n')
                  lineno++, linecharno = charno;
              name = rp->name;
              if (name[0] == '\0')
                name = NULL;
              else /* make a named tag */
                name = substitute (buffer, rp->name, &rp->regs);
              /* NOTE(review): NAME may be NULL here, and the make_tag
                 branch below calls strlen on it -- confirm that branch
                 is never taken with an empty rp->name. */
              if (rp->force_explicit_name)
                /* Force explicit tag name, if a name is there. */
                pfnote (name, TRUE, buffer + linecharno,
                        charno - linecharno + 1, lineno, linecharno);
              else
                make_tag (name, strlen (name), TRUE, buffer + linecharno,
                          charno - linecharno + 1, lineno, linecharno);
              break;
            }
        }
    }
}
6056
6057 \f
6058 static bool
6059 nocase_tail (cp)
6060      char *cp;
6061 {
6062   register int len = 0;
6063
6064   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6065     cp++, len++;
6066   if (*cp == '\0' && !intoken (dbp[len]))
6067     {
6068       dbp += len;
6069       return TRUE;
6070     }
6071   return FALSE;
6072 }
6073
6074 static void
6075 get_tag (bp, namepp)
6076      register char *bp;
6077      char **namepp;
6078 {
6079   register char *cp = bp;
6080
6081   if (*bp != '\0')
6082     {
6083       /* Go till you get to white space or a syntactic break */
6084       for (cp = bp + 1; !notinname (*cp); cp++)
6085         continue;
6086       make_tag (bp, cp - bp, TRUE,
6087                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6088     }
6089
6090   if (namepp != NULL)
6091     *namepp = savenstr (bp, cp - bp);
6092 }
6093
/*
 * Read a line of text from `stream' into `lbp', excluding the
 * newline or CR-NL, if any.  Return the number of characters read from
 * `stream', which is the length of the line including the newline.
 *
 * On DOS or Windows we do not count the CR character, if any before the
 * NL, in the returned length; this mirrors the behavior of Emacs on those
 * platforms (for text files, it translates CR-NL to NL as it reads in the
 * file).
 *
 * If multi-line regular expressions are requested, each line read is
 * appended to `filebuf'.
 *
 * The buffer of `lbp' is doubled in size whenever it fills up, and
 * lbp->len is set to the length of the stored line.  When the line is
 * ended by end of file instead of a newline, nothing is added to the
 * stored length in the return value and the line is not appended to
 * `filebuf'.
 */
static long
readline_internal (lbp, stream)
     linebuffer *lbp;
     register FILE *stream;
{
  char *buffer = lbp->buffer;
  register char *p = lbp->buffer;
  register char *pend;
  int chars_deleted;            /* line terminator characters not stored */

  pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */

  for (;;)
    {
      register int c = getc (stream);
      /* Make room before storing anything, the terminating null
         included. */
      if (p == pend)
        {
          /* We're at the end of linebuffer: expand it. */
          lbp->size *= 2;
          xrnew (buffer, lbp->size, char);
          /* lbp->buffer still holds the old address here, so this
             moves P to the same offset in the new buffer. */
          p += buffer - lbp->buffer;
          pend = buffer + lbp->size;
          lbp->buffer = buffer;
        }
      if (c == EOF)
        {
          *p = '\0';
          chars_deleted = 0;
          break;
        }
      if (c == '\n')
        {
          if (p > buffer && p[-1] == '\r')
            {
              p -= 1;           /* drop the CR from the stored line */
#ifdef DOS_NT
             /* Assume CRLF->LF translation will be performed by Emacs
                when loading this file, so CRs won't appear in the buffer.
                It would be cleaner to compensate within Emacs;
                however, Emacs does not know how many CRs were deleted
                before any given point in the file.  */
              chars_deleted = 1;
#else
              chars_deleted = 2;
#endif
            }
          else
            {
              chars_deleted = 1;
            }
          *p = '\0';
          break;
        }
      *p++ = c;
    }
  lbp->len = p - buffer;

  if (need_filebuf              /* we need filebuf for multi-line regexps */
      && chars_deleted > 0)     /* not at EOF */
    {
      while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
        {
          /* Expand filebuf. */
          filebuf.size *= 2;
          xrnew (filebuf.buffer, filebuf.size, char);
        }
      /* Append the line followed by a single newline, whatever the
         line terminator in the file was. */
      strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
      filebuf.len += lbp->len;
      filebuf.buffer[filebuf.len++] = '\n';
      filebuf.buffer[filebuf.len] = '\0';
    }

  return lbp->len + chars_deleted;
}
6181
6182 /*
6183  * Like readline_internal, above, but in addition try to match the
6184  * input line against relevant regular expressions and manage #line
6185  * directives.
6186  */
6187 static void
6188 readline (lbp, stream)
6189      linebuffer *lbp;
6190      FILE *stream;
6191 {
6192   long result;
6193
6194   linecharno = charno;          /* update global char number of line start */
6195   result = readline_internal (lbp, stream); /* read line */
6196   lineno += 1;                  /* increment global line number */
6197   charno += result;             /* increment global char number */
6198
6199   /* Honour #line directives. */
6200   if (!no_line_directive)
6201     {
6202       static bool discard_until_line_directive;
6203
6204       /* Check whether this is a #line directive. */
6205       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6206         {
6207           unsigned int lno;
6208           int start = 0;
6209
6210           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6211               && start > 0)     /* double quote character found */
6212             {
6213               char *endp = lbp->buffer + start;
6214
6215               while ((endp = etags_strchr (endp, '"')) != NULL
6216                      && endp[-1] == '\\')
6217                 endp++;
6218               if (endp != NULL)
6219                 /* Ok, this is a real #line directive.  Let's deal with it. */
6220                 {
6221                   char *taggedabsname;  /* absolute name of original file */
6222                   char *taggedfname;    /* name of original file as given */
6223                   char *name;           /* temp var */
6224
6225                   discard_until_line_directive = FALSE; /* found it */
6226                   name = lbp->buffer + start;
6227                   *endp = '\0';
6228                   canonicalize_filename (name);
6229                   taggedabsname = absolute_filename (name, tagfiledir);
6230                   if (filename_is_absolute (name)
6231                       || filename_is_absolute (curfdp->infname))
6232                     taggedfname = savestr (taggedabsname);
6233                   else
6234                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6235
6236                   if (streq (curfdp->taggedfname, taggedfname))
6237                     /* The #line directive is only a line number change.  We
6238                        deal with this afterwards. */
6239                     free (taggedfname);
6240                   else
6241                     /* The tags following this #line directive should be
6242                        attributed to taggedfname.  In order to do this, set
6243                        curfdp accordingly. */
6244                     {
6245                       fdesc *fdp; /* file description pointer */
6246
6247                       /* Go look for a file description already set up for the
6248                          file indicated in the #line directive.  If there is
6249                          one, use it from now until the next #line
6250                          directive. */
6251                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6252                         if (streq (fdp->infname, curfdp->infname)
6253                             && streq (fdp->taggedfname, taggedfname))
6254                           /* If we remove the second test above (after the &&)
6255                              then all entries pertaining to the same file are
6256                              coalesced in the tags file.  If we use it, then
6257                              entries pertaining to the same file but generated
6258                              from different files (via #line directives) will
6259                              go into separate sections in the tags file.  These
6260                              alternatives look equivalent.  The first one
6261                              destroys some apparently useless information. */
6262                           {
6263                             curfdp = fdp;
6264                             free (taggedfname);
6265                             break;
6266                           }
6267                       /* Else, if we already tagged the real file, skip all
6268                          input lines until the next #line directive. */
6269                       if (fdp == NULL) /* not found */
6270                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6271                           if (streq (fdp->infabsname, taggedabsname))
6272                             {
6273                               discard_until_line_directive = TRUE;
6274                               free (taggedfname);
6275                               break;
6276                             }
6277                       /* Else create a new file description and use that from
6278                          now on, until the next #line directive. */
6279                       if (fdp == NULL) /* not found */
6280                         {
6281                           fdp = fdhead;
6282                           fdhead = xnew (1, fdesc);
6283                           *fdhead = *curfdp; /* copy curr. file description */
6284                           fdhead->next = fdp;
6285                           fdhead->infname = savestr (curfdp->infname);
6286                           fdhead->infabsname = savestr (curfdp->infabsname);
6287                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6288                           fdhead->taggedfname = taggedfname;
6289                           fdhead->usecharno = FALSE;
6290                           fdhead->prop = NULL;
6291                           fdhead->written = FALSE;
6292                           curfdp = fdhead;
6293                         }
6294                     }
6295                   free (taggedabsname);
6296                   lineno = lno - 1;
6297                   readline (lbp, stream);
6298                   return;
6299                 } /* if a real #line directive */
6300             } /* if #line is followed by a a number */
6301         } /* if line begins with "#line " */
6302
6303       /* If we are here, no #line directive was found. */
6304       if (discard_until_line_directive)
6305         {
6306           if (result > 0)
6307             {
6308               /* Do a tail recursion on ourselves, thus discarding the contents
6309                  of the line buffer. */
6310               readline (lbp, stream);
6311               return;
6312             }
6313           /* End of file. */
6314           discard_until_line_directive = FALSE;
6315           return;
6316         }
6317     } /* if #line directives should be considered */
6318
6319   {
6320     int match;
6321     regexp *rp;
6322     char *name;
6323
6324     /* Match against relevant regexps. */
6325     if (lbp->len > 0)
6326       for (rp = p_head; rp != NULL; rp = rp->p_next)
6327         {
6328           /* Only use generic regexps or those for the current language.
6329              Also do not use multiline regexps, which is the job of
6330              regex_tag_multiline. */
6331           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6332               || rp->multi_line)
6333             continue;
6334
6335           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6336           switch (match)
6337             {
6338             case -2:
6339               /* Some error. */
6340               if (!rp->error_signaled)
6341                 {
6342                   error ("regexp stack overflow while matching \"%s\"",
6343                          rp->pattern);
6344                   rp->error_signaled = TRUE;
6345                 }
6346               break;
6347             case -1:
6348               /* No match. */
6349               break;
6350             case 0:
6351               /* Empty string matched. */
6352               if (!rp->error_signaled)
6353                 {
6354                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6355                   rp->error_signaled = TRUE;
6356                 }
6357               break;
6358             default:
6359               /* Match occurred.  Construct a tag. */
6360               name = rp->name;
6361               if (name[0] == '\0')
6362                 name = NULL;
6363               else /* make a named tag */
6364                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6365               if (rp->force_explicit_name)
6366                 /* Force explicit tag name, if a name is there. */
6367                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6368               else
6369                 make_tag (name, strlen (name), TRUE,
6370                           lbp->buffer, match, lineno, linecharno);
6371               break;
6372             }
6373         }
6374   }
6375 }
6376
6377 \f
/*
 * Duplicate the NUL-terminated string CP: the whole string is handed
 * to savenstr, which returns the newly allocated copy.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_len = strlen (cp);

  return savenstr (cp, whole_len);
}
6388
6389 /*
6390  * Return a pointer to a space of size LEN+1 allocated with xnew where
6391  * the string CP has been copied for at most the first LEN characters.
6392  */
6393 static char *
6394 savenstr (cp, len)
6395      char *cp;
6396      int len;
6397 {
6398   register char *dp;
6399
6400   dp = xnew (len + 1, char);
6401   strncpy (dp, cp, len);
6402   dp[len] = '\0';
6403   return dp;
6404 }
6405
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Like POSIX strrchr, C is converted to char before comparing, so
 * byte values above 127 are found whether or not char is signed.
 * Included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *r;
  const char ch = (char) c;     /* compare as char, the way strrchr does */

  r = NULL;
  do
    {
      if (*sp == ch)
        r = sp;
    } while (*sp++);
  return (char *)r;
}
6427
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL counts as part of
 * the string, so searching for '\0' returns a pointer to it.
 *
 * Like POSIX strchr, C is converted to char before comparing, so
 * byte values above 127 are found whether or not char is signed.
 * Included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  const char ch = (char) c;     /* compare as char, the way strchr does */

  do
    {
      if (*sp == ch)
        return (char *)sp;
    } while (*sp++);
  return NULL;
}
6446
/*
 * Case-insensitive comparison of S1 and S2.
 *
 * Two characters are compared through lowcase only when both are
 * alphabetic; otherwise they are compared as they are.  Returns the
 * difference between the first pair of characters that do not match
 * (zero when the strings are equal).
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  /* Report how the first differing pair (or the terminators) compare. */
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6467
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters: at most the first N
 * characters of S1 and S2 take part in the comparison.
 *
 * Returns 0 when the first N characters match (always when N <= 0),
 * otherwise the difference between the first pair of characters that
 * do not match.  Two characters are compared through lowcase only
 * when both are alphabetic.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* Test N first, and count it down only for characters actually
     consumed.  That way reaching the end of S1 exactly at the limit is
     not mistaken for a mismatch lying beyond the limit. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;                   /* the whole window matched */
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6493
/* Return a pointer to the first character at or after CP for which
   iswhite is false (the end of string is not space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6503
/* Return a pointer to the first character at or after CP that is
   either the end of the string or one for which iswhite is true. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0')
        break;
      if (iswhite (*cp))
        break;
      cp++;
    }
  return cp;
}
6513
/* Report an error through error (S1 is the printf control string, S2
   its argument), then terminate the program with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1, *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6522
/* Print S1 together with the system error message (via perror), then
   terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6530
6531 static void
6532 suggest_asking_for_help ()
6533 {
6534   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6535            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6536   exit (EXIT_FAILURE);
6537 }
6538
6539 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6540 static void
6541 error (s1, s2)
6542      const char *s1, *s2;
6543 {
6544   fprintf (stderr, "%s: ", progname);
6545   fprintf (stderr, s1, s2);
6546   fprintf (stderr, "\n");
6547 }
6548
6549 /* Return a newly-allocated string whose contents
6550    concatenate those of s1, s2, s3.  */
6551 static char *
6552 concat (s1, s2, s3)
6553      char *s1, *s2, *s3;
6554 {
6555   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6556   char *result = xnew (len1 + len2 + len3 + 1, char);
6557
6558   strcpy (result, s1);
6559   strcpy (result + len1, s2);
6560   strcpy (result + len1 + len2, s3);
6561   result[len1 + len2 + len3] = '\0';
6562
6563   return result;
6564 }
6565
6566 \f
6567 /* Does the same work as the system V getcwd, but does not need to
6568    guess the buffer size in advance. */
6569 static char *
6570 etags_getcwd ()
6571 {
6572 #ifdef HAVE_GETCWD
6573   int bufsize = 200;
6574   char *path = xnew (bufsize, char);
6575
6576   while (getcwd (path, bufsize) == NULL)
6577     {
6578       if (errno != ERANGE)
6579         pfatal ("getcwd");
6580       bufsize *= 2;
6581       free (path);
6582       path = xnew (bufsize, char);
6583     }
6584
6585   canonicalize_filename (path);
6586   return path;
6587
6588 #else /* not HAVE_GETCWD */
6589 #if MSDOS
6590
6591   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6592
6593   getwd (path);
6594
6595   for (p = path; *p != '\0'; p++)
6596     if (*p == '\\')
6597       *p = '/';
6598     else
6599       *p = lowcase (*p);
6600
6601   return strdup (path);
6602 #else /* not MSDOS */
6603   linebuffer path;
6604   FILE *pipe;
6605
6606   linebuffer_init (&path);
6607   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6608   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6609     pfatal ("pwd");
6610   pclose (pipe);
6611
6612   return path.buffer;
6613 #endif /* not MSDOS */
6614 #endif /* not HAVE_GETCWD */
6615 }
6616
6617 /* Return a newly allocated string containing the file name of FILE
6618    relative to the absolute directory DIR (which should end with a slash). */
6619 static char *
6620 relative_filename (file, dir)
6621      char *file, *dir;
6622 {
6623   char *fp, *dp, *afn, *res;
6624   int i;
6625
6626   /* Find the common root of file and dir (with a trailing slash). */
6627   afn = absolute_filename (file, cwd);
6628   fp = afn;
6629   dp = dir;
6630   while (*fp++ == *dp++)
6631     continue;
6632   fp--, dp--;                   /* back to the first differing char */
6633 #ifdef DOS_NT
6634   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6635     return afn;
6636 #endif
6637   do                            /* look at the equal chars until '/' */
6638     fp--, dp--;
6639   while (*fp != '/');
6640
6641   /* Build a sequence of "../" strings for the resulting relative file name. */
6642   i = 0;
6643   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6644     i += 1;
6645   res = xnew (3*i + strlen (fp + 1) + 1, char);
6646   res[0] = '\0';
6647   while (i-- > 0)
6648     strcat (res, "../");
6649
6650   /* Add the file name relative to the common root of file and dir. */
6651   strcat (res, fp + 1);
6652   free (afn);
6653
6654   return res;
6655 }
6656
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   already absolute it is appended to DIR; then "/dirname/.." and "/."
   sequences are removed from the result in place. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the previous component,
                 without ever stepping before the start of RES. */
              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    break;
                }
              if (!filename_is_absolute (cp))
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Move the rest of the string, terminator included, down
                 over "dirname/..". */
#ifdef HAVE_MEMMOVE
              memmove (cp, slashp + 3, strlen (slashp + 2));
#else
              /* The regions overlap, so strcpy must not be used.  The
                 destination lies below the source, which makes a
                 forward byte copy safe. */
              {
                char *to = cp;
                const char *from = slashp + 3;

                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop the "/." by moving the rest of the string,
                 terminator included, down by two characters. */
#ifdef HAVE_MEMMOVE
              memmove (slashp, slashp + 2, strlen (slashp + 1));
#else
              /* Overlapping regions again: forward byte copy. */
              {
                char *to = slashp;
                const char *from = slashp + 2;

                while ((*to++ = *from++) != '\0')
                  continue;
              }
#endif
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6729
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  When FILE contains no slash, a copy of DIR is
   returned.  FILE is cut short after its last slash while the name is
   computed, and restored before returning. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash = etags_strrchr (file, '/');
  char *abs_dir;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily truncate FILE right after its last slash. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  abs_dir = absolute_filename (file, dir);
  last_slash[1] = saved;

  return abs_dir;
}
6750
/* Whether the argument string is an absolute file name, that is, one
   starting with a slash or, under DOS_NT, with a drive letter followed
   by ":/".  The argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  if (fn[0] == '/')
    return 1;
#ifdef DOS_NT
  if (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
    return 1;
#endif
  return 0;
}
6763
/* Upcase DOS drive letter and collapse separators into single slashes.
   The separator is '/' normally and '\\' under DOS_NT.
   Works in place. */
static void
canonicalize_filename (fn)
     register char *fn;
{
  register char *src, *dst;     /* read and write positions in FN */
  char sep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  sep = '\\';
#endif

  /* Collapse multiple separators into a single slash. */
  src = dst = fn;
  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          *dst++ = '/';
          do                    /* swallow the whole run of separators */
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6793
6794 \f
6795 /* Initialize a linebuffer for use. */
6796 static void
6797 linebuffer_init (lbp)
6798      linebuffer *lbp;
6799 {
6800   lbp->size = (DEBUG) ? 3 : 200;
6801   lbp->buffer = xnew (lbp->size, char);
6802   lbp->buffer[0] = '\0';
6803   lbp->len = 0;
6804 }
6805
6806 /* Set the minimum size of a string contained in a linebuffer. */
6807 static void
6808 linebuffer_setlen (lbp, toksize)
6809      linebuffer *lbp;
6810      int toksize;
6811 {
6812   while (lbp->size <= toksize)
6813     {
6814       lbp->size *= 2;
6815       xrnew (lbp->buffer, lbp->size, char);
6816     }
6817   lbp->len = toksize;
6818 }
6819
6820 /* Like malloc but get fatal error if memory is exhausted. */
6821 static PTR
6822 xmalloc (size)
6823      unsigned int size;
6824 {
6825   PTR result = (PTR) malloc (size);
6826   if (result == NULL)
6827     fatal ("virtual memory exhausted", (char *)NULL);
6828   return result;
6829 }
6830
6831 static PTR
6832 xrealloc (ptr, size)
6833      char *ptr;
6834      unsigned int size;
6835 {
6836   PTR result = (PTR) realloc (ptr, size);
6837   if (result == NULL)
6838     fatal ("virtual memory exhausted", (char *)NULL);
6839   return result;
6840 }
6841
6842 /*
6843  * Local Variables:
6844  * indent-tabs-mode: t
6845  * tab-width: 8
6846  * fill-column: 79
6847  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6848  * c-file-style: "gnu"
6849  * End:
6850  */
6851
6852 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6853    (do not change this comment) */
6854
6855 /* etags.c ends here */