lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software: you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation, either version 3 of the License, or
  40 (at your option) any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  49
  50
  51 /* NB To comply with the above BSD license, copyright information is
  52 reproduced in etc/ETAGS.README.  That file should be updated when the
  53 above notices are.
  54
  55 To the best of our knowledge, this code was originally based on the
  56 ctags.c distributed with BSD4.2, which was copyrighted by the
  57 University of California, as described above. */
  58
  59
  60 /*
  61  * Authors:
  62  * 1983 Ctags originally by Ken Arnold.
  63  * 1984 Fortran added by Jim Kleckner.
  64  * 1984 Ed Pelegri-Llopart added C typedefs.
  65  * 1985 Emacs TAGS format by Richard Stallman.
  66  * 1989 Sam Kendall added C++.
  67  * 1992 Joseph B. Wells improved C and C++ parsing.
  68  * 1993 Francesco Potortì reorganized C and C++.
  69  * 1994 Line-by-line regexp tags by Tom Tromey.
  70  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  71  * 2002 #line directives by Francesco Potortì.
  72  *
  73  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  74  */
  75
  76 /*
  77  * If you want to add support for a new language, start by looking at the LUA
  78  * language, which is the simplest.  Alternatively, consider distributing etags
  79  * together with a configuration file containing regexp definitions for etags.
  80  */
  81
  82 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.3";
  83
  84 #define TRUE    1
  85 #define FALSE   0
  86
  87 #ifdef DEBUG
  88 #  undef DEBUG
  89 #  define DEBUG TRUE
  90 #else
  91 #  define DEBUG  FALSE
  92 #  define NDEBUG                /* disable assert */
  93 #endif
  94
  95 #ifdef HAVE_CONFIG_H
  96 # include <config.h>
  97   /* On some systems, Emacs defines static as nothing for the sake
  98      of unexec.  We don't want that here since we don't use unexec. */
  99 # undef static
 100 # ifndef PTR                    /* for XEmacs */
 101 #   define PTR void *
 102 # endif
 103 # ifndef __P                    /* for XEmacs */
 104 #   define __P(args) args
 105 # endif
 106 #else  /* no config.h */
 107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 108 #   define __P(args) args       /* use prototypes */
 109 #   define PTR void *           /* for generic pointers */
 110 # else /* not standard C */
 111 #   define __P(args) ()         /* no prototypes */
 112 #   define const                /* remove const for old compilers' sake */
 113 #   define PTR long *           /* don't use void* */
 114 # endif
 115 #endif /* !HAVE_CONFIG_H */
 116
 117 #ifndef _GNU_SOURCE
 118 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 119 #endif
 120
 121 /* WIN32_NATIVE is for XEmacs.
 122    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 123 #ifdef WIN32_NATIVE
 124 # undef MSDOS
 125 # undef  WINDOWSNT
 126 # define WINDOWSNT
 127 #endif /* WIN32_NATIVE */
 128
 129 #ifdef MSDOS
 130 # undef MSDOS
 131 # define MSDOS TRUE
 132 # include <fcntl.h>
 133 # include <sys/param.h>
 134 # include <io.h>
 135 # ifndef HAVE_CONFIG_H
 136 #   define DOS_NT
 137 #   include <sys/config.h>
 138 # endif
 139 #else
 140 # define MSDOS FALSE
 141 #endif /* MSDOS */
 142
 143 #ifdef WINDOWSNT
 144 # include <stdlib.h>
 145 # include <fcntl.h>
 146 # include <string.h>
 147 # include <direct.h>
 148 # include <io.h>
 149 # define MAXPATHLEN _MAX_PATH
 150 # undef HAVE_NTGUI
 151 # undef  DOS_NT
 152 # define DOS_NT
 153 # ifndef HAVE_GETCWD
 154 #   define HAVE_GETCWD
 155 # endif /* undef HAVE_GETCWD */
 156 #else /* not WINDOWSNT */
 157 # ifdef STDC_HEADERS
 158 #  include <stdlib.h>
 159 #  include <string.h>
 160 # else /* no standard C headers */
 161    extern char *getenv __P((const char *));
 162    extern char *strcpy __P((char *, const char *));
 163    extern char *strncpy __P((char *, const char *, unsigned long));
 164    extern char *strcat __P((char *, const char *));
 165    extern char *strncat __P((char *, const char *, unsigned long));
 166    extern int strcmp __P((const char *, const char *));
 167    extern int strncmp __P((const char *, const char *, unsigned long));
 168    extern int system __P((const char *));
 169    extern unsigned long strlen __P((const char *));
 170    extern void *malloc __P((unsigned long));
 171    extern void *realloc __P((void *, unsigned long));
 172    extern void exit __P((int));
 173    extern void free __P((void *));
 174    extern void *memmove __P((void *, const void *, unsigned long));
 175 #  define EXIT_SUCCESS  0
 176 #  define EXIT_FAILURE  1
 177 # endif
 178 #endif /* !WINDOWSNT */
 179
 180 #ifdef HAVE_UNISTD_H
 181 # include <unistd.h>
 182 #else
 183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 184     extern char *getcwd (char *buf, size_t size);
 185 # endif
 186 #endif /* HAVE_UNISTD_H */
 187
 188 #include <stdio.h>
 189 #include <ctype.h>
 190 #include <errno.h>
 191 #ifndef errno
 192   extern int errno;
 193 #endif
 194 #include <sys/types.h>
 195 #include <sys/stat.h>
 196
 197 #include <assert.h>
 198 #ifdef NDEBUG
 199 # undef  assert                 /* some systems have a buggy assert.h */
 200 # define assert(x) ((void) 0)
 201 #endif
 202
 203 #if !defined (S_ISREG) && defined (S_IFREG)
 204 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 205 #endif
 206
 207 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 208 # define NO_LONG_OPTIONS TRUE
 209 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 210   extern char *optarg;
 211   extern int optind, opterr;
 212 #else
 213 # define NO_LONG_OPTIONS FALSE
 214 # include <getopt.h>
 215 #endif /* NO_LONG_OPTIONS */
 216
 217 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 218 # ifdef __CYGWIN__              /* compiling on Cygwin */
 219                              !!! NOTICE !!!
 220  the regex.h distributed with Cygwin is not compatible with etags, alas!
 221 If you want regular expression support, you should delete this notice and
 222               arrange to use the GNU regex.h and regex.c.
 223 # endif
 224 #endif
 225 #include <regex.h>
 226
 227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 228  Leave it undefined to make the program "etags", which makes emacs-style
 229  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 230 #ifdef CTAGS
 231 # undef  CTAGS
 232 # define CTAGS TRUE
 233 #else
 234 # define CTAGS FALSE
 235 #endif
 236
 237 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t)) /* nonzero iff equal; both operands must be non-NULL */
 238 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t)) /* same test via etags_strcasecmp */
 239 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n)) /* compares at most n characters */
 240 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n)) /* same test via etags_strncasecmp */
 241
 242 #define CHARS 256               /* size of the lookup tables below: 2^8 */
 243 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* table index in 0..CHARS-1 */
 244 #define iswhite(c)      (_wht[CHAR(c)]) /* c is whitespace (see white) */
 245 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 246 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start a token (see begtk) */
 247 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in a token (see midtk) */
 248 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends a token (see endtk) */
 249
 250 #define ISALNUM(c)      isalnum (CHAR(c))  /* ctype tests applied to CHAR(c), */
 251 #define ISALPHA(c)      isalpha (CHAR(c))  /* i.e. to a value in 0..CHARS-1   */
 252 #define ISDIGIT(c)      isdigit (CHAR(c))
 253 #define ISLOWER(c)      islower (CHAR(c))
 254
 255 #define lowcase(c)      tolower (CHAR(c))  /* case conversion of CHAR(c) */
 256 #define upcase(c)       toupper (CHAR(c))
 257
 258
 259 /*
 260  *      xnew, xrnew -- allocate, reallocate storage
 261  *
 262  * SYNOPSIS:    Type *xnew (int n, Type);
 263  *              void xrnew (OldPointer, int n, Type);
 264  */
 265 #if DEBUG
 266 # include "chkmalloc.h"
 267 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 268                                                   (n) * sizeof (Type)))
 269 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 270                                         (char *) (op), (n) * sizeof (Type)))
 271 #else
 272 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 273 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 274                                         (char *) (op), (n) * sizeof (Type)))
 275 #endif
 276
 277 #define bool int
 278
 279 typedef void Lang_function __P((FILE *));
 280
 281 typedef struct                /* pairs a file name suffix with a decompressor */
 282 {
 283   char *suffix;                 /* file name suffix for this compressor */
 284   char *command;                /* command: takes one arg, decompresses to stdout */
 285 } compressor;
 286
 287 typedef struct                /* description of one supported language */
 288 {
 289   char *name;                   /* language name */
 290   char *help;                   /* detailed help text for the language */
 291   Lang_function *function;      /* parse function (see Lang_function) */
 292   char **suffixes;              /* name suffixes of this language's files */
 293   char **filenames;             /* full names of this language's files */
 294   char **interpreters;          /* interpreters for this language */
 295   bool metasource;              /* source used to generate other sources */
 296 } language;
 297
 298 typedef struct fdesc          /* description of one input file */
 299 {
 300   struct fdesc *next;           /* next description in the linked list */
 301   char *infname;                /* uncompressed input file name */
 302   char *infabsname;             /* absolute uncompressed input file name */
 303   char *infabsdir;              /* absolute dir of input file */
 304   char *taggedfname;            /* file name to write in tagfile */
 305   language *lang;               /* language of file */
 306   char *prop;                   /* file properties to write in tagfile */
 307   bool usecharno;               /* etags tags shall contain char number */
 308   bool written;                 /* entry has been written in the tags file */
 309 } fdesc;
 310
 311 typedef struct node_st
 312 {                               /* one tag; node of the sorting tree */
 313   struct node_st *left, *right; /* left and right children */
 314   fdesc *fdp;                   /* description of file to which tag belongs */
 315   char *name;                   /* tag name */
 316   char *regex;                  /* search regexp */
 317   bool valid;                   /* write this tag on the tag file */
 318   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 319   bool been_warned;             /* warning already given for duplicated tag */
 320   int lno;                      /* line number tag is on */
 321   long cno;                     /* character number line starts on */
 322 } node;
 323
 324 /*
 325  * A `linebuffer' is a structure which holds a line of text.
 326  * `readline_internal' reads a line from a stream into a linebuffer
 327  * and works regardless of the length of the line.
 328  * SIZE is the size of BUFFER, LEN is the length of the string in
 329  * BUFFER after readline reads it.
 330  */
 331 typedef struct
 332 {
 333   long size;                    /* size of BUFFER */
 334   int len;                      /* length of the string currently in BUFFER */
 335   char *buffer;                 /* storage holding the line of text */
 336 } linebuffer;
 337
 338 /* Used to support mixing of --lang and file names. */
 339 typedef struct                /* one classified command-line argument */
 340 {
 341   enum {
 342     at_language,                /* a language specification */
 343     at_regexp,                  /* a regular expression */
 344     at_filename,                /* a file name */
 345     at_stdin,                   /* read from stdin here */
 346     at_end                      /* stop parsing the list */
 347   } arg_type;                   /* argument type */
 348   language *lang;               /* language associated with the argument */
 349   char *what;                   /* the argument itself */
 350 } argument;
 351
 352 /* Structure defining a regular expression. */
 353 typedef struct regexp
 354 {
 355   struct regexp *p_next;        /* pointer to next in list */
 356   language *lang;               /* if set, use only for this language */
 357   char *pattern;                /* the regexp pattern */
 358   char *name;                   /* tag name */
 359   struct re_pattern_buffer *pat; /* the compiled pattern */
 360   struct re_registers regs;     /* re registers */
 361   bool error_signaled;          /* an error was already signaled for this regexp */
 362   bool force_explicit_name;     /* do not allow implicit tag name */
 363   bool ignore_case;             /* ignore case when matching */
 364   bool multi_line;              /* do a multi-line match on the whole file */
 365 } regexp;
 366
 367
 368 /* Many compilers barf on this:
 369         Lang_function Ada_funcs;
 370    so let's write it this way */
 371 static void Ada_funcs __P((FILE *));
 372 static void Asm_labels __P((FILE *));
 373 static void C_entries __P((int c_ext, FILE *));
 374 static void default_C_entries __P((FILE *));
 375 static void plain_C_entries __P((FILE *));
 376 static void Cjava_entries __P((FILE *));
 377 static void Cobol_paragraphs __P((FILE *));
 378 static void Cplusplus_entries __P((FILE *));
 379 static void Cstar_entries __P((FILE *));
 380 static void Erlang_functions __P((FILE *));
 381 static void Forth_words __P((FILE *));
 382 static void Fortran_functions __P((FILE *));
 383 static void HTML_labels __P((FILE *));
 384 static void Lisp_functions __P((FILE *));
 385 static void Lua_functions __P((FILE *));
 386 static void Makefile_targets __P((FILE *));
 387 static void Pascal_functions __P((FILE *));
 388 static void Perl_functions __P((FILE *));
 389 static void PHP_functions __P((FILE *));
 390 static void PS_functions __P((FILE *));
 391 static void Prolog_functions __P((FILE *));
 392 static void Python_functions __P((FILE *));
 393 static void Scheme_functions __P((FILE *));
 394 static void TeX_commands __P((FILE *));
 395 static void Texinfo_nodes __P((FILE *));
 396 static void Yacc_entries __P((FILE *));
 397 static void just_read_file __P((FILE *));
 398
 399 static void print_language_names __P((void));
 400 static void print_version __P((void));
 401 static void print_help __P((argument *));
 402 int main __P((int, char **));
 403
 404 static compressor *get_compressor_from_suffix __P((char *, char **));
 405 static language *get_language_from_langname __P((const char *));
 406 static language *get_language_from_interpreter __P((char *));
 407 static language *get_language_from_filename __P((char *, bool));
 408 static void readline __P((linebuffer *, FILE *));
 409 static long readline_internal __P((linebuffer *, FILE *));
 410 static bool nocase_tail __P((char *));
 411 static void get_tag __P((char *, char **));
 412
 413 static void analyse_regex __P((char *));
 414 static void free_regexps __P((void));
 415 static void regex_tag_multiline __P((void));
 416 static void error __P((const char *, const char *));
 417 static void suggest_asking_for_help __P((void));
 418 void fatal __P((char *, char *));
 419 static void pfatal __P((char *));
 420 static void add_node __P((node *, node **));
 421
 422 static void init __P((void));
 423 static void process_file_name __P((char *, language *));
 424 static void process_file __P((FILE *, char *, language *));
 425 static void find_entries __P((FILE *));
 426 static void free_tree __P((node *));
 427 static void free_fdesc __P((fdesc *));
 428 static void pfnote __P((char *, bool, char *, int, int, long));
 429 static void make_tag __P((char *, int, bool, char *, int, int, long));
 430 static void invalidate_nodes __P((fdesc *, node **));
 431 static void put_entries __P((node *));
 432
 433 static char *concat __P((char *, char *, char *));
 434 static char *skip_spaces __P((char *));
 435 static char *skip_non_spaces __P((char *));
 436 static char *savenstr __P((char *, int));
 437 static char *savestr __P((char *));
 438 static char *etags_strchr __P((const char *, int));
 439 static char *etags_strrchr __P((const char *, int));
 440 static int etags_strcasecmp __P((const char *, const char *));
 441 static int etags_strncasecmp __P((const char *, const char *, int));
 442 static char *etags_getcwd __P((void));
 443 static char *relative_filename __P((char *, char *));
 444 static char *absolute_filename __P((char *, char *));
 445 static char *absolute_dirname __P((char *, char *));
 446 static bool filename_is_absolute __P((char *f));
 447 static void canonicalize_filename __P((char *));
 448 static void linebuffer_init __P((linebuffer *));
 449 static void linebuffer_setlen __P((linebuffer *, int));
 450 static PTR xmalloc __P((unsigned int));
 451 static PTR xrealloc __P((char *, unsigned int));
 452
 453 \f
 454 static char searchar = '/';     /* use /.../ searches */
 455
 456 static char *tagfile;           /* output file */
 457 static char *progname;          /* name this program was invoked with */
 458 static char *cwd;               /* current working directory */
 459 static char *tagfiledir;        /* directory of tagfile */
 460 static FILE *tagf;              /* ioptr for tags file */
 461
 462 static fdesc *fdhead;           /* head of file description list */
 463 static fdesc *curfdp;           /* current file description */
 464 static int lineno;              /* line number of current line */
 465 static long charno;             /* current character number */
 466 static long linecharno;         /* charno of start of current line */
 467 static char *dbp;               /* pointer to start of current tag */
 468
 469 static const int invalidcharno = -1;
 470
 471 static node *nodehead;          /* the head of the binary tree of tags */
 472 static node *last_node;         /* the last node created */
 473
 474 static linebuffer lb;           /* the current line */
 475 static linebuffer filebuf;      /* a buffer containing the whole file */
 476 static linebuffer token_name;   /* a buffer containing a tag name */
 477
 478 /* boolean "functions" (see init)       */
 479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 480 static char
 481   /* white chars */
 482   *white = " \f\t\n\r\v",
 483   /* not in a name */
 484   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 485   /* token ending chars */
 486   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 487   /* token starting chars */
 488   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 489   /* valid in-token chars */
 490   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 491
 492 static bool append_to_tagfile;  /* -a: append to tags */
 493 /* The next five default to TRUE in C and derived languages.  */
 494 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 495 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 496                                 /* 0 struct/enum/union decls, and C++ */
 497                                 /* member functions. */
 498 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 499                                 /* constants and variables. */
 500                                 /* -D: opposite of -d.  Default under ctags. */
 501 static bool globals;            /* create tags for global variables */
 502 static bool members;            /* create tags for C member variables */
 503 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 504 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 505 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 506 static bool update;             /* -u: update tags */
 507 static bool vgrind_style;       /* -v: create vgrind style index output */
 508 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 509 static bool cxref_style;        /* -x: create cxref style output */
 510 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 511 static bool ignoreindent;       /* -I: ignore indentation in C */
 512 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 513
 514 /* STDIN is defined in LynxOS system headers */
 515 #ifdef STDIN
 516 # undef STDIN
 517 #endif
 518
 519 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 520 static bool parsing_stdin;      /* --parse-stdin used */
 521
 522 static regexp *p_head;          /* list of all regexps */
 523 static bool need_filebuf;       /* some regexes are multi-line */
 524
 525 static struct option longopts[] =   /* long-option table (struct option from <getopt.h>) */
 526 {  /* columns: name, has_arg, flag, val; with a non-NULL flag, getopt_long stores val in *flag */
 527   { "append",             no_argument,       NULL,               'a'   },
 528   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 529   { "c++",                no_argument,       NULL,               'C'   },
 530   { "declarations",       no_argument,       &declarations,      TRUE  },
 531   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 532   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 533   { "help",               no_argument,       NULL,               'h'   },
 534   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same name as the row above -- confirm intended */
 535   { "ignore-indentation", no_argument,       NULL,               'I'   },
 536   { "language",           required_argument, NULL,               'l'   },
 537   { "members",            no_argument,       &members,           TRUE  },
 538   { "no-members",         no_argument,       &members,           FALSE },
 539   { "output",             required_argument, NULL,               'o'   },
 540   { "regex",              required_argument, NULL,               'r'   },
 541   { "no-regex",           no_argument,       NULL,               'R'   },
 542   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 543   { "parse-stdin",        required_argument, NULL,               STDIN }, /* STDIN is a code outside the char range */
 544   { "version",            no_argument,       NULL,               'V'   },
 545
 546 #if CTAGS /* Ctags options */
 547   { "backward-search",    no_argument,       NULL,               'B'   },
 548   { "cxref",              no_argument,       NULL,               'x'   },
 549   { "defines",            no_argument,       NULL,               'd'   },
 550   { "globals",            no_argument,       &globals,           TRUE  },
 551   { "typedefs",           no_argument,       NULL,               't'   },
 552   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 553   { "update",             no_argument,       NULL,               'u'   },
 554   { "vgrind",             no_argument,       NULL,               'v'   },
 555   { "no-warn",            no_argument,       NULL,               'w'   },
 556
 557 #else /* Etags options */
 558   { "no-defines",         no_argument,       NULL,               'D'   },
 559   { "no-globals",         no_argument,       &globals,           FALSE },
 560   { "include",            required_argument, NULL,               'i'   },
 561 #endif
 562   { NULL }                      /* last entry: NULL name */
 563 };
 564
 565 static compressor compressors[] =   /* recognized suffixes and their decompression commands */
 566 {
 567   { "z", "gzip -d -c"},
 568   { "Z", "gzip -d -c"},
 569   { "gz", "gzip -d -c"},
 570   { "GZ", "gzip -d -c"},
 571   { "bz2", "bzip2 -d -c" },
 572   { NULL }                      /* last entry: NULL suffix */
 573 };
 574
 575 /*
 576  * Language stuff.
 577  */
 578
 579 /* Ada code */
 580 static char *Ada_suffixes [] =
 581   { "ads", "adb", "ada", NULL };
 582 static char Ada_help [] =
 583 "In Ada code, functions, procedures, packages, tasks and types are\n\
 584 tags.  Use the `--packages-only' option to create tags for\n\
 585 packages only.\n\
 586 Ada tag names have suffixes indicating the type of entity:\n\
 587         Entity type:    Qualifier:\n\
 588         ------------    ----------\n\
 589         function        /f\n\
 590         procedure       /p\n\
 591         package spec    /s\n\
 592         package body    /b\n\
 593         type            /t\n\
 594         task            /k\n\
 595 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 596 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 597 will just search for any tag `bidule'.";
 598
 599 /* Assembly code */
 600 static char *Asm_suffixes [] =
 601   { "a",        /* Unix assembler */
 602     "asm", /* Microcontroller assembly */
 603     "def", /* BSO/Tasking definition includes  */
 604     "inc", /* Microcontroller include files */
 605     "ins", /* Microcontroller include files */
 606     "s", "sa", /* Unix assembler */
 607     "S",   /* cpp-processed Unix assembler */
 608     "src", /* BSO/Tasking C compiler output */
 609     NULL
 610   };
 611 static char Asm_help [] =
 612 "In assembler code, labels appearing at the beginning of a line,\n\
 613 followed by a colon, are tags.";
 614
 615
 616 /* Note that .c and .h can be considered C++, if the --c++ flag was
 617    given, or if the `class' or `template' keywords are met inside the file.
 618    That is why default_C_entries is called for these. */
 619 static char *default_C_suffixes [] =
 620   { "c", "h", NULL };
 621 #if CTAGS                               /* C help for Ctags */
 622 static char default_C_help [] =
 623 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 624 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 625 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 626 Use --globals to tag global variables.\n\
 627 You can tag function declarations and external variables by\n\
 628 using `--declarations', and struct members by using `--members'.";
 629 #else                                   /* C help for Etags */
 630 static char default_C_help [] =
 631 "In C code, any C function or typedef is a tag, and so are\n\
 632 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 633 definitions and `enum' constants are tags unless you specify\n\
 634 `--no-defines'.  Global variables are tags unless you specify\n\
 635 `--no-globals' and so are struct members unless you specify\n\
 636 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 637 `--no-members' can make the tags table file much smaller.\n\
 638 You can tag function declarations and external variables by\n\
 639 using `--declarations'.";
 640 #endif  /* C help for Ctags and Etags */
 641
 642 static char *Cplusplus_suffixes [] =
 643   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 644     "M",                        /* Objective C++ */
 645     "pdb",                      /* Postscript with C syntax */
 646     NULL };
 647 static char Cplusplus_help [] =
 648 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 649 --help --lang=c --lang=c++ for full help.)\n\
 650 In addition to C tags, member functions are also recognized.  Member\n\
 651 variables are recognized unless you use the `--no-members' option.\n\
 652 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 653 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 654 `operator+'.";
 655
 656 static char *Cjava_suffixes [] =
 657   { "java", NULL };
 658 static char Cjava_help [] =
 659 "In Java code, all the tags constructs of C and C++ code are\n\
 660 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 661
 662
 663 static char *Cobol_suffixes [] =
 664   { "COB", "cob", NULL };
 665 static char Cobol_help [] =
 666 "In Cobol code, tags are paragraph names; that is, any word\n\
 667 starting in column 8 and followed by a period.";
 668
 669 static char *Cstar_suffixes [] =
 670   { "cs", "hs", NULL };
 671
 672 static char *Erlang_suffixes [] =
 673   { "erl", "hrl", NULL };
 674 static char Erlang_help [] =
 675 "In Erlang code, the tags are the functions, records and macros\n\
 676 defined in the file.";
 677
 678 static char *Forth_suffixes [] =        /* file name suffixes of Forth sources */
 679   { "fth", "tok", NULL };
 680 static char Forth_help [] =
 681 "In Forth code, tags are words defined by `:',\n\
 682 constant, code, create, defer, value, variable, buffer:, field.";
 683
 684 static char *Fortran_suffixes [] =
 685   { "F", "f", "f90", "for", NULL };
 686 static char Fortran_help [] =
 687 "In Fortran code, functions, subroutines and block data are tags.";
 688
 689 static char *HTML_suffixes [] =
 690   { "htm", "html", "shtml", NULL };
 691 static char HTML_help [] =
 692 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 693 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 694 occurrences of `id='.";
 695
 696 static char *Lisp_suffixes [] =
 697   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 698 static char Lisp_help [] =
 699 "In Lisp code, any function defined with `defun', any variable\n\
 700 defined with `defvar' or `defconst', and in general the first\n\
 701 argument of any expression that starts with `(def' in column zero\n\
 702 is a tag.";
 703
 704 static char *Lua_suffixes [] =
 705   { "lua", "LUA", NULL };
 706 static char Lua_help [] =
 707 "In Lua scripts, all functions are tags.";
 708
 709 static char *Makefile_filenames [] =
 710   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 711 static char Makefile_help [] =
 712 "In makefiles, targets are tags; additionally, variables are tags\n\
 713 unless you specify `--no-globals'.";
 714
 715 static char *Objc_suffixes [] =
 716   { "lm",                       /* Objective lex file */
 717     "m",                        /* Objective C file */
 718      NULL };
 719 static char Objc_help [] =
 720 "In Objective C code, tags include Objective C definitions for classes,\n\
 721 class categories, methods and protocols.  Tags for variables and\n\
 722 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 723 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 724
 725 static char *Pascal_suffixes [] =
 726   { "p", "pas", NULL };
 727 static char Pascal_help [] =
 728 "In Pascal code, the tags are the functions and procedures defined\n\
 729 in the file.";
 730 /* " // this is for working around an Emacs highlighting bug... */
 731
 732 static char *Perl_suffixes [] =
 733   { "pl", "pm", NULL };
 734 static char *Perl_interpreters [] =
 735   { "perl", "@PERL@", NULL };
 736 static char Perl_help [] =
 737 "In Perl code, the tags are the packages, subroutines and variables\n\
 738 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 739 `--globals' if you want to tag global variables.  Tags for\n\
 740 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 741 defined in the default package is `main::SUB'.";
 742
 743 static char *PHP_suffixes [] =
 744   { "php", "php3", "php4", NULL };
 745 static char PHP_help [] =
 746 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 747 the `--no-members' option, vars are tags too.";
 748
 749 static char *plain_C_suffixes [] =
 750   { "pc",                       /* Pro*C file */
 751      NULL };
 752
 753 static char *PS_suffixes [] =
 754   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 755 static char PS_help [] =
 756 "In PostScript code, the tags are the functions.";
 757
 758 static char *Prolog_suffixes [] =
 759   { "prolog", NULL };
 760 static char Prolog_help [] =
 761 "In Prolog code, tags are predicates and rules at the beginning of\n\
 762 line.";
 763
 764 static char *Python_suffixes [] =
 765   { "py", NULL };
 766 static char Python_help [] =
 767 "In Python code, `def' or `class' at the beginning of a line\n\
 768 generate a tag.";
 769
 770 /* Can't do the `SCM' or `scm' prefix with a version number. */
 771 static char *Scheme_suffixes [] =
 772   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 773 static char Scheme_help [] =
 774 "In Scheme code, tags include anything defined with `def' or with a\n\
 775 construct whose name starts with `def'.  They also include\n\
 776 variables set with `set!' at top level in the file.";
 777
 778 static char *TeX_suffixes [] =
 779   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 780 static char TeX_help [] =
 781 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 782 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 783 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 784 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 785 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 786 \n\
 787 Other commands can be specified by setting the environment variable\n\
 788 `TEXTAGS' to a colon-separated list like, for example,\n\
 789      TEXTAGS=\"mycommand:myothercommand\".";
 790
 791
 792 static char *Texinfo_suffixes [] =
 793   { "texi", "texinfo", "txi", NULL };
 794 static char Texinfo_help [] =
 795 "for texinfo files, lines starting with @node are tagged.";
 796
 797 static char *Yacc_suffixes [] =
 798   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 799 static char Yacc_help [] =
 800 "In Bison or Yacc input files, each rule defines as a tag the\n\
 801 nonterminal it constructs.  The portions of the file that contain\n\
 802 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 803 for full help).";
 804
 805 static char auto_help [] =
 806 "`auto' is not a real language, it indicates to use\n\
 807 a default language for files base on file name suffix and file contents.";
 808
 809 static char none_help [] =
 810 "`none' is not a real language, it indicates to only do\n\
 811 regexp processing on files.";
 812
 813 static char no_lang_help [] =
 814 "No detailed help available for this language.";
 815
 816
 817 /*
 818  * Table of languages.
 819  *
 820  * It is ok for a given function to be listed under more than one
 821  * name.  I just didn't.
 822  */
 823
 824 static language lang_names [] =
 825 {
 826   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 827   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 828   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 829   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 830   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 831   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 832   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 833   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 834   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 835   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 836   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 837   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 838   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 839   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 840   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 841   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 842   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 843   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 844   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 845   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 846   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 847   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 848   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 849   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 850   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 851   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 852   { "auto",      auto_help },                      /* default guessing scheme */
 853   { "none",      none_help,      just_read_file }, /* regexp matching only */
 854   { NULL }                /* end of list */
 855 };
 856
 857 \f
 858 static void
 859 print_language_names ()
 860 {
 861   language *lang;
 862   char **name, **ext;
 863
 864   puts ("\nThese are the currently supported languages, along with the\n\
 865 default file names and dot suffixes:");
 866   for (lang = lang_names; lang->name != NULL; lang++)
 867     {
 868       printf ("  %-*s", 10, lang->name);
 869       if (lang->filenames != NULL)
 870         for (name = lang->filenames; *name != NULL; name++)
 871           printf (" %s", *name);
 872       if (lang->suffixes != NULL)
 873         for (ext = lang->suffixes; *ext != NULL; ext++)
 874           printf (" .%s", *ext);
 875       puts ("");
 876     }
 877   puts ("where `auto' means use default language for files based on file\n\
 878 name suffix, and `none' means only do regexp processing on files.\n\
 879 If no language is specified and no matching suffix is found,\n\
 880 the first line of the file is read for a sharp-bang (#!) sequence\n\
 881 followed by the name of an interpreter.  If no such sequence is found,\n\
 882 Fortran is tried first; if no tags are found, C is tried next.\n\
 883 When parsing any C file, a \"class\" or \"template\" keyword\n\
 884 switches to C++.");
 885   puts ("Compressed files are supported using gzip and bzip2.\n\
 886 \n\
 887 For detailed help on a given language use, for example,\n\
 888 etags --help --lang=ada.");
 889 }
 890
 891 #ifndef EMACS_NAME
 892 # define EMACS_NAME "standalone"
 893 #endif
 894 #ifndef VERSION
 895 # define VERSION "17.38.1.3"
 896 #endif
 897 static void
 898 print_version ()
 899 {
 900   /* Makes it easier to update automatically. */
 901   char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
 902
 903   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 904   puts (emacs_copyright);
 905   puts ("This program is distributed under the terms in ETAGS.README");
 906
 907   exit (EXIT_SUCCESS);
 908 }
 909
 910 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 911 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 912 #endif
 913
 914 static void
 915 print_help (argbuffer)
 916      argument *argbuffer;
 917 {
 918   bool help_for_lang = FALSE;
 919
 920   for (; argbuffer->arg_type != at_end; argbuffer++)
 921     if (argbuffer->arg_type == at_language)
 922       {
 923         if (help_for_lang)
 924           puts ("");
 925         puts (argbuffer->lang->help);
 926         help_for_lang = TRUE;
 927       }
 928
 929   if (help_for_lang)
 930     exit (EXIT_SUCCESS);
 931
 932   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 933 \n\
 934 These are the options accepted by %s.\n", progname, progname);
 935   if (NO_LONG_OPTIONS)
 936     puts ("WARNING: long option names do not work with this executable,\n\
 937 as it is not linked with GNU getopt.");
 938   else
 939     puts ("You may use unambiguous abbreviations for the long option names.");
 940   puts ("  A - as file name means read names from stdin (one per line).\n\
 941 Absolute names are stored in the output file as they are.\n\
 942 Relative ones are stored relative to the output file's directory.\n");
 943
 944   puts ("-a, --append\n\
 945         Append tag entries to existing tags file.");
 946
 947   puts ("--packages-only\n\
 948         For Ada files, only generate tags for packages.");
 949
 950   if (CTAGS)
 951     puts ("-B, --backward-search\n\
 952         Write the search commands for the tag entries using '?', the\n\
 953         backward-search command instead of '/', the forward-search command.");
 954
 955   /* This option is mostly obsolete, because etags can now automatically
 956      detect C++.  Retained for backward compatibility and for debugging and
 957      experimentation.  In principle, we could want to tag as C++ even
 958      before any "class" or "template" keyword.
 959   puts ("-C, --c++\n\
 960         Treat files whose name suffix defaults to C language as C++ files.");
 961   */
 962
 963   puts ("--declarations\n\
 964         In C and derived languages, create tags for function declarations,");
 965   if (CTAGS)
 966     puts ("\tand create tags for extern variables if --globals is used.");
 967   else
 968     puts
 969       ("\tand create tags for extern variables unless --no-globals is used.");
 970
 971   if (CTAGS)
 972     puts ("-d, --defines\n\
 973         Create tag entries for C #define constants and enum constants, too.");
 974   else
 975     puts ("-D, --no-defines\n\
 976         Don't create tag entries for C #define constants and enum constants.\n\
 977         This makes the tags file smaller.");
 978
 979   if (!CTAGS)
 980     puts ("-i FILE, --include=FILE\n\
 981         Include a note in tag file indicating that, when searching for\n\
 982         a tag, one should also consult the tags file FILE after\n\
 983         checking the current file.");
 984
 985   puts ("-l LANG, --language=LANG\n\
 986         Force the following files to be considered as written in the\n\
 987         named language up to the next --language=LANG option.");
 988
 989   if (CTAGS)
 990     puts ("--globals\n\
 991         Create tag entries for global variables in some languages.");
 992   else
 993     puts ("--no-globals\n\
 994         Do not create tag entries for global variables in some\n\
 995         languages.  This makes the tags file smaller.");
 996
 997   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 998     puts ("--no-line-directive\n\
 999         Ignore #line preprocessor directives in C and derived languages.");
1000
1001   if (CTAGS)
1002     puts ("--members\n\
1003         Create tag entries for members of structures in some languages.");
1004   else
1005     puts ("--no-members\n\
1006         Do not create tag entries for members of structures\n\
1007         in some languages.");
1008
1009   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1010         Make a tag for each line matching a regular expression pattern\n\
1011         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1012         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
1013         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1014         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1015   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1016         For example Tcl named tags can be created with:\n\
1017           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1018         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1019         `m' means to allow multi-line matches, `s' implies `m' and\n\
1020         causes dot to match any character, including newline.");
1021
1022   puts ("-R, --no-regex\n\
1023         Don't create tags from regexps for the following files.");
1024
1025   puts ("-I, --ignore-indentation\n\
1026         In C and C++ do not assume that a closing brace in the first\n\
1027         column is the final brace of a function or structure definition.");
1028
1029   puts ("-o FILE, --output=FILE\n\
1030         Write the tags to FILE.");
1031
1032   puts ("--parse-stdin=NAME\n\
1033         Read from standard input and record tags as belonging to file NAME.");
1034
1035   if (CTAGS)
1036     {
1037       puts ("-t, --typedefs\n\
1038         Generate tag entries for C and Ada typedefs.");
1039       puts ("-T, --typedefs-and-c++\n\
1040         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1041         and C++ member functions.");
1042     }
1043
1044   if (CTAGS)
1045     puts ("-u, --update\n\
1046         Update the tag entries for the given files, leaving tag\n\
1047         entries for other files in place.  Currently, this is\n\
1048         implemented by deleting the existing entries for the given\n\
1049         files and then rewriting the new entries at the end of the\n\
1050         tags file.  It is often faster to simply rebuild the entire\n\
1051         tag file than to use this.");
1052
1053   if (CTAGS)
1054     {
1055       puts ("-v, --vgrind\n\
1056         Print on the standard output an index of items intended for\n\
1057         human consumption, similar to the output of vgrind.  The index\n\
1058         is sorted, and gives the page number of each item.");
1059
1060       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1061         puts ("-w, --no-duplicates\n\
1062         Do not create duplicate tag entries, for compatibility with\n\
1063         traditional ctags.");
1064
1065       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066         puts ("-w, --no-warn\n\
1067         Suppress warning messages about duplicate tag entries.");
1068
1069       puts ("-x, --cxref\n\
1070         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1071         The output uses line numbers instead of page numbers, but\n\
1072         beyond that the differences are cosmetic; try both to see\n\
1073         which you like.");
1074     }
1075
1076   puts ("-V, --version\n\
1077         Print the version of the program.\n\
1078 -h, --help\n\
1079         Print this help message.\n\
1080         Followed by one or more `--language' options prints detailed\n\
1081         help about tag generation for the specified languages.");
1082
1083   print_language_names ();
1084
1085   puts ("");
1086   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1087
1088   exit (EXIT_SUCCESS);
1089 }
1090
1091 \f
1092 int
1093 main (argc, argv)
1094      int argc;
1095      char *argv[];
1096 {
1097   int i;
1098   unsigned int nincluded_files;
1099   char **included_files;
1100   argument *argbuffer;
1101   int current_arg, file_count;
1102   linebuffer filename_lb;
1103   bool help_asked = FALSE;
1104  char *optstring;
1105  int opt;
1106
1107
1108 #ifdef DOS_NT
1109   _fmode = O_BINARY;   /* all of files are treated as binary files */
1110 #endif /* DOS_NT */
1111
1112   progname = argv[0];
1113   nincluded_files = 0;
1114   included_files = xnew (argc, char *);
1115   current_arg = 0;
1116   file_count = 0;
1117
1118   /* Allocate enough no matter what happens.  Overkill, but each one
1119      is small. */
1120   argbuffer = xnew (argc, argument);
1121
1122   /*
1123    * Always find typedefs and structure tags.
1124    * Also default to find macro constants, enum constants, struct
1125    * members and global variables.  Do it for both etags and ctags.
1126    */
1127   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1128   globals = members = TRUE;
1129
1130   /* When the optstring begins with a '-' getopt_long does not rearrange the
1131      non-options arguments to be at the end, but leaves them alone. */
1132   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1133                       "ac:Cf:Il:o:r:RSVhH",
1134                       (CTAGS) ? "BxdtTuvw" : "Di:");
1135
1136   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1137     switch (opt)
1138       {
1139       case 0:
1140         /* If getopt returns 0, then it has already processed a
1141            long-named option.  We should do nothing.  */
1142         break;
1143
1144       case 1:
1145         /* This means that a file name has been seen.  Record it. */
1146         argbuffer[current_arg].arg_type = at_filename;
1147         argbuffer[current_arg].what     = optarg;
1148         ++current_arg;
1149         ++file_count;
1150         break;
1151
1152       case STDIN:
1153         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1154         argbuffer[current_arg].arg_type = at_stdin;
1155         argbuffer[current_arg].what     = optarg;
1156         ++current_arg;
1157         ++file_count;
1158         if (parsing_stdin)
1159           fatal ("cannot parse standard input more than once", (char *)NULL);
1160         parsing_stdin = TRUE;
1161         break;
1162
1163         /* Common options. */
1164       case 'a': append_to_tagfile = TRUE;       break;
1165       case 'C': cplusplus = TRUE;               break;
1166       case 'f':         /* for compatibility with old makefiles */
1167       case 'o':
1168         if (tagfile)
1169           {
1170             error ("-o option may only be given once.", (char *)NULL);
1171             suggest_asking_for_help ();
1172             /* NOTREACHED */
1173           }
1174         tagfile = optarg;
1175         break;
1176       case 'I':
1177       case 'S':         /* for backward compatibility */
1178         ignoreindent = TRUE;
1179         break;
1180       case 'l':
1181         {
1182           language *lang = get_language_from_langname (optarg);
1183           if (lang != NULL)
1184             {
1185               argbuffer[current_arg].lang = lang;
1186               argbuffer[current_arg].arg_type = at_language;
1187               ++current_arg;
1188             }
1189         }
1190         break;
1191       case 'c':
1192         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1193         optarg = concat (optarg, "i", ""); /* memory leak here */
1194         /* FALLTHRU */
1195       case 'r':
1196         argbuffer[current_arg].arg_type = at_regexp;
1197         argbuffer[current_arg].what = optarg;
1198         ++current_arg;
1199         break;
1200       case 'R':
1201         argbuffer[current_arg].arg_type = at_regexp;
1202         argbuffer[current_arg].what = NULL;
1203         ++current_arg;
1204         break;
1205       case 'V':
1206         print_version ();
1207         break;
1208       case 'h':
1209       case 'H':
1210         help_asked = TRUE;
1211         break;
1212
1213         /* Etags options */
1214       case 'D': constantypedefs = FALSE;                        break;
1215       case 'i': included_files[nincluded_files++] = optarg;     break;
1216
1217         /* Ctags options. */
1218       case 'B': searchar = '?';                                 break;
1219       case 'd': constantypedefs = TRUE;                         break;
1220       case 't': typedefs = TRUE;                                break;
1221       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1222       case 'u': update = TRUE;                                  break;
1223       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1224       case 'x': cxref_style = TRUE;                             break;
1225       case 'w': no_warnings = TRUE;                             break;
1226       default:
1227         suggest_asking_for_help ();
1228         /* NOTREACHED */
1229       }
1230
1231   /* No more options.  Store the rest of arguments. */
1232   for (; optind < argc; optind++)
1233     {
1234       argbuffer[current_arg].arg_type = at_filename;
1235       argbuffer[current_arg].what = argv[optind];
1236       ++current_arg;
1237       ++file_count;
1238     }
1239
1240   argbuffer[current_arg].arg_type = at_end;
1241
1242   if (help_asked)
1243     print_help (argbuffer);
1244     /* NOTREACHED */
1245
1246   if (nincluded_files == 0 && file_count == 0)
1247     {
1248       error ("no input files specified.", (char *)NULL);
1249       suggest_asking_for_help ();
1250       /* NOTREACHED */
1251     }
1252
1253   if (tagfile == NULL)
1254     tagfile = CTAGS ? "tags" : "TAGS";
1255   cwd = etags_getcwd ();        /* the current working directory */
1256   if (cwd[strlen (cwd) - 1] != '/')
1257     {
1258       char *oldcwd = cwd;
1259       cwd = concat (oldcwd, "/", "");
1260       free (oldcwd);
1261     }
1262   /* Relative file names are made relative to the current directory. */
1263   if (streq (tagfile, "-")
1264       || strneq (tagfile, "/dev/", 5))
1265     tagfiledir = cwd;
1266   else
1267     {
1268       canonicalize_filename (tagfile);
1269       tagfiledir = absolute_dirname (tagfile, cwd);
1270     }
1271
1272   init ();                      /* set up boolean "functions" */
1273
1274   linebuffer_init (&lb);
1275   linebuffer_init (&filename_lb);
1276   linebuffer_init (&filebuf);
1277   linebuffer_init (&token_name);
1278
1279   if (!CTAGS)
1280     {
1281       if (streq (tagfile, "-"))
1282         {
1283           tagf = stdout;
1284 #ifdef DOS_NT
1285           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1286              doesn't take effect until after `stdout' is already open). */
1287           if (!isatty (fileno (stdout)))
1288             setmode (fileno (stdout), O_BINARY);
1289 #endif /* DOS_NT */
1290         }
1291       else
1292         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1293       if (tagf == NULL)
1294         pfatal (tagfile);
1295     }
1296
1297   /*
1298    * Loop through files finding functions.
1299    */
1300   for (i = 0; i < current_arg; i++)
1301     {
1302       static language *lang;    /* non-NULL if language is forced */
1303       char *this_file;
1304
1305       switch (argbuffer[i].arg_type)
1306         {
1307         case at_language:
1308           lang = argbuffer[i].lang;
1309           break;
1310         case at_regexp:
1311           analyse_regex (argbuffer[i].what);
1312           break;
1313         case at_filename:
1314               this_file = argbuffer[i].what;
1315               /* Input file named "-" means read file names from stdin
1316                  (one per line) and use them. */
1317               if (streq (this_file, "-"))
1318                 {
1319                   if (parsing_stdin)
1320                     fatal ("cannot parse standard input AND read file names from it",
1321                            (char *)NULL);
1322                   while (readline_internal (&filename_lb, stdin) > 0)
1323                     process_file_name (filename_lb.buffer, lang);
1324                 }
1325               else
1326                 process_file_name (this_file, lang);
1327           break;
1328         case at_stdin:
1329           this_file = argbuffer[i].what;
1330           process_file (stdin, this_file, lang);
1331           break;
1332         }
1333     }
1334
1335   free_regexps ();
1336   free (lb.buffer);
1337   free (filebuf.buffer);
1338   free (token_name.buffer);
1339
1340   if (!CTAGS || cxref_style)
1341     {
1342       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1343       put_entries (nodehead);
1344       free_tree (nodehead);
1345       nodehead = NULL;
1346       if (!CTAGS)
1347         {
1348           fdesc *fdp;
1349
1350           /* Output file entries that have no tags. */
1351           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1352             if (!fdp->written)
1353               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1354
1355           while (nincluded_files-- > 0)
1356             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1357
1358           if (fclose (tagf) == EOF)
1359             pfatal (tagfile);
1360         }
1361
1362       exit (EXIT_SUCCESS);
1363     }
1364
1365   /* From here on, we are in (CTAGS && !cxref_style) */
1366   if (update)
1367     {
1368       char cmd[BUFSIZ];
1369       for (i = 0; i < current_arg; ++i)
1370         {
1371           switch (argbuffer[i].arg_type)
1372             {
1373             case at_filename:
1374             case at_stdin:
1375               break;
1376             default:
1377               continue;         /* the for loop */
1378             }
1379           sprintf (cmd,
1380                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1381                    tagfile, argbuffer[i].what, tagfile);
1382           if (system (cmd) != EXIT_SUCCESS)
1383             fatal ("failed to execute shell command", (char *)NULL);
1384         }
1385       append_to_tagfile = TRUE;
1386     }
1387
1388   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1389   if (tagf == NULL)
1390     pfatal (tagfile);
1391   put_entries (nodehead);       /* write all the tags (CTAGS) */
1392   free_tree (nodehead);
1393   nodehead = NULL;
1394   if (fclose (tagf) == EOF)
1395     pfatal (tagfile);
1396
1397   if (CTAGS)
1398     if (append_to_tagfile || update)
1399       {
1400         char cmd[2*BUFSIZ+20];
1401         /* Maybe these should be used:
1402            setenv ("LC_COLLATE", "C", 1);
1403            setenv ("LC_ALL", "C", 1); */
1404         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1405         exit (system (cmd));
1406       }
1407   return EXIT_SUCCESS;
1408 }
1409
1410
1411 /*
1412  * Return a compressor given the file name.  If EXTPTR is non-zero,
1413  * return a pointer into FILE where the compressor-specific
1414  * extension begins.  If no compressor is found, NULL is returned
1415  * and EXTPTR is not significant.
1416  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1417  */
1418 static compressor *
1419 get_compressor_from_suffix (file, extptr)
1420      char *file;
1421      char **extptr;
1422 {
1423   compressor *compr;
1424   char *slash, *suffix;
1425
1426   /* File has been processed by canonicalize_filename,
1427      so we don't need to consider backslashes on DOS_NT.  */
1428   slash = etags_strrchr (file, '/');
1429   suffix = etags_strrchr (file, '.');
1430   if (suffix == NULL || suffix < slash)
1431     return NULL;
1432   if (extptr != NULL)
1433     *extptr = suffix;
1434   suffix += 1;
1435   /* Let those poor souls who live with DOS 8+3 file name limits get
1436      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1437      Only the first do loop is run if not MSDOS */
1438   do
1439     {
1440       for (compr = compressors; compr->suffix != NULL; compr++)
1441         if (streq (compr->suffix, suffix))
1442           return compr;
1443       if (!MSDOS)
1444         break;                  /* do it only once: not really a loop */
1445       if (extptr != NULL)
1446         *extptr = ++suffix;
1447     } while (*suffix != '\0');
1448   return NULL;
1449 }
1450
1451
1452
1453 /*
1454  * Return a language given the name.
1455  */
1456 static language *
1457 get_language_from_langname (name)
1458      const char *name;
1459 {
1460   language *lang;
1461
1462   if (name == NULL)
1463     error ("empty language name", (char *)NULL);
1464   else
1465     {
1466       for (lang = lang_names; lang->name != NULL; lang++)
1467         if (streq (name, lang->name))
1468           return lang;
1469       error ("unknown language \"%s\"", name);
1470     }
1471
1472   return NULL;
1473 }
1474
1475
1476 /*
1477  * Return a language given the interpreter name.
1478  */
1479 static language *
1480 get_language_from_interpreter (interpreter)
1481      char *interpreter;
1482 {
1483   language *lang;
1484   char **iname;
1485
1486   if (interpreter == NULL)
1487     return NULL;
1488   for (lang = lang_names; lang->name != NULL; lang++)
1489     if (lang->interpreters != NULL)
1490       for (iname = lang->interpreters; *iname != NULL; iname++)
1491         if (streq (*iname, interpreter))
1492             return lang;
1493
1494   return NULL;
1495 }
1496
1497
1498
1499 /*
1500  * Return a language given the file name.
1501  */
1502 static language *
1503 get_language_from_filename (file, case_sensitive)
1504      char *file;
1505      bool case_sensitive;
1506 {
1507   language *lang;
1508   char **name, **ext, *suffix;
1509
1510   /* Try whole file name first. */
1511   for (lang = lang_names; lang->name != NULL; lang++)
1512     if (lang->filenames != NULL)
1513       for (name = lang->filenames; *name != NULL; name++)
1514         if ((case_sensitive)
1515             ? streq (*name, file)
1516             : strcaseeq (*name, file))
1517           return lang;
1518
1519   /* If not found, try suffix after last dot. */
1520   suffix = etags_strrchr (file, '.');
1521   if (suffix == NULL)
1522     return NULL;
1523   suffix += 1;
1524   for (lang = lang_names; lang->name != NULL; lang++)
1525     if (lang->suffixes != NULL)
1526       for (ext = lang->suffixes; *ext != NULL; ext++)
1527         if ((case_sensitive)
1528             ? streq (*ext, suffix)
1529             : strcaseeq (*ext, suffix))
1530           return lang;
1531   return NULL;
1532 }
1533
1534 \f
1535 /*
1536  * This routine is called on each file argument.
1537  */
1538 static void
1539 process_file_name (file, lang)
1540      char *file;
1541      language *lang;
1542 {
1543   struct stat stat_buf;
1544   FILE *inf;
1545   fdesc *fdp;
1546   compressor *compr;
1547   char *compressed_name, *uncompressed_name;
1548   char *ext, *real_name;
1549   int retval;
1550
1551   canonicalize_filename (file);
1552   if (streq (file, tagfile) && !streq (tagfile, "-"))
1553     {
1554       error ("skipping inclusion of %s in self.", file);
1555       return;
1556     }
1557   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1558     {
1559       compressed_name = NULL;
1560       real_name = uncompressed_name = savestr (file);
1561     }
1562   else
1563     {
1564       real_name = compressed_name = savestr (file);
1565       uncompressed_name = savenstr (file, ext - file);
1566     }
1567
1568   /* If the canonicalized uncompressed name
1569      has already been dealt with, skip it silently. */
1570   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1571     {
1572       assert (fdp->infname != NULL);
1573       if (streq (uncompressed_name, fdp->infname))
1574         goto cleanup;
1575     }
1576
1577   if (stat (real_name, &stat_buf) != 0)
1578     {
1579       /* Reset real_name and try with a different name. */
1580       real_name = NULL;
1581       if (compressed_name != NULL) /* try with the given suffix */
1582         {
1583           if (stat (uncompressed_name, &stat_buf) == 0)
1584             real_name = uncompressed_name;
1585         }
1586       else                      /* try all possible suffixes */
1587         {
1588           for (compr = compressors; compr->suffix != NULL; compr++)
1589             {
1590               compressed_name = concat (file, ".", compr->suffix);
1591               if (stat (compressed_name, &stat_buf) != 0)
1592                 {
1593                   if (MSDOS)
1594                     {
1595                       char *suf = compressed_name + strlen (file);
1596                       size_t suflen = strlen (compr->suffix) + 1;
1597                       for ( ; suf[1]; suf++, suflen--)
1598                         {
1599                           memmove (suf, suf + 1, suflen);
1600                           if (stat (compressed_name, &stat_buf) == 0)
1601                             {
1602                               real_name = compressed_name;
1603                               break;
1604                             }
1605                         }
1606                       if (real_name != NULL)
1607                         break;
1608                     } /* MSDOS */
1609                   free (compressed_name);
1610                   compressed_name = NULL;
1611                 }
1612               else
1613                 {
1614                   real_name = compressed_name;
1615                   break;
1616                 }
1617             }
1618         }
1619       if (real_name == NULL)
1620         {
1621           perror (file);
1622           goto cleanup;
1623         }
1624     } /* try with a different name */
1625
1626   if (!S_ISREG (stat_buf.st_mode))
1627     {
1628       error ("skipping %s: it is not a regular file.", real_name);
1629       goto cleanup;
1630     }
1631   if (real_name == compressed_name)
1632     {
1633       char *cmd = concat (compr->command, " ", real_name);
1634       inf = (FILE *) popen (cmd, "r");
1635       free (cmd);
1636     }
1637   else
1638     inf = fopen (real_name, "r");
1639   if (inf == NULL)
1640     {
1641       perror (real_name);
1642       goto cleanup;
1643     }
1644
1645   process_file (inf, uncompressed_name, lang);
1646
1647   if (real_name == compressed_name)
1648     retval = pclose (inf);
1649   else
1650     retval = fclose (inf);
1651   if (retval < 0)
1652     pfatal (file);
1653
1654  cleanup:
1655   free (compressed_name);
1656   free (uncompressed_name);
1657   last_node = NULL;
1658   curfdp = NULL;
1659   return;
1660 }
1661
1662 static void
1663 process_file (fh, fn, lang)
1664      FILE *fh;
1665      char *fn;
1666      language *lang;
1667 {
1668   static const fdesc emptyfdesc;
1669   fdesc *fdp;
1670
1671   /* Create a new input file description entry. */
1672   fdp = xnew (1, fdesc);
1673   *fdp = emptyfdesc;
1674   fdp->next = fdhead;
1675   fdp->infname = savestr (fn);
1676   fdp->lang = lang;
1677   fdp->infabsname = absolute_filename (fn, cwd);
1678   fdp->infabsdir = absolute_dirname (fn, cwd);
1679   if (filename_is_absolute (fn))
1680     {
1681       /* An absolute file name.  Canonicalize it. */
1682       fdp->taggedfname = absolute_filename (fn, NULL);
1683     }
1684   else
1685     {
1686       /* A file name relative to cwd.  Make it relative
1687          to the directory of the tags file. */
1688       fdp->taggedfname = relative_filename (fn, tagfiledir);
1689     }
1690   fdp->usecharno = TRUE;        /* use char position when making tags */
1691   fdp->prop = NULL;
1692   fdp->written = FALSE;         /* not written on tags file yet */
1693
1694   fdhead = fdp;
1695   curfdp = fdhead;              /* the current file description */
1696
1697   find_entries (fh);
1698
1699   /* If not Ctags, and if this is not metasource and if it contained no #line
1700      directives, we can write the tags and free all nodes pointing to
1701      curfdp. */
1702   if (!CTAGS
1703       && curfdp->usecharno      /* no #line directives in this file */
1704       && !curfdp->lang->metasource)
1705     {
1706       node *np, *prev;
1707
1708       /* Look for the head of the sublist relative to this file.  See add_node
1709          for the structure of the node tree. */
1710       prev = NULL;
1711       for (np = nodehead; np != NULL; prev = np, np = np->left)
1712         if (np->fdp == curfdp)
1713           break;
1714
1715       /* If we generated tags for this file, write and delete them. */
1716       if (np != NULL)
1717         {
1718           /* This is the head of the last sublist, if any.  The following
1719              instructions depend on this being true. */
1720           assert (np->left == NULL);
1721
1722           assert (fdhead == curfdp);
1723           assert (last_node->fdp == curfdp);
1724           put_entries (np);     /* write tags for file curfdp->taggedfname */
1725           free_tree (np);       /* remove the written nodes */
1726           if (prev == NULL)
1727             nodehead = NULL;    /* no nodes left */
1728           else
1729             prev->left = NULL;  /* delete the pointer to the sublist */
1730         }
1731     }
1732 }
1733
1734 /*
1735  * This routine sets up the boolean pseudo-functions which work
1736  * by setting boolean flags dependent upon the corresponding character.
1737  * Every char which is NOT in that string is not a white char.  Therefore,
1738  * all of the array "_wht" is set to FALSE, and then the elements
1739  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1740  * of a char is TRUE if it is the string "white", else FALSE.
1741  */
1742 static void
1743 init ()
1744 {
1745   register char *sp;
1746   register int i;
1747
1748   for (i = 0; i < CHARS; i++)
1749     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1750   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1751   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1752   notinname('\0') = notinname('\n');
1753   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1754   begtoken('\0') = begtoken('\n');
1755   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1756   intoken('\0') = intoken('\n');
1757   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1758   endtoken('\0') = endtoken('\n');
1759 }
1760
1761 /*
1762  * This routine opens the specified file and calls the function
1763  * which finds the function and type definitions.
1764  */
1765 static void
1766 find_entries (inf)
1767      FILE *inf;
1768 {
1769   char *cp;
1770   language *lang = curfdp->lang;
1771   Lang_function *parser = NULL;
1772
1773   /* If user specified a language, use it. */
1774   if (lang != NULL && lang->function != NULL)
1775     {
1776       parser = lang->function;
1777     }
1778
1779   /* Else try to guess the language given the file name. */
1780   if (parser == NULL)
1781     {
1782       lang = get_language_from_filename (curfdp->infname, TRUE);
1783       if (lang != NULL && lang->function != NULL)
1784         {
1785           curfdp->lang = lang;
1786           parser = lang->function;
1787         }
1788     }
1789
1790   /* Else look for sharp-bang as the first two characters. */
1791   if (parser == NULL
1792       && readline_internal (&lb, inf) > 0
1793       && lb.len >= 2
1794       && lb.buffer[0] == '#'
1795       && lb.buffer[1] == '!')
1796     {
1797       char *lp;
1798
1799       /* Set lp to point at the first char after the last slash in the
1800          line or, if no slashes, at the first nonblank.  Then set cp to
1801          the first successive blank and terminate the string. */
1802       lp = etags_strrchr (lb.buffer+2, '/');
1803       if (lp != NULL)
1804         lp += 1;
1805       else
1806         lp = skip_spaces (lb.buffer + 2);
1807       cp = skip_non_spaces (lp);
1808       *cp = '\0';
1809
1810       if (strlen (lp) > 0)
1811         {
1812           lang = get_language_from_interpreter (lp);
1813           if (lang != NULL && lang->function != NULL)
1814             {
1815               curfdp->lang = lang;
1816               parser = lang->function;
1817             }
1818         }
1819     }
1820
1821   /* We rewind here, even if inf may be a pipe.  We fail if the
1822      length of the first line is longer than the pipe block size,
1823      which is unlikely. */
1824   rewind (inf);
1825
1826   /* Else try to guess the language given the case insensitive file name. */
1827   if (parser == NULL)
1828     {
1829       lang = get_language_from_filename (curfdp->infname, FALSE);
1830       if (lang != NULL && lang->function != NULL)
1831         {
1832           curfdp->lang = lang;
1833           parser = lang->function;
1834         }
1835     }
1836
1837   /* Else try Fortran or C. */
1838   if (parser == NULL)
1839     {
1840       node *old_last_node = last_node;
1841
1842       curfdp->lang = get_language_from_langname ("fortran");
1843       find_entries (inf);
1844
1845       if (old_last_node == last_node)
1846         /* No Fortran entries found.  Try C. */
1847         {
1848           /* We do not tag if rewind fails.
1849              Only the file name will be recorded in the tags file. */
1850           rewind (inf);
1851           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1852           find_entries (inf);
1853         }
1854       return;
1855     }
1856
1857   if (!no_line_directive
1858       && curfdp->lang != NULL && curfdp->lang->metasource)
1859     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1860        file, or anyway we parsed a file that is automatically generated from
1861        this one.  If this is the case, the bingo.c file contained #line
1862        directives that generated tags pointing to this file.  Let's delete
1863        them all before parsing this file, which is the real source. */
1864     {
1865       fdesc **fdpp = &fdhead;
1866       while (*fdpp != NULL)
1867         if (*fdpp != curfdp
1868             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1869           /* We found one of those!  We must delete both the file description
1870              and all tags referring to it. */
1871           {
1872             fdesc *badfdp = *fdpp;
1873
1874             /* Delete the tags referring to badfdp->taggedfname
1875                that were obtained from badfdp->infname. */
1876             invalidate_nodes (badfdp, &nodehead);
1877
1878             *fdpp = badfdp->next; /* remove the bad description from the list */
1879             free_fdesc (badfdp);
1880           }
1881         else
1882           fdpp = &(*fdpp)->next; /* advance the list pointer */
1883     }
1884
1885   assert (parser != NULL);
1886
1887   /* Generic initialisations before reading from file. */
1888   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1889
1890   /* Generic initialisations before parsing file with readline. */
1891   lineno = 0;                  /* reset global line number */
1892   charno = 0;                  /* reset global char number */
1893   linecharno = 0;              /* reset global char number of line start */
1894
1895   parser (inf);
1896
1897   regex_tag_multiline ();
1898 }
1899
1900 \f
1901 /*
1902  * Check whether an implicitly named tag should be created,
1903  * then call `pfnote'.
1904  * NAME is a string that is internally copied by this function.
1905  *
1906  * TAGS format specification
1907  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1908  * The following is explained in some more detail in etc/ETAGS.EBNF.
1909  *
1910  * make_tag creates tags with "implicit tag names" (unnamed tags)
1911  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1912  *  1. NAME does not contain any of the characters in NONAM;
1913  *  2. LINESTART contains name as either a rightmost, or rightmost but
1914  *     one character, substring;
1915  *  3. the character, if any, immediately before NAME in LINESTART must
1916  *     be a character in NONAM;
1917  *  4. the character, if any, immediately after NAME in LINESTART must
1918  *     also be a character in NONAM.
1919  *
1920  * The implementation uses the notinname() macro, which recognises the
1921  * characters stored in the string `nonam'.
1922  * etags.el needs to use the same characters that are in NONAM.
1923  */
1924 static void
1925 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1926      char *name;                /* tag name, or NULL if unnamed */
1927      int namelen;               /* tag length */
1928      bool is_func;              /* tag is a function */
1929      char *linestart;           /* start of the line where tag is */
1930      int linelen;               /* length of the line where tag is */
1931      int lno;                   /* line number */
1932      long cno;                  /* character number */
1933 {
1934   bool named = (name != NULL && namelen > 0);
1935
1936   if (!CTAGS && named)          /* maybe set named to false */
1937     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1938        such that etags.el can guess a name from it. */
1939     {
1940       int i;
1941       register char *cp = name;
1942
1943       for (i = 0; i < namelen; i++)
1944         if (notinname (*cp++))
1945           break;
1946       if (i == namelen)                         /* rule #1 */
1947         {
1948           cp = linestart + linelen - namelen;
1949           if (notinname (linestart[linelen-1]))
1950             cp -= 1;                            /* rule #4 */
1951           if (cp >= linestart                   /* rule #2 */
1952               && (cp == linestart
1953                   || notinname (cp[-1]))        /* rule #3 */
1954               && strneq (name, cp, namelen))    /* rule #2 */
1955             named = FALSE;      /* use implicit tag name */
1956         }
1957     }
1958
1959   if (named)
1960     name = savenstr (name, namelen);
1961   else
1962     name = NULL;
1963   pfnote (name, is_func, linestart, linelen, lno, cno);
1964 }
1965
1966 /* Record a tag. */
1967 static void
1968 pfnote (name, is_func, linestart, linelen, lno, cno)
1969      char *name;                /* tag name, or NULL if unnamed */
1970      bool is_func;              /* tag is a function */
1971      char *linestart;           /* start of the line where tag is */
1972      int linelen;               /* length of the line where tag is */
1973      int lno;                   /* line number */
1974      long cno;                  /* character number */
1975 {
1976   register node *np;
1977
1978   assert (name == NULL || name[0] != '\0');
1979   if (CTAGS && name == NULL)
1980     return;
1981
1982   np = xnew (1, node);
1983
1984   /* If ctags mode, change name "main" to M<thisfilename>. */
1985   if (CTAGS && !cxref_style && streq (name, "main"))
1986     {
1987       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1988       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1989       fp = etags_strrchr (np->name, '.');
1990       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1991         fp[0] = '\0';
1992     }
1993   else
1994     np->name = name;
1995   np->valid = TRUE;
1996   np->been_warned = FALSE;
1997   np->fdp = curfdp;
1998   np->is_func = is_func;
1999   np->lno = lno;
2000   if (np->fdp->usecharno)
2001     /* Our char numbers are 0-base, because of C language tradition?
2002        ctags compatibility?  old versions compatibility?   I don't know.
2003        Anyway, since emacs's are 1-base we expect etags.el to take care
2004        of the difference.  If we wanted to have 1-based numbers, we would
2005        uncomment the +1 below. */
2006     np->cno = cno /* + 1 */ ;
2007   else
2008     np->cno = invalidcharno;
2009   np->left = np->right = NULL;
2010   if (CTAGS && !cxref_style)
2011     {
2012       if (strlen (linestart) < 50)
2013         np->regex = concat (linestart, "$", "");
2014       else
2015         np->regex = savenstr (linestart, 50);
2016     }
2017   else
2018     np->regex = savenstr (linestart, linelen);
2019
2020   add_node (np, &nodehead);
2021 }
2022
2023 /*
2024  * free_tree ()
2025  *      recurse on left children, iterate on right children.
2026  */
2027 static void
2028 free_tree (np)
2029      register node *np;
2030 {
2031   while (np)
2032     {
2033       register node *node_right = np->right;
2034       free_tree (np->left);
2035       free (np->name);
2036       free (np->regex);
2037       free (np);
2038       np = node_right;
2039     }
2040 }
2041
2042 /*
2043  * free_fdesc ()
2044  *      delete a file description
2045  */
2046 static void
2047 free_fdesc (fdp)
2048      register fdesc *fdp;
2049 {
2050   free (fdp->infname);
2051   free (fdp->infabsname);
2052   free (fdp->infabsdir);
2053   free (fdp->taggedfname);
2054   free (fdp->prop);
2055   free (fdp);
2056 }
2057
2058 /*
2059  * add_node ()
2060  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2061  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2062  *      balancing.
2063  *
2064  *      add_node is the only function allowed to add nodes, so it can
2065  *      maintain state.
2066  */
2067 static void
2068 add_node (np, cur_node_p)
2069      node *np, **cur_node_p;
2070 {
2071   register int dif;
2072   register node *cur_node = *cur_node_p;
2073
2074   if (cur_node == NULL)
2075     {
2076       *cur_node_p = np;
2077       last_node = np;
2078       return;
2079     }
2080
2081   if (!CTAGS)
2082     /* Etags Mode */
2083     {
2084       /* For each file name, tags are in a linked sublist on the right
2085          pointer.  The first tags of different files are a linked list
2086          on the left pointer.  last_node points to the end of the last
2087          used sublist. */
2088       if (last_node != NULL && last_node->fdp == np->fdp)
2089         {
2090           /* Let's use the same sublist as the last added node. */
2091           assert (last_node->right == NULL);
2092           last_node->right = np;
2093           last_node = np;
2094         }
2095       else if (cur_node->fdp == np->fdp)
2096         {
2097           /* Scanning the list we found the head of a sublist which is
2098              good for us.  Let's scan this sublist. */
2099           add_node (np, &cur_node->right);
2100         }
2101       else
2102         /* The head of this sublist is not good for us.  Let's try the
2103            next one. */
2104         add_node (np, &cur_node->left);
2105     } /* if ETAGS mode */
2106
2107   else
2108     {
2109       /* Ctags Mode */
2110       dif = strcmp (np->name, cur_node->name);
2111
2112       /*
2113        * If this tag name matches an existing one, then
2114        * do not add the node, but maybe print a warning.
2115        */
2116       if (no_duplicates && !dif)
2117         {
2118           if (np->fdp == cur_node->fdp)
2119             {
2120               if (!no_warnings)
2121                 {
2122                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2123                            np->fdp->infname, lineno, np->name);
2124                   fprintf (stderr, "Second entry ignored\n");
2125                 }
2126             }
2127           else if (!cur_node->been_warned && !no_warnings)
2128             {
2129               fprintf
2130                 (stderr,
2131                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2132                  np->fdp->infname, cur_node->fdp->infname, np->name);
2133               cur_node->been_warned = TRUE;
2134             }
2135           return;
2136         }
2137
2138       /* Actually add the node */
2139       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2140     } /* if CTAGS mode */
2141 }
2142
2143 /*
2144  * invalidate_nodes ()
2145  *      Scan the node tree and invalidate all nodes pointing to the
2146  *      given file description (CTAGS case) or free them (ETAGS case).
2147  */
2148 static void
2149 invalidate_nodes (badfdp, npp)
2150      fdesc *badfdp;
2151      node **npp;
2152 {
2153   node *np = *npp;
2154
2155   if (np == NULL)
2156     return;
2157
2158   if (CTAGS)
2159     {
2160       if (np->left != NULL)
2161         invalidate_nodes (badfdp, &np->left);
2162       if (np->fdp == badfdp)
2163         np->valid = FALSE;
2164       if (np->right != NULL)
2165         invalidate_nodes (badfdp, &np->right);
2166     }
2167   else
2168     {
2169       assert (np->fdp != NULL);
2170       if (np->fdp == badfdp)
2171         {
2172           *npp = np->left;      /* detach the sublist from the list */
2173           np->left = NULL;      /* isolate it */
2174           free_tree (np);       /* free it */
2175           invalidate_nodes (badfdp, npp);
2176         }
2177       else
2178         invalidate_nodes (badfdp, &np->left);
2179     }
2180 }
2181
2182 \f
2183 static int total_size_of_entries __P((node *));
2184 static int number_len __P((long));
2185
/* Return how many decimal digits are needed to write the non-negative
   number NUM.  The result is at least 1. */
static int
number_len (num)
     long num;
{
  int digits = 0;

  /* Count one digit, drop it, and go on while something is left. */
  do
    {
      digits += 1;
      num /= 10;
    }
  while (num > 0);

  return digits;
}
2196
2197 /*
2198  * Return total number of characters that put_entries will output for
2199  * the nodes in the linked list at the right of the specified node.
2200  * This count is irrelevant with etags.el since emacs 19.34 at least,
2201  * but is still supplied for backward compatibility.
2202  */
2203 static int
2204 total_size_of_entries (np)
2205      register node *np;
2206 {
2207   register int total = 0;
2208
2209   for (; np != NULL; np = np->right)
2210     if (np->valid)
2211       {
2212         total += strlen (np->regex) + 1;                /* pat\177 */
2213         if (np->name != NULL)
2214           total += strlen (np->name) + 1;               /* name\001 */
2215         total += number_len ((long) np->lno) + 1;       /* lno, */
2216         if (np->cno != invalidcharno)                   /* cno */
2217           total += number_len (np->cno);
2218         total += 1;                                     /* newline */
2219       }
2220
2221   return total;
2222 }
2223
2224 static void
2225 put_entries (np)
2226      register node *np;
2227 {
2228   register char *sp;
2229   static fdesc *fdp = NULL;
2230
2231   if (np == NULL)
2232     return;
2233
2234   /* Output subentries that precede this one */
2235   if (CTAGS)
2236     put_entries (np->left);
2237
2238   /* Output this entry */
2239   if (np->valid)
2240     {
2241       if (!CTAGS)
2242         {
2243           /* Etags mode */
2244           if (fdp != np->fdp)
2245             {
2246               fdp = np->fdp;
2247               fprintf (tagf, "\f\n%s,%d\n",
2248                        fdp->taggedfname, total_size_of_entries (np));
2249               fdp->written = TRUE;
2250             }
2251           fputs (np->regex, tagf);
2252           fputc ('\177', tagf);
2253           if (np->name != NULL)
2254             {
2255               fputs (np->name, tagf);
2256               fputc ('\001', tagf);
2257             }
2258           fprintf (tagf, "%d,", np->lno);
2259           if (np->cno != invalidcharno)
2260             fprintf (tagf, "%ld", np->cno);
2261           fputs ("\n", tagf);
2262         }
2263       else
2264         {
2265           /* Ctags mode */
2266           if (np->name == NULL)
2267             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2268
2269           if (cxref_style)
2270             {
2271               if (vgrind_style)
2272                 fprintf (stdout, "%s %s %d\n",
2273                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2274               else
2275                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2276                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2277             }
2278           else
2279             {
2280               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2281
2282               if (np->is_func)
2283                 {               /* function or #define macro with args */
2284                   putc (searchar, tagf);
2285                   putc ('^', tagf);
2286
2287                   for (sp = np->regex; *sp; sp++)
2288                     {
2289                       if (*sp == '\\' || *sp == searchar)
2290                         putc ('\\', tagf);
2291                       putc (*sp, tagf);
2292                     }
2293                   putc (searchar, tagf);
2294                 }
2295               else
2296                 {               /* anything else; text pattern inadequate */
2297                   fprintf (tagf, "%d", np->lno);
2298                 }
2299               putc ('\n', tagf);
2300             }
2301         }
2302     } /* if this node contains a valid tag */
2303
2304   /* Output subentries that follow this one */
2305   put_entries (np->right);
2306   if (!CTAGS)
2307     put_entries (np->left);
2308 }
2309
2310 \f
2311 /* C extensions. */
2312 #define C_EXT   0x00fff         /* C extensions */
2313 #define C_PLAIN 0x00000         /* C */
2314 #define C_PLPL  0x00001         /* C++ */
2315 #define C_STAR  0x00003         /* C* */
2316 #define C_JAVA  0x00005         /* JAVA */
2317 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
2318 #define YACC    0x10000         /* yacc file */
2319
2320 /*
2321  * The C symbol tables.
2322  */
/* Kinds of symbols stored in the C symbol table.  The order of the
   enumerators, and so their values, must not change. */
enum sym_type
{
  st_none,
  st_C_objprot,
  st_C_objimpl,
  st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_attribute,
  st_C_javastruct,
  st_C_operator,
  st_C_class,
  st_C_template,
  st_C_struct,
  st_C_extern,
  st_C_enum,
  st_C_define,
  st_C_typedef
};
2334
2335 static unsigned int hash __P((const char *, unsigned int));
2336 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2337 static enum sym_type C_symtype __P((char *, int, int));
2338
2339 /* Feed stuff between (but not including) %[ and %] lines to:
2340      gperf -m 5
2341 %[
2342 %compare-strncmp
2343 %enum
2344 %struct-type
2345 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2346 %%
2347 if,             0,                      st_C_ignore
2348 for,            0,                      st_C_ignore
2349 while,          0,                      st_C_ignore
2350 switch,         0,                      st_C_ignore
2351 return,         0,                      st_C_ignore
2352 __attribute__,  0,                      st_C_attribute
2353 GTY,            0,                      st_C_attribute
2354 @interface,     0,                      st_C_objprot
2355 @protocol,      0,                      st_C_objprot
2356 @implementation,0,                      st_C_objimpl
2357 @end,           0,                      st_C_objend
2358 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2359 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2360 friend,         C_PLPL,                 st_C_ignore
2361 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2362 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2363 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2364 class,          0,                      st_C_class
2365 namespace,      C_PLPL,                 st_C_struct
2366 domain,         C_STAR,                 st_C_struct
2367 union,          0,                      st_C_struct
2368 struct,         0,                      st_C_struct
2369 extern,         0,                      st_C_extern
2370 enum,           0,                      st_C_enum
2371 typedef,        0,                      st_C_typedef
2372 define,         0,                      st_C_define
2373 undef,          0,                      st_C_define
2374 operator,       C_PLPL,                 st_C_operator
2375 template,       0,                      st_C_template
2376 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2377 DEFUN,          0,                      st_C_gnumacro
2378 SYSCALL,        0,                      st_C_gnumacro
2379 ENTRY,          0,                      st_C_gnumacro
2380 PSEUDO,         0,                      st_C_gnumacro
2381 # These are defined inside C functions, so currently they are not met.
2382 # EXFUN used in glibc, DEFVAR_* in emacs.
2383 #EXFUN,         0,                      st_C_gnumacro
2384 #DEFVAR_,       0,                      st_C_gnumacro
2385 %]
2386 and replace lines between %< and %> with its output, then:
2387  - remove the #if characterset check
2388  - make in_word_set static and not inline. */
2389 /*%<*/
2390 /* C code produced by gperf version 3.0.1 */
2391 /* Command-line: gperf -m 5  */
2392 /* Computed positions: -k'2-3' */
2393
2394 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2395 /* maximum key range = 33, duplicates = 0 */
2396
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function for the C keyword table: the length LEN of STR plus
   the table values of its second character and, unless LEN is 2, of
   its third character.  STR must be long enough for those characters
   to be read. */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only two-character words leave the third character out. */
  if (hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[1]];

  return hval;
}
2451
2452 static struct C_stab_entry *
2453 in_word_set (str, len)
2454      register const char *str;
2455      register unsigned int len;
2456 {
2457   enum
2458     {
2459       TOTAL_KEYWORDS = 33,
2460       MIN_WORD_LENGTH = 2,
2461       MAX_WORD_LENGTH = 15,
2462       MIN_HASH_VALUE = 2,
2463       MAX_HASH_VALUE = 34
2464     };
2465
2466   static struct C_stab_entry wordlist[] =
2467     {
2468       {""}, {""},
2469       {"if",            0,                      st_C_ignore},
2470       {"GTY",           0,                      st_C_attribute},
2471       {"@end",          0,                      st_C_objend},
2472       {"union",         0,                      st_C_struct},
2473       {"define",                0,                      st_C_define},
2474       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2475       {"template",      0,                      st_C_template},
2476       {"operator",      C_PLPL,                 st_C_operator},
2477       {"@interface",    0,                      st_C_objprot},
2478       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2479       {"friend",                C_PLPL,                 st_C_ignore},
2480       {"typedef",       0,                      st_C_typedef},
2481       {"return",                0,                      st_C_ignore},
2482       {"@implementation",0,                     st_C_objimpl},
2483       {"@protocol",     0,                      st_C_objprot},
2484       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2485       {"extern",                0,                      st_C_extern},
2486       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2487       {"struct",                0,                      st_C_struct},
2488       {"domain",                C_STAR,                 st_C_struct},
2489       {"switch",                0,                      st_C_ignore},
2490       {"enum",          0,                      st_C_enum},
2491       {"for",           0,                      st_C_ignore},
2492       {"namespace",     C_PLPL,                 st_C_struct},
2493       {"class",         0,                      st_C_class},
2494       {"while",         0,                      st_C_ignore},
2495       {"undef",         0,                      st_C_define},
2496       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2497       {"__attribute__", 0,                      st_C_attribute},
2498       {"SYSCALL",       0,                      st_C_gnumacro},
2499       {"ENTRY",         0,                      st_C_gnumacro},
2500       {"PSEUDO",                0,                      st_C_gnumacro},
2501       {"DEFUN",         0,                      st_C_gnumacro}
2502     };
2503
2504   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2505     {
2506       register int key = hash (str, len);
2507
2508       if (key <= MAX_HASH_VALUE && key >= 0)
2509         {
2510           register const char *s = wordlist[key].name;
2511
2512           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2513             return &wordlist[key];
2514         }
2515     }
2516   return 0;
2517 }
2518 /*%>*/
2519
2520 static enum sym_type
2521 C_symtype (str, len, c_ext)
2522      char *str;
2523      int len;
2524      int c_ext;
2525 {
2526   register struct C_stab_entry *se = in_word_set (str, len);
2527
2528   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2529     return st_none;
2530   return se->type;
2531 }
2532
2533 \f
2534 /*
2535  * Ignoring __attribute__ ((list))
      *
      * consider_token sets inattribute to TRUE, and rejects the token, as
      * soon as it meets a token classified as st_C_attribute.
2536  */
2537 static bool inattribute;        /* looking at an __attribute__ construct */
2538
2539 /*
2540  * C functions and variables are recognized using a simple
2541  * finite automaton.  fvdef is its state variable.
2542  */
2543 static enum
2544 {
2545   fvnone,                       /* nothing seen */
2546   fdefunkey,                    /* GNU macro keyword (st_C_gnumacro, e.g.
                                        Emacs DEFUN) seen */
2547   fdefunname,                   /* name following the GNU macro keyword seen */
2548   foperator,                    /* func: operator keyword seen (cplpl) */
2549   fvnameseen,                   /* function or variable name seen */
2550   fstartlist,                   /* func: just after open parenthesis */
2551   finlist,                      /* func: in parameter list */
2552   flistseen,                    /* func: after parameter list */
2553   fignore,                      /* func: before open brace */
2554   vignore                       /* var-like: ignore until ';' */
2555 } fvdef;
2556
2557 static bool fvextern;           /* func or var: extern keyword seen */
2558
2559 /*
2560  * typedefs are recognized using a simple finite automaton.
2561  * typdef is its state variable.
2562  */
2563 static enum
2564 {
2565   tnone,                        /* nothing seen */
2566   tkeyseen,                     /* typedef keyword seen */
2567   ttypeseen,                    /* defined type seen */
2568   tinbody,                      /* inside typedef body */
2569   tend,                         /* just before typedef tag */
2570   tignore                       /* junk after typedef tag */
2571 } typdef;
2572
2573 /*
2574  * struct-like structures (enum, struct and union) are recognized
2575  * using another simple finite automaton.  `structdef' is its state
2576  * variable.
2577  */
2578 static enum
2579 {
2580   snone,                        /* nothing seen yet,
2581                                    or in struct body if bracelev > 0 */
2582   skeyseen,                     /* struct-like keyword seen */
2583   stagseen,                     /* struct-like tag seen */
2584   scolonseen                    /* colon seen after struct-like tag */
2585 } structdef;
2586
2587 /*
2588  * When objdef is different from onone, objtag is the name of the class.
      *
      * consider_token stores a fresh savenstr () copy here each time it
      * sees an Objective C class name and never frees the previous one.
      * The initial value is a string literal, so it must not be freed.
2589  */
2590 static char *objtag = "<uninited>";
2591
2592 /*
2593  * Yet another little state machine to deal with preprocessor lines.
2594  */
2595 static enum
2596 {
2597   dnone,                        /* nothing seen */
2598   dsharpseen,                   /* '#' seen as first char on line */
2599   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2600   dignorerest                   /* ignore rest of line */
2601 } definedef;
2602
2603 /*
2604  * State machine for Objective C protocols and implementations.
2605  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2606  */
2607 static enum
2608 {
2609   onone,                        /* nothing seen */
2610   oprotocol,                    /* @interface or @protocol seen */
2611   oimplementation,              /* @implementation seen */
2612   otagseen,                     /* class name seen */
2613   oparenseen,                   /* parenthesis before category seen */
2614   ocatseen,                     /* category name seen */
2615   oinbody,                      /* in @implementation body */
2616   omethodsign,                  /* in @implementation body, after +/- */
2617   omethodtag,                   /* after method name */
2618   omethodcolon,                 /* after method colon */
2619   omethodparm,                  /* after method parameter */
2620   oignore                       /* wait for @end */
2621 } objdef;
2622
2623
2624 /*
2625  * Use this structure to keep info about the token read, and how it
2626  * should be tagged.  Used by the make_C_tag function to build a tag.
2627  */
2628 static struct tok
2629 {
2630   char *line;                   /* string containing the token */
2631   int offset;                   /* where the token starts in LINE */
2632   int length;                   /* token length */
2633   /*
2634     The previous members can be used to pass strings around for generic
2635     purposes.  The following ones specifically refer to creating tags.  In this
2636     case the token contained here is the pattern that will be used to create a
2637     tag.
2638   */
2639   bool valid;                   /* when FALSE, make_C_tag creates no tag
                                        (only a diagnostic one if DEBUG); the
                                        token should be
2640                                    invalidated whenever a state machine is
2641                                    reset prematurely.  make_C_tag always
                                        clears this flag before returning */
2642   bool named;                   /* create a named tag */
2643   int lineno;                   /* source line number of tag */
2644   long linepos;                 /* source char number of tag */
2645 } token;                        /* latest token read */
2646
2647 /*
2648  * Variables and functions for dealing with nested structures.
2649  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
      *
      * pushclass_above (BRACELEV, STR, LEN) pops every entry at or above
      * BRACELEV, then pushes a copy of the LEN-char name STR (or NULL).
      * popclass_above (BRACELEV) pops, and frees the names of, every entry
      * whose brace level is at least BRACELEV.
      * write_classname (CN, QUALIFIER) stores in CN the non-NULL names on
      * the stack, outermost first, joined by QUALIFIER.
2650  */
2651 static void pushclass_above __P((int, char *, int));
2652 static void popclass_above __P((int));
2653 static void write_classname __P((linebuffer *, char *qualifier));
2654
2655 static struct {
2656   char **cname;                 /* nested class names; an entry may be NULL */
2657   int *bracelev;                /* nested class brace level */
2658   int nl;                       /* class nesting level (elements used) */
2659   int size;                     /* allocated length of both arrays */
2660 } cstack;                       /* stack for nested declaration tags */
2661 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2662 #define nestlev         (cstack.nl)
2663 /* After struct keyword or in struct body, not inside a nested function.
        This macro reads a variable named `bracelev' that must be in scope
        wherever it is expanded; it is not a macro parameter. */
2664 #define instruct        (structdef == snone && nestlev > 0                      \
2665                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2666
2667 static void
2668 pushclass_above (bracelev, str, len)
2669      int bracelev;
2670      char *str;
2671      int len;
2672 {
2673   int nl;
2674
2675   popclass_above (bracelev);
2676   nl = cstack.nl;
2677   if (nl >= cstack.size)
2678     {
2679       int size = cstack.size *= 2;
2680       xrnew (cstack.cname, size, char *);
2681       xrnew (cstack.bracelev, size, int);
2682     }
2683   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2684   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2685   cstack.bracelev[nl] = bracelev;
2686   cstack.nl = nl + 1;
2687 }
2688
2689 static void
2690 popclass_above (bracelev)
2691      int bracelev;
2692 {
2693   int nl;
2694
2695   for (nl = cstack.nl - 1;
2696        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2697        nl--)
2698     {
2699       free (cstack.cname[nl]);
2700       cstack.nl = nl;
2701     }
2702 }
2703
2704 static void
2705 write_classname (cn, qualifier)
2706      linebuffer *cn;
2707      char *qualifier;
2708 {
2709   int i, len;
2710   int qlen = strlen (qualifier);
2711
2712   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2713     {
2714       len = 0;
2715       cn->len = 0;
2716       cn->buffer[0] = '\0';
2717     }
2718   else
2719     {
2720       len = strlen (cstack.cname[0]);
2721       linebuffer_setlen (cn, len);
2722       strcpy (cn->buffer, cstack.cname[0]);
2723     }
2724   for (i = 1; i < cstack.nl; i++)
2725     {
2726       char *s;
2727       int slen;
2728
2729       s = cstack.cname[i];
2730       if (s == NULL)
2731         continue;
2732       slen = strlen (s);
2733       len += slen + qlen;
2734       linebuffer_setlen (cn, len);
2735       strncat (cn->buffer, qualifier, qlen);
2736       strncat (cn->buffer, s, slen);
2737     }
2738 }
2739
2740 \f
2741 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2742 static void make_C_tag __P((bool));
2743
2744 /*
2745  * consider_token ()
2746  *      checks to see if the current token is at the start of a
2747  *      function or variable, or corresponds to a typedef, or
2748  *      is a struct/union/enum tag, or #define, or an enum constant.
2749  *
2750  *      *IS_FUNC_OR_VAR gets TRUE if the token is a function or #define
      *      macro with args; the function/variable name, operator, GNU
      *      macro name and Objective C class/category paths set it to TRUE
      *      as well.  Only the #define path ever stores FALSE in it, so the
      *      caller must preset it.
2751  *      C_EXTP points to which language we are looking at; when C_AUTO
      *      is set there and a C++ construct is recognized, C_AUTO is
      *      replaced with C_PLPL.
      *
      *      Returns TRUE when the token should be considered for a tag
      *      (the caller seen in C_entries then builds the tag name from
      *      it), FALSE when the token only advanced the state machines or
      *      is to be ignored.
2752  *
2753  * Globals
2754  *      fvdef                   IN OUT
2755  *      structdef               IN OUT
2756  *      definedef               IN OUT
2757  *      typdef                  IN OUT
2758  *      objdef                  IN OUT
      *      fvextern                OUT
      *      inattribute             OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2759  */
2760
2761 static bool
2762 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2763      register char *str;        /* IN: token pointer */
2764      register int len;          /* IN: token length */
2765      register int c;            /* IN: first char after the token */
2766      int *c_extp;               /* IN, OUT: C extensions mask */
2767      int bracelev;              /* IN: brace level */
2768      int parlev;                /* IN: parenthesis level */
2769      bool *is_func_or_var;      /* OUT: function or variable found */
2770 {
2771   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2772      structtype is the type of the preceding struct-like keyword, and
2773      structbracelev is the brace level where it has been seen. */
2774   static enum sym_type structtype;
2775   static int structbracelev;
2776   static enum sym_type toktype;
2777
2778
2779   toktype = C_symtype (str, len, *c_extp);
2780
2781   /*
2782    * Skip __attribute__
2783    */
2784   if (toktype == st_C_attribute)
2785     {
2786       inattribute = TRUE;
2787       return FALSE;
2788      }
2789
2790    /*
2791     * Advance the definedef state machine.
2792     */
2793    switch (definedef)
2794      {
2795      case dnone:
2796        /* We're not on a preprocessor line. */
2797        if (toktype == st_C_gnumacro)
2798          {
2799            fvdef = fdefunkey;
2800            return FALSE;
2801          }
2802        break;
2803      case dsharpseen:
            /* First token after '#': only a st_C_define keyword keeps the
               line interesting, anything else makes us skip the line. */
2804        if (toktype == st_C_define)
2805          {
2806            definedef = ddefineseen;
2807          }
2808        else
2809          {
2810            definedef = dignorerest;
2811          }
2812        return FALSE;
2813      case ddefineseen:
2814        /*
2815         * Make a tag for any macro, unless it is a constant
2816         * and constantypedefs is FALSE.
             * A '(' right after the name marks a macro with arguments.
2817         */
2818        definedef = dignorerest;
2819        *is_func_or_var = (c == '(');
2820        if (!*is_func_or_var && !constantypedefs)
2821          return FALSE;
2822        else
2823          return TRUE;
2824      case dignorerest:
2825        return FALSE;
2826      default:
2827        error ("internal error: definedef value.", (char *)NULL);
2828      }
2829
2830    /*
2831     * Now typedefs
2832     */
2833    switch (typdef)
2834      {
2835      case tnone:
2836        if (toktype == st_C_typedef)
2837          {
                /* The typedef keyword always resets the function/variable
                   machine, but is only tracked when typedefs is set. */
2838            if (typedefs)
2839              typdef = tkeyseen;
2840            fvextern = FALSE;
2841            fvdef = fvnone;
2842            return FALSE;
2843          }
2844        break;
2845      case tkeyseen:
            /* There is no default: any other token type leaves typdef
               at tkeyseen. */
2846        switch (toktype)
2847          {
2848          case st_none:
2849          case st_C_class:
2850          case st_C_struct:
2851          case st_C_enum:
2852            typdef = ttypeseen;
2853          }
2854        break;
2855      case ttypeseen:
2856        if (structdef == snone && fvdef == fvnone)
2857          {
2858            fvdef = fvnameseen;
2859            return TRUE;
2860          }
2861        break;
2862      case tend:
            /* A struct-like keyword here is rejected; any other token is
               accepted. */
2863        switch (toktype)
2864          {
2865          case st_C_class:
2866          case st_C_struct:
2867          case st_C_enum:
2868            return FALSE;
2869          }
2870        return TRUE;
2871      }
2872
        /* Struct-like keywords (and Java extends/implements). */
2873    switch (toktype)
2874      {
2875      case st_C_javastruct:
2876        if (structdef == stagseen)
2877          structdef = scolonseen;
2878        return FALSE;
2879      case st_C_template:
2880      case st_C_class:
2881        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2882            && bracelev == 0
2883            && definedef == dnone && structdef == snone
2884            && typdef == tnone && fvdef == fvnone)
2885          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
            /* `template' only takes part in the C++ detection above. */
2886        if (toktype == st_C_template)
2887          break;
2888        /* FALLTHRU */
2889      case st_C_struct:
2890      case st_C_enum:
2891        if (parlev == 0
2892            && fvdef != vignore
2893            && (typdef == tkeyseen
2894                || (typedefs_or_cplusplus && structdef == snone)))
2895          {
2896            structdef = skeyseen;
2897            structtype = toktype;
2898            structbracelev = bracelev;
2899            if (fvdef == fvnameseen)
2900              fvdef = fvnone;
2901          }
2902        return FALSE;
2903      }
2904
        /* The token right after a struct-like keyword is taken as its tag. */
2905    if (structdef == skeyseen)
2906      {
2907        structdef = stagseen;
2908        return TRUE;
2909      }
2910
2911    if (typdef != tnone)
2912      definedef = dnone;
2913
2914    /* Detect Objective C constructs. */
2915    switch (objdef)
2916      {
2917      case onone:
2918        switch (toktype)
2919          {
2920          case st_C_objprot:
2921            objdef = oprotocol;
2922            return FALSE;
2923          case st_C_objimpl:
2924            objdef = oimplementation;
2925            return FALSE;
2926          }
2927        break;
2928      case oimplementation:
2929        /* Save the class tag for functions or variables defined inside. */
2930        objtag = savenstr (str, len);
2931        objdef = oinbody;
2932        return FALSE;
2933      case oprotocol:
2934        /* Save the class tag for categories. */
2935        objtag = savenstr (str, len);
2936        objdef = otagseen;
2937        *is_func_or_var = TRUE;
2938        return TRUE;
2939      case oparenseen:
2940        objdef = ocatseen;
2941        *is_func_or_var = TRUE;
2942        return TRUE;
2943      case oinbody:
2944        break;
2945      case omethodsign:
            /* First part of a method name: start token_name afresh with
               a NUL-terminated copy of the token. */
2946        if (parlev == 0)
2947          {
2948            fvdef = fvnone;
2949            objdef = omethodtag;
2950            linebuffer_setlen (&token_name, len);
2951            strncpy (token_name.buffer, str, len);
2952            token_name.buffer[len] = '\0';
2953            return TRUE;
2954          }
2955        return FALSE;
2956      case omethodcolon:
2957        if (parlev == 0)
2958          objdef = omethodparm;
2959        return FALSE;
2960      case omethodparm:
            /* A further part of the method name: append the token to
               what token_name already holds. */
2961        if (parlev == 0)
2962          {
2963            fvdef = fvnone;
2964            objdef = omethodtag;
2965            linebuffer_setlen (&token_name, token_name.len + len);
2966            strncat (token_name.buffer, str, len);
2967            return TRUE;
2968          }
2969        return FALSE;
2970      case oignore:
2971        if (toktype == st_C_objend)
2972          {
2973            /* Memory leakage here: the string pointed by objtag is
2974               never released, because many tests would be needed to
2975               avoid breaking on incorrect input code.  The amount of
2976               memory leaked here is the sum of the lengths of the
2977               class tags.
2978            free (objtag); */
2979            objdef = onone;
2980          }
2981        return FALSE;
2982      }
2983
2984    /* A function, variable or enum constant? */
2985    switch (toktype)
2986      {
2987      case st_C_extern:
2988        fvextern = TRUE;
            /* Keep fvdef while in or after a parameter list or in an
               ignore state; otherwise start over. */
2989        switch  (fvdef)
2990          {
2991          case finlist:
2992          case flistseen:
2993          case fignore:
2994          case vignore:
2995            break;
2996          default:
2997            fvdef = fvnone;
2998          }
2999        return FALSE;
3000      case st_C_ignore:
3001        fvextern = FALSE;
3002        fvdef = vignore;
3003        return FALSE;
3004      case st_C_operator:
3005        fvdef = foperator;
3006        *is_func_or_var = TRUE;
3007        return TRUE;
3008      case st_none:
3009        if (constantypedefs
3010            && structdef == snone
3011            && structtype == st_C_enum && bracelev > structbracelev)
3012          return TRUE;           /* enum constant */
3013        switch (fvdef)
3014          {
3015          case fdefunkey:
3016            if (bracelev > 0)
3017              break;
3018            fvdef = fdefunname;  /* GNU macro */
3019            *is_func_or_var = TRUE;
3020            return TRUE;
3021          case fvnone:
3022            switch (typdef)
3023              {
3024              case ttypeseen:
3025                return FALSE;
3026              case tnone:
                    /* `asm' and `__asm__' are not names: ignore what
                       follows like a variable declaration. */
3027                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3028                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3029                  {
3030                    fvdef = vignore;
3031                    return FALSE;
3032                  }
3033                break;
3034              }
3035           /* FALLTHRU */
3036           case fvnameseen:
               /* A token ending in "::operator" starts a C++ operator
                  definition. */
3037           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3038             {
3039               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3040                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3041               fvdef = foperator;
3042               *is_func_or_var = TRUE;
3043               return TRUE;
3044             }
               /* Inside braces, only names directly in a struct body
                  (see the instruct macro) are accepted. */
3045           if (bracelev > 0 && !instruct)
3046             break;
3047           fvdef = fvnameseen;   /* function or variable */
3048           *is_func_or_var = TRUE;
3049           return TRUE;
3050         }
3051       break;
3052     }
3053
3054   return FALSE;
3055 }
3056
3057 \f
3058 /*
3059  * C_entries often keeps pointers to tokens or lines which are older than
3060  * the line currently read.  By keeping two line buffers, and switching
3061  * them at end of line, it is possible to use those pointers.
      *
      * All the macros below are meant to be expanded inside C_entries:
      * they use names such as curndx, newndx, c_ext, lp, inf, quotednl,
      * charno and savetoken that must be in scope at the point of use.
3062  */
3063 static struct
3064 {
3065   long linepos;
3066   linebuffer lb;
3067 } lbs[2];
3068
      /* True when the buffer being read is also the newest one filled. */
3069 #define current_lb_is_new (newndx == curndx)
      /* Make the other of the two buffers the current one. */
3070 #define switch_line_buffers() (curndx = 1 - curndx)
3071
      /* The current and the newest line buffer, with their linepos fields. */
3072 #define curlb (lbs[curndx].lb)
3073 #define newlb (lbs[newndx].lb)
3074 #define curlinepos (lbs[curndx].linepos)
3075 #define newlinepos (lbs[newndx].linepos)
3076
      /* Language tests on the c_ext mask. */
3077 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3078 #define cplpl (c_ext & C_PLPL)
3079 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3080
      /* Read the next input line into the current buffer: record charno as
         its position, point lp at its start, clear quotednl and mark the
         current buffer as the newest.  definedef and the saved token are
         left alone, which is what sets this macro apart from CNL. */
3081 #define CNL_SAVE_DEFINEDEF()                                            \
3082 do {                                                                    \
3083   curlinepos = charno;                                                  \
3084   readline (&curlb, inf);                                               \
3085   lp = curlb.buffer;                                                    \
3086   quotednl = FALSE;                                                     \
3087   newndx = curndx;                                                      \
3088 } while (0)
3089
      /* As CNL_SAVE_DEFINEDEF, then restore into token the token saved in
         savetoken, if it is valid, and reset definedef to dnone. */
3090 #define CNL()                                                           \
3091 do {                                                                    \
3092   CNL_SAVE_DEFINEDEF();                                                 \
3093   if (savetoken.valid)                                                  \
3094     {                                                                   \
3095       token = savetoken;                                                \
3096       savetoken.valid = FALSE;                                          \
3097     }                                                                   \
3098   definedef = dnone;                                                    \
3099 } while (0)
3100
3101
3102 static void
3103 make_C_tag (isfun)
3104      bool isfun;
3105 {
3106   /* This function is never called when token.valid is FALSE, but
3107      we must protect against invalid input or internal errors. */
3108   if (token.valid)
3109     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3110               token.offset+token.length+1, token.lineno, token.linepos);
3111   else if (DEBUG)
3112     {                             /* this branch is optimised away if !DEBUG */
3113       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3114                 token_name.len + 17, isfun, token.line,
3115                 token.offset+token.length+1, token.lineno, token.linepos);
3116       error ("INVALID TOKEN", NULL);
3117     }
3118
3119   token.valid = FALSE;
3120 }
3121
3122
3123 /*
3124  * C_entries ()
3125  *      This routine finds functions, variables, typedefs,
3126  *      #define's, enum constants and struct/union/enum definitions in
3127  *      C syntax and adds them to the list.
3128  */
3129 static void
3130 C_entries (c_ext, inf)
3131      int c_ext;                 /* extension of C */
3132      FILE *inf;                 /* input file */
3133 {
3134   register char c;              /* latest char read; '\0' for end of line */
3135   register char *lp;            /* pointer one beyond the character `c' */
3136   int curndx, newndx;           /* indices for current and new lb */
3137   register int tokoff;          /* offset in line of start of current token */
3138   register int toklen;          /* length of current token */
3139   char *qualifier;              /* string used to qualify names */
3140   int qlen;                     /* length of qualifier */
3141   int bracelev;                 /* current brace level */
3142   int bracketlev;               /* current bracket level */
3143   int parlev;                   /* current parenthesis level */
3144   int attrparlev;               /* __attribute__ parenthesis level */
3145   int templatelev;              /* current template level */
3146   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3147   bool incomm, inquote, inchar, quotednl, midtoken;
3148   bool yacc_rules;              /* in the rules part of a yacc file */
3149   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3150
3151
3152   linebuffer_init (&lbs[0].lb);
3153   linebuffer_init (&lbs[1].lb);
3154   if (cstack.size == 0)
3155     {
3156       cstack.size = (DEBUG) ? 1 : 4;
3157       cstack.nl = 0;
3158       cstack.cname = xnew (cstack.size, char *);
3159       cstack.bracelev = xnew (cstack.size, int);
3160     }
3161
3162   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3163   curndx = newndx = 0;
3164   lp = curlb.buffer;
3165   *lp = 0;
3166
3167   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3168   structdef = snone; definedef = dnone; objdef = onone;
3169   yacc_rules = FALSE;
3170   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3171   token.valid = savetoken.valid = FALSE;
3172   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3173   if (cjava)
3174     { qualifier = "."; qlen = 1; }
3175   else
3176     { qualifier = "::"; qlen = 2; }
3177
3178
3179   while (!feof (inf))
3180     {
3181       c = *lp++;
3182       if (c == '\\')
3183         {
3184           /* If we are at the end of the line, the next character is a
3185              '\0'; do not skip it, because it is what tells us
3186              to read the next line.  */
3187           if (*lp == '\0')
3188             {
3189               quotednl = TRUE;
3190               continue;
3191             }
3192           lp++;
3193           c = ' ';
3194         }
3195       else if (incomm)
3196         {
3197           switch (c)
3198             {
3199             case '*':
3200               if (*lp == '/')
3201                 {
3202                   c = *lp++;
3203                   incomm = FALSE;
3204                 }
3205               break;
3206             case '\0':
3207               /* Newlines inside comments do not end macro definitions in
3208                  traditional cpp. */
3209               CNL_SAVE_DEFINEDEF ();
3210               break;
3211             }
3212           continue;
3213         }
3214       else if (inquote)
3215         {
3216           switch (c)
3217             {
3218             case '"':
3219               inquote = FALSE;
3220               break;
3221             case '\0':
3222               /* Newlines inside strings do not end macro definitions
3223                  in traditional cpp, even though compilers don't
3224                  usually accept them. */
3225               CNL_SAVE_DEFINEDEF ();
3226               break;
3227             }
3228           continue;
3229         }
3230       else if (inchar)
3231         {
3232           switch (c)
3233             {
3234             case '\0':
3235               /* Hmmm, something went wrong. */
3236               CNL ();
3237               /* FALLTHRU */
3238             case '\'':
3239               inchar = FALSE;
3240               break;
3241             }
3242           continue;
3243         }
3244       else if (bracketlev > 0)
3245         {
3246           switch (c)
3247             {
3248             case ']':
3249               if (--bracketlev > 0)
3250                 continue;
3251               break;
3252             case '\0':
3253               CNL_SAVE_DEFINEDEF ();
3254               break;
3255             }
3256           continue;
3257         }
3258       else switch (c)
3259         {
3260         case '"':
3261           inquote = TRUE;
3262           if (inattribute)
3263             break;
3264           switch (fvdef)
3265             {
3266             case fdefunkey:
3267             case fstartlist:
3268             case finlist:
3269             case fignore:
3270             case vignore:
3271               break;
3272             default:
3273               fvextern = FALSE;
3274               fvdef = fvnone;
3275             }
3276           continue;
3277         case '\'':
3278           inchar = TRUE;
3279           if (inattribute)
3280             break;
3281           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3282             {
3283               fvextern = FALSE;
3284               fvdef = fvnone;
3285             }
3286           continue;
3287         case '/':
3288           if (*lp == '*')
3289             {
3290               incomm = TRUE;
3291               lp++;
3292               c = ' ';
3293             }
3294           else if (/* cplpl && */ *lp == '/')
3295             {
3296               c = '\0';
3297             }
3298           break;
3299         case '%':
3300           if ((c_ext & YACC) && *lp == '%')
3301             {
3302               /* Entering or exiting rules section in yacc file. */
3303               lp++;
3304               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3305               typdef = tnone; structdef = snone;
3306               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3307               bracelev = 0;
3308               yacc_rules = !yacc_rules;
3309               continue;
3310             }
3311           else
3312             break;
3313         case '#':
3314           if (definedef == dnone)
3315             {
3316               char *cp;
3317               bool cpptoken = TRUE;
3318
3319               /* Look back on this line.  If all blanks, or nonblanks
3320                  followed by an end of comment, this is a preprocessor
3321                  token. */
3322               for (cp = newlb.buffer; cp < lp-1; cp++)
3323                 if (!iswhite (*cp))
3324                   {
3325                     if (*cp == '*' && *(cp+1) == '/')
3326                       {
3327                         cp++;
3328                         cpptoken = TRUE;
3329                       }
3330                     else
3331                       cpptoken = FALSE;
3332                   }
3333               if (cpptoken)
3334                 definedef = dsharpseen;
3335             } /* if (definedef == dnone) */
3336           continue;
3337         case '[':
3338           bracketlev++;
3339             continue;
3340         } /* switch (c) */
3341
3342
3343       /* Consider token only if some involved conditions are satisfied. */
3344       if (typdef != tignore
3345           && definedef != dignorerest
3346           && fvdef != finlist
3347           && templatelev == 0
3348           && (definedef != dnone
3349               || structdef != scolonseen)
3350           && !inattribute)
3351         {
3352           if (midtoken)
3353             {
3354               if (endtoken (c))
3355                 {
3356                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3357                     /* This handles :: in the middle,
3358                        but not at the beginning of an identifier.
3359                        Also, space-separated :: is not recognised. */
3360                     {
3361                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3362                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3363                       lp += 2;
3364                       toklen += 2;
3365                       c = lp[-1];
3366                       goto still_in_token;
3367                     }
3368                   else
3369                     {
3370                       bool funorvar = FALSE;
3371
3372                       if (yacc_rules
3373                           || consider_token (newlb.buffer + tokoff, toklen, c,
3374                                              &c_ext, bracelev, parlev,
3375                                              &funorvar))
3376                         {
3377                           if (fvdef == foperator)
3378                             {
3379                               char *oldlp = lp;
3380                               lp = skip_spaces (lp-1);
3381                               if (*lp != '\0')
3382                                 lp += 1;
3383                               while (*lp != '\0'
3384                                      && !iswhite (*lp) && *lp != '(')
3385                                 lp += 1;
3386                               c = *lp++;
3387                               toklen += lp - oldlp;
3388                             }
3389                           token.named = FALSE;
3390                           if (!plainc
3391                               && nestlev > 0 && definedef == dnone)
3392                             /* in struct body */
3393                             {
3394                               write_classname (&token_name, qualifier);
3395                               linebuffer_setlen (&token_name,
3396                                                  token_name.len+qlen+toklen);
3397                               strcat (token_name.buffer, qualifier);
3398                               strncat (token_name.buffer,
3399                                        newlb.buffer + tokoff, toklen);
3400                               token.named = TRUE;
3401                             }
3402                           else if (objdef == ocatseen)
3403                             /* Objective C category */
3404                             {
3405                               int len = strlen (objtag) + 2 + toklen;
3406                               linebuffer_setlen (&token_name, len);
3407                               strcpy (token_name.buffer, objtag);
3408                               strcat (token_name.buffer, "(");
3409                               strncat (token_name.buffer,
3410                                        newlb.buffer + tokoff, toklen);
3411                               strcat (token_name.buffer, ")");
3412                               token.named = TRUE;
3413                             }
3414                           else if (objdef == omethodtag
3415                                    || objdef == omethodparm)
3416                             /* Objective C method */
3417                             {
3418                               token.named = TRUE;
3419                             }
3420                           else if (fvdef == fdefunname)
3421                             /* GNU DEFUN and similar macros */
3422                             {
3423                               bool defun = (newlb.buffer[tokoff] == 'F');
3424                               int off = tokoff;
3425                               int len = toklen;
3426
3427                               /* Rewrite the tag so that emacs lisp DEFUNs
3428                                  can be found by their elisp name */
3429                               if (defun)
3430                                 {
3431                                   off += 1;
3432                                   len -= 1;
3433                                 }
3434                               linebuffer_setlen (&token_name, len);
3435                               strncpy (token_name.buffer,
3436                                        newlb.buffer + off, len);
3437                               token_name.buffer[len] = '\0';
3438                               if (defun)
3439                                 while (--len >= 0)
3440                                   if (token_name.buffer[len] == '_')
3441                                     token_name.buffer[len] = '-';
3442                               token.named = defun;
3443                             }
3444                           else
3445                             {
3446                               linebuffer_setlen (&token_name, toklen);
3447                               strncpy (token_name.buffer,
3448                                        newlb.buffer + tokoff, toklen);
3449                               token_name.buffer[toklen] = '\0';
3450                               /* Name macros and members. */
3451                               token.named = (structdef == stagseen
3452                                              || typdef == ttypeseen
3453                                              || typdef == tend
3454                                              || (funorvar
3455                                                  && definedef == dignorerest)
3456                                              || (funorvar
3457                                                  && definedef == dnone
3458                                                  && structdef == snone
3459                                                  && bracelev > 0));
3460                             }
3461                           token.lineno = lineno;
3462                           token.offset = tokoff;
3463                           token.length = toklen;
3464                           token.line = newlb.buffer;
3465                           token.linepos = newlinepos;
3466                           token.valid = TRUE;
3467
3468                           if (definedef == dnone
3469                               && (fvdef == fvnameseen
3470                                   || fvdef == foperator
3471                                   || structdef == stagseen
3472                                   || typdef == tend
3473                                   || typdef == ttypeseen
3474                                   || objdef != onone))
3475                             {
3476                               if (current_lb_is_new)
3477                                 switch_line_buffers ();
3478                             }
3479                           else if (definedef != dnone
3480                                    || fvdef == fdefunname
3481                                    || instruct)
3482                             make_C_tag (funorvar);
3483                         }
3484                       else /* not yacc and consider_token failed */
3485                         {
3486                           if (inattribute && fvdef == fignore)
3487                             {
3488                               /* We have just met __attribute__ after a
3489                                  function parameter list: do not tag the
3490                                  function again. */
3491                               fvdef = fvnone;
3492                             }
3493                         }
3494                       midtoken = FALSE;
3495                     }
3496                 } /* if (endtoken (c)) */
3497               else if (intoken (c))
3498                 still_in_token:
3499                 {
3500                   toklen++;
3501                   continue;
3502                 }
3503             } /* if (midtoken) */
3504           else if (begtoken (c))
3505             {
3506               switch (definedef)
3507                 {
3508                 case dnone:
3509                   switch (fvdef)
3510                     {
3511                     case fstartlist:
3512                       /* This prevents tagging fb in
3513                          void (__attribute__((noreturn)) *fb) (void);
3514                          Fixing this is not easy and not very important. */
3515                       fvdef = finlist;
3516                       continue;
3517                     case flistseen:
3518                       if (plainc || declarations)
3519                         {
3520                           make_C_tag (TRUE); /* a function */
3521                           fvdef = fignore;
3522                         }
3523                       break;
3524                     }
3525                   if (structdef == stagseen && !cjava)
3526                     {
3527                       popclass_above (bracelev);
3528                       structdef = snone;
3529                     }
3530                   break;
3531                 case dsharpseen:
3532                   savetoken = token;
3533                   break;
3534                 }
3535               if (!yacc_rules || lp == newlb.buffer + 1)
3536                 {
3537                   tokoff = lp - 1 - newlb.buffer;
3538                   toklen = 1;
3539                   midtoken = TRUE;
3540                 }
3541               continue;
3542             } /* if (begtoken) */
3543         } /* if must look at token */
3544
3545
3546       /* Detect end of line, colon, comma, semicolon and various braces
3547          after having handled a token.*/
3548       switch (c)
3549         {
3550         case ':':
3551           if (inattribute)
3552             break;
3553           if (yacc_rules && token.offset == 0 && token.valid)
3554             {
3555               make_C_tag (FALSE); /* a yacc function */
3556               break;
3557             }
3558           if (definedef != dnone)
3559             break;
3560           switch (objdef)
3561             {
3562             case  otagseen:
3563               objdef = oignore;
3564               make_C_tag (TRUE); /* an Objective C class */
3565               break;
3566             case omethodtag:
3567             case omethodparm:
3568               objdef = omethodcolon;
3569               linebuffer_setlen (&token_name, token_name.len + 1);
3570               strcat (token_name.buffer, ":");
3571               break;
3572             }
3573           if (structdef == stagseen)
3574             {
3575               structdef = scolonseen;
3576               break;
3577             }
3578           /* Should be useless, but may be work as a safety net. */
3579           if (cplpl && fvdef == flistseen)
3580             {
3581               make_C_tag (TRUE); /* a function */
3582               fvdef = fignore;
3583               break;
3584             }
3585           break;
3586         case ';':
3587           if (definedef != dnone || inattribute)
3588             break;
3589           switch (typdef)
3590             {
3591             case tend:
3592             case ttypeseen:
3593               make_C_tag (FALSE); /* a typedef */
3594               typdef = tnone;
3595               fvdef = fvnone;
3596               break;
3597             case tnone:
3598             case tinbody:
3599             case tignore:
3600               switch (fvdef)
3601                 {
3602                 case fignore:
3603                   if (typdef == tignore || cplpl)
3604                     fvdef = fvnone;
3605                   break;
3606                 case fvnameseen:
3607                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3608                       || (members && instruct))
3609                     make_C_tag (FALSE); /* a variable */
3610                   fvextern = FALSE;
3611                   fvdef = fvnone;
3612                   token.valid = FALSE;
3613                   break;
3614                 case flistseen:
3615                   if ((declarations
3616                        && (cplpl || !instruct)
3617                        && (typdef == tnone || (typdef != tignore && instruct)))
3618                       || (members
3619                           && plainc && instruct))
3620                     make_C_tag (TRUE);  /* a function */
3621                   /* FALLTHRU */
3622                 default:
3623                   fvextern = FALSE;
3624                   fvdef = fvnone;
3625                   if (declarations
3626                        && cplpl && structdef == stagseen)
3627                     make_C_tag (FALSE); /* forward declaration */
3628                   else
3629                     token.valid = FALSE;
3630                 } /* switch (fvdef) */
3631               /* FALLTHRU */
3632             default:
3633               if (!instruct)
3634                 typdef = tnone;
3635             }
3636           if (structdef == stagseen)
3637             structdef = snone;
3638           break;
3639         case ',':
3640           if (definedef != dnone || inattribute)
3641             break;
3642           switch (objdef)
3643             {
3644             case omethodtag:
3645             case omethodparm:
3646               make_C_tag (TRUE); /* an Objective C method */
3647               objdef = oinbody;
3648               break;
3649             }
3650           switch (fvdef)
3651             {
3652             case fdefunkey:
3653             case foperator:
3654             case fstartlist:
3655             case finlist:
3656             case fignore:
3657             case vignore:
3658               break;
3659             case fdefunname:
3660               fvdef = fignore;
3661               break;
3662             case fvnameseen:
3663               if (parlev == 0
3664                   && ((globals
3665                        && bracelev == 0
3666                        && templatelev == 0
3667                        && (!fvextern || declarations))
3668                       || (members && instruct)))
3669                   make_C_tag (FALSE); /* a variable */
3670               break;
3671             case flistseen:
3672               if ((declarations && typdef == tnone && !instruct)
3673                   || (members && typdef != tignore && instruct))
3674                 {
3675                   make_C_tag (TRUE); /* a function */
3676                   fvdef = fvnameseen;
3677                 }
3678               else if (!declarations)
3679                 fvdef = fvnone;
3680               token.valid = FALSE;
3681               break;
3682             default:
3683               fvdef = fvnone;
3684             }
3685           if (structdef == stagseen)
3686             structdef = snone;
3687           break;
3688         case ']':
3689           if (definedef != dnone || inattribute)
3690             break;
3691           if (structdef == stagseen)
3692             structdef = snone;
3693           switch (typdef)
3694             {
3695             case ttypeseen:
3696             case tend:
3697               typdef = tignore;
3698               make_C_tag (FALSE);       /* a typedef */
3699               break;
3700             case tnone:
3701             case tinbody:
3702               switch (fvdef)
3703                 {
3704                 case foperator:
3705                 case finlist:
3706                 case fignore:
3707                 case vignore:
3708                   break;
3709                 case fvnameseen:
3710                   if ((members && bracelev == 1)
3711                       || (globals && bracelev == 0
3712                           && (!fvextern || declarations)))
3713                     make_C_tag (FALSE); /* a variable */
3714                   /* FALLTHRU */
3715                 default:
3716                   fvdef = fvnone;
3717                 }
3718               break;
3719             }
3720           break;
3721         case '(':
3722           if (inattribute)
3723             {
3724               attrparlev++;
3725               break;
3726             }
3727           if (definedef != dnone)
3728             break;
3729           if (objdef == otagseen && parlev == 0)
3730             objdef = oparenseen;
3731           switch (fvdef)
3732             {
3733             case fvnameseen:
3734               if (typdef == ttypeseen
3735                   && *lp != '*'
3736                   && !instruct)
3737                 {
3738                   /* This handles constructs like:
3739                      typedef void OperatorFun (int fun); */
3740                   make_C_tag (FALSE);
3741                   typdef = tignore;
3742                   fvdef = fignore;
3743                   break;
3744                 }
3745               /* FALLTHRU */
3746             case foperator:
3747               fvdef = fstartlist;
3748               break;
3749             case flistseen:
3750               fvdef = finlist;
3751               break;
3752             }
3753           parlev++;
3754           break;
3755         case ')':
3756           if (inattribute)
3757             {
3758               if (--attrparlev == 0)
3759                 inattribute = FALSE;
3760               break;
3761             }
3762           if (definedef != dnone)
3763             break;
3764           if (objdef == ocatseen && parlev == 1)
3765             {
3766               make_C_tag (TRUE); /* an Objective C category */
3767               objdef = oignore;
3768             }
3769           if (--parlev == 0)
3770             {
3771               switch (fvdef)
3772                 {
3773                 case fstartlist:
3774                 case finlist:
3775                   fvdef = flistseen;
3776                   break;
3777                 }
3778               if (!instruct
3779                   && (typdef == tend
3780                       || typdef == ttypeseen))
3781                 {
3782                   typdef = tignore;
3783                   make_C_tag (FALSE); /* a typedef */
3784                 }
3785             }
3786           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3787             parlev = 0;
3788           break;
3789         case '{':
3790           if (definedef != dnone)
3791             break;
3792           if (typdef == ttypeseen)
3793             {
3794               /* Whenever typdef is set to tinbody (currently only
3795                  here), typdefbracelev should be set to bracelev. */
3796               typdef = tinbody;
3797               typdefbracelev = bracelev;
3798             }
3799           switch (fvdef)
3800             {
3801             case flistseen:
3802               make_C_tag (TRUE);    /* a function */
3803               /* FALLTHRU */
3804             case fignore:
3805               fvdef = fvnone;
3806               break;
3807             case fvnone:
3808               switch (objdef)
3809                 {
3810                 case otagseen:
3811                   make_C_tag (TRUE); /* an Objective C class */
3812                   objdef = oignore;
3813                   break;
3814                 case omethodtag:
3815                 case omethodparm:
3816                   make_C_tag (TRUE); /* an Objective C method */
3817                   objdef = oinbody;
3818                   break;
3819                 default:
3820                   /* Neutralize `extern "C" {' grot. */
3821                   if (bracelev == 0 && structdef == snone && nestlev == 0
3822                       && typdef == tnone)
3823                     bracelev = -1;
3824                 }
3825               break;
3826             }
3827           switch (structdef)
3828             {
3829             case skeyseen:         /* unnamed struct */
3830               pushclass_above (bracelev, NULL, 0);
3831               structdef = snone;
3832               break;
3833             case stagseen:         /* named struct or enum */
3834             case scolonseen:       /* a class */
3835               pushclass_above (bracelev,token.line+token.offset, token.length);
3836               structdef = snone;
3837               make_C_tag (FALSE);  /* a struct or enum */
3838               break;
3839             }
3840           bracelev += 1;
3841           break;
3842         case '*':
3843           if (definedef != dnone)
3844             break;
3845           if (fvdef == fstartlist)
3846             {
3847               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3848               token.valid = FALSE;
3849             }
3850           break;
3851         case '}':
3852           if (definedef != dnone)
3853             break;
3854           bracelev -= 1;
3855           if (!ignoreindent && lp == newlb.buffer + 1)
3856             {
3857               if (bracelev != 0)
3858                 token.valid = FALSE; /* unexpected value, token unreliable */
3859               bracelev = 0;     /* reset brace level if first column */
3860               parlev = 0;       /* also reset paren level, just in case... */
3861             }
3862           else if (bracelev < 0)
3863             {
3864               token.valid = FALSE; /* something gone amiss, token unreliable */
3865               bracelev = 0;
3866             }
3867           if (bracelev == 0 && fvdef == vignore)
3868             fvdef = fvnone;             /* end of function */
3869           popclass_above (bracelev);
3870           structdef = snone;
3871           /* Only if typdef == tinbody is typdefbracelev significant. */
3872           if (typdef == tinbody && bracelev <= typdefbracelev)
3873             {
3874               assert (bracelev == typdefbracelev);
3875               typdef = tend;
3876             }
3877           break;
3878         case '=':
3879           if (definedef != dnone)
3880             break;
3881           switch (fvdef)
3882             {
3883             case foperator:
3884             case finlist:
3885             case fignore:
3886             case vignore:
3887               break;
3888             case fvnameseen:
3889               if ((members && bracelev == 1)
3890                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3891                 make_C_tag (FALSE); /* a variable */
3892               /* FALLTHRU */
3893             default:
3894               fvdef = vignore;
3895             }
3896           break;
3897         case '<':
3898           if (cplpl
3899               && (structdef == stagseen || fvdef == fvnameseen))
3900             {
3901               templatelev++;
3902               break;
3903             }
3904           goto resetfvdef;
3905         case '>':
3906           if (templatelev > 0)
3907             {
3908               templatelev--;
3909               break;
3910             }
3911           goto resetfvdef;
3912         case '+':
3913         case '-':
3914           if (objdef == oinbody && bracelev == 0)
3915             {
3916               objdef = omethodsign;
3917               break;
3918             }
3919           /* FALLTHRU */
3920         resetfvdef:
3921         case '#': case '~': case '&': case '%': case '/':
3922         case '|': case '^': case '!': case '.': case '?':
3923           if (definedef != dnone)
3924             break;
3925           /* These surely cannot follow a function tag in C. */
3926           switch (fvdef)
3927             {
3928             case foperator:
3929             case finlist:
3930             case fignore:
3931             case vignore:
3932               break;
3933             default:
3934               fvdef = fvnone;
3935             }
3936           break;
3937         case '\0':
3938           if (objdef == otagseen)
3939             {
3940               make_C_tag (TRUE); /* an Objective C class */
3941               objdef = oignore;
3942             }
3943           /* If a macro spans multiple lines don't reset its state. */
3944           if (quotednl)
3945             CNL_SAVE_DEFINEDEF ();
3946           else
3947             CNL ();
3948           break;
3949         } /* switch (c) */
3950
3951     } /* while not eof */
3952
3953   free (lbs[0].lb.buffer);
3954   free (lbs[1].lb.buffer);
3955 }
3956
3957 /*
3958  * Process either a C++ file or a C file depending on the setting
3959  * of a global flag.
3960  */
3961 static void
3962 default_C_entries (inf)
3963      FILE *inf;
3964 {
3965   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3966 }
3967
/* Tag INF as plain C: C_entries is called with 0 as its language flags. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;

  C_entries (c_ext, inf);
}
3975
3976 /* Always do C++. */
3977 static void
3978 Cplusplus_entries (inf)
3979      FILE *inf;
3980 {
3981   C_entries (C_PLPL, inf);
3982 }
3983
3984 /* Always do Java. */
3985 static void
3986 Cjava_entries (inf)
3987      FILE *inf;
3988 {
3989   C_entries (C_JAVA, inf);
3990 }
3991
3992 /* Always do C*. */
3993 static void
3994 Cstar_entries (inf)
3995      FILE *inf;
3996 {
3997   C_entries (C_STAR, inf);
3998 }
3999
4000 /* Always do Yacc. */
4001 static void
4002 Yacc_entries (inf)
4003      FILE *inf;
4004 {
4005   C_entries (YACC, inf);
4006 }
4007
4008 \f
/* Useful macros. */
/* LOOP_ON_INPUT_LINES expands to a complete `for' header; the statement
   that follows the macro invocation is the loop body.  The header has
   no initialization and no increment clause: all the work is done in
   the loop test, which first checks !feof (FILE_POINTER) and, when
   that holds, calls readline (&LINE_BUFFER, FILE_POINTER) and then
   sets CHAR_POINTER to LINE_BUFFER.buffer before the body runs.
   Because the line is fetched in the test, a `continue' in the body
   moves on to the next line.
   LINE_BUFFER and CHAR_POINTER are expanded without parentheses
   (`&line_buffer', `line_buffer.buffer', and as an assignment target),
   so pass plain variable names for them. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&line_buffer, file_pointer),                       \
           char_pointer = line_buffer.buffer,                           \
           TRUE);                                                       \
      )
4018
/* LOOKING_AT tests whether the text at CP begins with the keyword KW
   and whether notinname accepts the character right after it.  When
   both hold, CP is reassigned to skip_spaces applied to the position
   just past KW, and the value of that assignment is the last operand
   of the && chain.  When a test fails CP is left unchanged.
   KW must be a string literal: `"" kw' only parses for a literal, and
   sizeof(kw)-1 is used as the keyword length.  CP is evaluated several
   times and assigned to, so it must be a side-effect-free lvalue. */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
   && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
   && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4024
/* Similar to LOOKING_AT, but the comparison is done with strncaseeq
   (presumably case-insensitive -- confirm against its definition),
   notinname is not consulted for the character after the keyword, and
   trailing spaces are not skipped: on a match CP is only advanced past
   KW.  KW must be a string literal and CP a side-effect-free lvalue. */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
   && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))                   /* move cp past kw */
4030
4031 /*
4032  * Read a file, but do no processing.  This is used to do regexp
4033  * matching on files that have no language defined.
4034  */
4035 static void
4036 just_read_file (inf)
4037      FILE *inf;
4038 {
4039   register char *dummy;
4040
4041   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4042     continue;
4043 }
4044
4045 \f
4046 /* Fortran parsing */
4047
4048 static void F_takeprec __P((void));
4049 static void F_getit __P((FILE *));
4050
4051 static void
4052 F_takeprec ()
4053 {
4054   dbp = skip_spaces (dbp);
4055   if (*dbp != '*')
4056     return;
4057   dbp++;
4058   dbp = skip_spaces (dbp);
4059   if (strneq (dbp, "(*)", 3))
4060     {
4061       dbp += 3;
4062       return;
4063     }
4064   if (!ISDIGIT (*dbp))
4065     {
4066       --dbp;                    /* force failure */
4067       return;
4068     }
4069   do
4070     dbp++;
4071   while (ISDIGIT (*dbp));
4072 }
4073
4074 static void
4075 F_getit (inf)
4076      FILE *inf;
4077 {
4078   register char *cp;
4079
4080   dbp = skip_spaces (dbp);
4081   if (*dbp == '\0')
4082     {
4083       readline (&lb, inf);
4084       dbp = lb.buffer;
4085       if (dbp[5] != '&')
4086         return;
4087       dbp += 6;
4088       dbp = skip_spaces (dbp);
4089     }
4090   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4091     return;
4092   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4093     continue;
4094   make_tag (dbp, cp-dbp, TRUE,
4095             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4096 }
4097
4098
4099 static void
4100 Fortran_functions (inf)
4101      FILE *inf;
4102 {
4103   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4104     {
4105       if (*dbp == '%')
4106         dbp++;                  /* Ratfor escape to fortran */
4107       dbp = skip_spaces (dbp);
4108       if (*dbp == '\0')
4109         continue;
4110       switch (lowcase (*dbp))
4111         {
4112         case 'i':
4113           if (nocase_tail ("integer"))
4114             F_takeprec ();
4115           break;
4116         case 'r':
4117           if (nocase_tail ("real"))
4118             F_takeprec ();
4119           break;
4120         case 'l':
4121           if (nocase_tail ("logical"))
4122             F_takeprec ();
4123           break;
4124         case 'c':
4125           if (nocase_tail ("complex") || nocase_tail ("character"))
4126             F_takeprec ();
4127           break;
4128         case 'd':
4129           if (nocase_tail ("double"))
4130             {
4131               dbp = skip_spaces (dbp);
4132               if (*dbp == '\0')
4133                 continue;
4134               if (nocase_tail ("precision"))
4135                 break;
4136               continue;
4137             }
4138           break;
4139         }
4140       dbp = skip_spaces (dbp);
4141       if (*dbp == '\0')
4142         continue;
4143       switch (lowcase (*dbp))
4144         {
4145         case 'f':
4146           if (nocase_tail ("function"))
4147             F_getit (inf);
4148           continue;
4149         case 's':
4150           if (nocase_tail ("subroutine"))
4151             F_getit (inf);
4152           continue;
4153         case 'e':
4154           if (nocase_tail ("entry"))
4155             F_getit (inf);
4156           continue;
4157         case 'b':
4158           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4159             {
4160               dbp = skip_spaces (dbp);
4161               if (*dbp == '\0') /* assume un-named */
4162                 make_tag ("blockdata", 9, TRUE,
4163                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4164               else
4165                 F_getit (inf);  /* look for name */
4166             }
4167           continue;
4168         }
4169     }
4170 }
4171
4172 \f
4173 /*
4174  * Ada parsing
4175  * Original code by
4176  * Philippe Waroquiers (1998)
4177  */
4178
4179 static void Ada_getit __P((FILE *, char *));
4180
4181 /* Once we are positioned after an "interesting" keyword, let's get
4182    the real tag value necessary. */
4183 static void
4184 Ada_getit (inf, name_qualifier)
4185      FILE *inf;
4186      char *name_qualifier;
4187 {
4188   register char *cp;
4189   char *name;
4190   char c;
4191
4192   while (!feof (inf))
4193     {
4194       dbp = skip_spaces (dbp);
4195       if (*dbp == '\0'
4196           || (dbp[0] == '-' && dbp[1] == '-'))
4197         {
4198           readline (&lb, inf);
4199           dbp = lb.buffer;
4200         }
4201       switch (lowcase(*dbp))
4202         {
4203         case 'b':
4204           if (nocase_tail ("body"))
4205             {
4206               /* Skipping body of   procedure body   or   package body or ....
4207                  resetting qualifier to body instead of spec. */
4208               name_qualifier = "/b";
4209               continue;
4210             }
4211           break;
4212         case 't':
4213           /* Skipping type of   task type   or   protected type ... */
4214           if (nocase_tail ("type"))
4215             continue;
4216           break;
4217         }
4218       if (*dbp == '"')
4219         {
4220           dbp += 1;
4221           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4222             continue;
4223         }
4224       else
4225         {
4226           dbp = skip_spaces (dbp);
4227           for (cp = dbp;
4228                (*cp != '\0'
4229                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4230                cp++)
4231             continue;
4232           if (cp == dbp)
4233             return;
4234         }
4235       c = *cp;
4236       *cp = '\0';
4237       name = concat (dbp, name_qualifier, "");
4238       *cp = c;
4239       make_tag (name, strlen (name), TRUE,
4240                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4241       free (name);
4242       if (c == '"')
4243         dbp = cp + 1;
4244       return;
4245     }
4246 }
4247
/* Scan the Ada source read from INF and hand every name introduced by
   "function", "procedure", "package", "protected", "task" or "type"
   to Ada_getit, which creates the tag.  String literals, "--" comments
   and character literals / attribute quotes are skipped.  Which
   keywords are honoured depends on the file-level flags packages_only
   and typedefs, as tested in the switch below.  */
static void
Ada_funcs (inf)
     FILE *inf;
{
  bool inquote = FALSE;         /* a string was still open at end of line */
  bool skip_till_semicolumn = FALSE; /* inside a "use" clause: ignore
                                        everything up to the next ';' */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* When already inside a string look for the closing quote
                 from here, otherwise start after the opening quote. */
              dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  /* No closing quote on this line; dbp is NULL here and
                     is set again by the line loop. */
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              dbp++ ;
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token.  In every case below,
             "continue" resumes the character loop after a keyword has
             been handled, while "break" leaves the switch so that the
             unrecognized token is skipped by the loop at the bottom. */
          switch (lowcase(*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }
              /* No break here: control falls through into case 't'.
                 NOTE(review): a token starting with 'u' should match
                 neither "task" nor "type", so this presumably ends in
                 the "break" below -- confirm against nocase_tail. */

            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore whatever remains on the line after a type name. */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4355
4356 \f
4357 /*
4358  * Unix and microcontroller assembly tag handling
 * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4360  * Idea by Bob Weiner, Motorola Inc. (1994)
4361  */
4362 static void
4363 Asm_labels (inf)
4364      FILE *inf;
4365 {
4366   register char *cp;
4367
4368   LOOP_ON_INPUT_LINES (inf, lb, cp)
4369     {
4370       /* If first char is alphabetic or one of [_.$], test for colon
4371          following identifier. */
4372       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4373         {
4374           /* Read past label. */
4375           cp++;
4376           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4377             cp++;
4378           if (*cp == ':' || iswhite (*cp))
4379             /* Found end of label, so copy it and add it to the table. */
4380             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4381                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4382         }
4383     }
4384 }
4385
4386 \f
4387 /*
4388  * Perl support
4389  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4390  * Perl variable names: /^(my|local).../
4391  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4392  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4393  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4394  */
4395 static void
4396 Perl_functions (inf)
4397      FILE *inf;
4398 {
4399   char *package = savestr ("main"); /* current package name */
4400   register char *cp;
4401
4402   LOOP_ON_INPUT_LINES (inf, lb, cp)
4403     {
4404       cp = skip_spaces (cp);
4405
4406       if (LOOKING_AT (cp, "package"))
4407         {
4408           free (package);
4409           get_tag (cp, &package);
4410         }
4411       else if (LOOKING_AT (cp, "sub"))
4412         {
4413           char *pos;
4414           char *sp = cp;
4415
4416           while (!notinname (*cp))
4417             cp++;
4418           if (cp == sp)
4419             continue;           /* nothing found */
4420           if ((pos = etags_strchr (sp, ':')) != NULL
4421               && pos < cp && pos[1] == ':')
4422             /* The name is already qualified. */
4423             make_tag (sp, cp - sp, TRUE,
4424                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4425           else
4426             /* Qualify it. */
4427             {
4428               char savechar, *name;
4429
4430               savechar = *cp;
4431               *cp = '\0';
4432               name = concat (package, "::", sp);
4433               *cp = savechar;
4434               make_tag (name, strlen(name), TRUE,
4435                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4436               free (name);
4437             }
4438         }
4439        else if (globals)        /* only if we are tagging global vars */
4440         {
4441           /* Skip a qualifier, if any. */
4442           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4443           /* After "my" or "local", but before any following paren or space. */
4444           char *varstart = cp;
4445
4446           if (qual              /* should this be removed?  If yes, how? */
4447               && (*cp == '$' || *cp == '@' || *cp == '%'))
4448             {
4449               varstart += 1;
4450               do
4451                 cp++;
4452               while (ISALNUM (*cp) || *cp == '_');
4453             }
4454           else if (qual)
4455             {
4456               /* Should be examining a variable list at this point;
4457                  could insist on seeing an open parenthesis. */
4458               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4459                 cp++;
4460             }
4461           else
4462             continue;
4463
4464           make_tag (varstart, cp - varstart, FALSE,
4465                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4466         }
4467     }
4468   free (package);
4469 }
4470
4471
4472 /*
4473  * Python support
4474  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4475  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4476  * More ideas by seb bacon <seb@jamkit.com> (2002)
4477  */
4478 static void
4479 Python_functions (inf)
4480      FILE *inf;
4481 {
4482   register char *cp;
4483
4484   LOOP_ON_INPUT_LINES (inf, lb, cp)
4485     {
4486       cp = skip_spaces (cp);
4487       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4488         {
4489           char *name = cp;
4490           while (!notinname (*cp) && *cp != ':')
4491             cp++;
4492           make_tag (name, cp - name, TRUE,
4493                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4494         }
4495     }
4496 }
4497
4498 \f
4499 /*
4500  * PHP support
4501  * Look for:
4502  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4503  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4504  *  - /^[ \t]*define\(\"[^\"]+/
4505  * Only with --members:
4506  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4507  * Idea by Diez B. Roggisch (2001)
4508  */
4509 static void
4510 PHP_functions (inf)
4511      FILE *inf;
4512 {
4513   register char *cp, *name;
4514   bool search_identifier = FALSE;
4515
4516   LOOP_ON_INPUT_LINES (inf, lb, cp)
4517     {
4518       cp = skip_spaces (cp);
4519       name = cp;
4520       if (search_identifier
4521           && *cp != '\0')
4522         {
4523           while (!notinname (*cp))
4524             cp++;
4525           make_tag (name, cp - name, TRUE,
4526                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4527           search_identifier = FALSE;
4528         }
4529       else if (LOOKING_AT (cp, "function"))
4530         {
4531           if(*cp == '&')
4532             cp = skip_spaces (cp+1);
4533           if(*cp != '\0')
4534             {
4535               name = cp;
4536               while (!notinname (*cp))
4537                 cp++;
4538               make_tag (name, cp - name, TRUE,
4539                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4540             }
4541           else
4542             search_identifier = TRUE;
4543         }
4544       else if (LOOKING_AT (cp, "class"))
4545         {
4546           if (*cp != '\0')
4547             {
4548               name = cp;
4549               while (*cp != '\0' && !iswhite (*cp))
4550                 cp++;
4551               make_tag (name, cp - name, FALSE,
4552                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4553             }
4554           else
4555             search_identifier = TRUE;
4556         }
4557       else if (strneq (cp, "define", 6)
4558                && (cp = skip_spaces (cp+6))
4559                && *cp++ == '('
4560                && (*cp == '"' || *cp == '\''))
4561         {
4562           char quote = *cp++;
4563           name = cp;
4564           while (*cp != quote && *cp != '\0')
4565             cp++;
4566           make_tag (name, cp - name, FALSE,
4567                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4568         }
4569       else if (members
4570                && LOOKING_AT (cp, "var")
4571                && *cp == '$')
4572         {
4573           name = cp;
4574           while (!notinname(*cp))
4575             cp++;
4576           make_tag (name, cp - name, FALSE,
4577                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4578         }
4579     }
4580 }
4581
4582 \f
4583 /*
4584  * Cobol tag functions
4585  * We could look for anything that could be a paragraph name.
4586  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4587  * Idea by Corny de Souza (1993)
4588  */
4589 static void
4590 Cobol_paragraphs (inf)
4591      FILE *inf;
4592 {
4593   register char *bp, *ep;
4594
4595   LOOP_ON_INPUT_LINES (inf, lb, bp)
4596     {
4597       if (lb.len < 9)
4598         continue;
4599       bp += 8;
4600
4601       /* If eoln, compiler option or comment ignore whole line. */
4602       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4603         continue;
4604
4605       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4606         continue;
4607       if (*ep++ == '.')
4608         make_tag (bp, ep - bp, TRUE,
4609                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4610     }
4611 }
4612
4613 \f
4614 /*
4615  * Makefile support
4616  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4617  */
4618 static void
4619 Makefile_targets (inf)
4620      FILE *inf;
4621 {
4622   register char *bp;
4623
4624   LOOP_ON_INPUT_LINES (inf, lb, bp)
4625     {
4626       if (*bp == '\t' || *bp == '#')
4627         continue;
4628       while (*bp != '\0' && *bp != '=' && *bp != ':')
4629         bp++;
4630       if (*bp == ':' || (globals && *bp == '='))
4631         {
4632           /* We should detect if there is more than one tag, but we do not.
4633              We just skip initial and final spaces. */
4634           char * namestart = skip_spaces (lb.buffer);
4635           while (--bp > namestart)
4636             if (!notinname (*bp))
4637               break;
4638           make_tag (namestart, bp - namestart + 1, TRUE,
4639                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4640         }
4641     }
4642 }
4643
4644 \f
4645 /*
4646  * Pascal parsing
4647  * Original code by Mosur K. Mohan (1989)
4648  *
4649  *  Locates tags for procedures & functions.  Doesn't do any type- or
4650  *  var-definitions.  It does look for the keyword "extern" or
4651  *  "forward" immediately following the procedure statement; if found,
4652  *  the tag is skipped.
4653  */
4654 static void
4655 Pascal_functions (inf)
4656      FILE *inf;
4657 {
4658   linebuffer tline;             /* mostly copied from C_entries */
4659   long save_lcno;
4660   int save_lineno, namelen, taglen;
4661   char c, *name;
4662
4663   bool                          /* each of these flags is TRUE if: */
4664     incomment,                  /* point is inside a comment */
4665     inquote,                    /* point is inside '..' string */
4666     get_tagname,                /* point is after PROCEDURE/FUNCTION
4667                                    keyword, so next item = potential tag */
4668     found_tag,                  /* point is after a potential tag */
4669     inparms,                    /* point is within parameter-list */
4670     verify_tag;                 /* point has passed the parm-list, so the
4671                                    next token will determine whether this
4672                                    is a FORWARD/EXTERN to be ignored, or
4673                                    whether it is a real tag */
4674
4675   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4676   name = NULL;                  /* keep compiler quiet */
4677   dbp = lb.buffer;
4678   *dbp = '\0';
4679   linebuffer_init (&tline);
4680
4681   incomment = inquote = FALSE;
4682   found_tag = FALSE;            /* have a proc name; check if extern */
4683   get_tagname = FALSE;          /* found "procedure" keyword         */
4684   inparms = FALSE;              /* found '(' after "proc"            */
4685   verify_tag = FALSE;           /* check if "extern" is ahead        */
4686
4687
4688   while (!feof (inf))           /* long main loop to get next char */
4689     {
4690       c = *dbp++;
4691       if (c == '\0')            /* if end of line */
4692         {
4693           readline (&lb, inf);
4694           dbp = lb.buffer;
4695           if (*dbp == '\0')
4696             continue;
4697           if (!((found_tag && verify_tag)
4698                 || get_tagname))
4699             c = *dbp++;         /* only if don't need *dbp pointing
4700                                    to the beginning of the name of
4701                                    the procedure or function */
4702         }
4703       if (incomment)
4704         {
4705           if (c == '}')         /* within { } comments */
4706             incomment = FALSE;
4707           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4708             {
4709               dbp++;
4710               incomment = FALSE;
4711             }
4712           continue;
4713         }
4714       else if (inquote)
4715         {
4716           if (c == '\'')
4717             inquote = FALSE;
4718           continue;
4719         }
4720       else
4721         switch (c)
4722           {
4723           case '\'':
4724             inquote = TRUE;     /* found first quote */
4725             continue;
4726           case '{':             /* found open { comment */
4727             incomment = TRUE;
4728             continue;
4729           case '(':
4730             if (*dbp == '*')    /* found open (* comment */
4731               {
4732                 incomment = TRUE;
4733                 dbp++;
4734               }
4735             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4736               inparms = TRUE;
4737             continue;
4738           case ')':             /* end of parms list */
4739             if (inparms)
4740               inparms = FALSE;
4741             continue;
4742           case ';':
4743             if (found_tag && !inparms) /* end of proc or fn stmt */
4744               {
4745                 verify_tag = TRUE;
4746                 break;
4747               }
4748             continue;
4749           }
4750       if (found_tag && verify_tag && (*dbp != ' '))
4751         {
4752           /* Check if this is an "extern" declaration. */
4753           if (*dbp == '\0')
4754             continue;
4755           if (lowcase (*dbp == 'e'))
4756             {
4757               if (nocase_tail ("extern")) /* superfluous, really! */
4758                 {
4759                   found_tag = FALSE;
4760                   verify_tag = FALSE;
4761                 }
4762             }
4763           else if (lowcase (*dbp) == 'f')
4764             {
4765               if (nocase_tail ("forward")) /* check for forward reference */
4766                 {
4767                   found_tag = FALSE;
4768                   verify_tag = FALSE;
4769                 }
4770             }
4771           if (found_tag && verify_tag) /* not external proc, so make tag */
4772             {
4773               found_tag = FALSE;
4774               verify_tag = FALSE;
4775               make_tag (name, namelen, TRUE,
4776                         tline.buffer, taglen, save_lineno, save_lcno);
4777               continue;
4778             }
4779         }
4780       if (get_tagname)          /* grab name of proc or fn */
4781         {
4782           char *cp;
4783
4784           if (*dbp == '\0')
4785             continue;
4786
4787           /* Find block name. */
4788           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4789             continue;
4790
4791           /* Save all values for later tagging. */
4792           linebuffer_setlen (&tline, lb.len);
4793           strcpy (tline.buffer, lb.buffer);
4794           save_lineno = lineno;
4795           save_lcno = linecharno;
4796           name = tline.buffer + (dbp - lb.buffer);
4797           namelen = cp - dbp;
4798           taglen = cp - lb.buffer + 1;
4799
4800           dbp = cp;             /* set dbp to e-o-token */
4801           get_tagname = FALSE;
4802           found_tag = TRUE;
4803           continue;
4804
4805           /* And proceed to check for "extern". */
4806         }
4807       else if (!incomment && !inquote && !found_tag)
4808         {
4809           /* Check for proc/fn keywords. */
4810           switch (lowcase (c))
4811             {
4812             case 'p':
4813               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4814                 get_tagname = TRUE;
4815               continue;
4816             case 'f':
4817               if (nocase_tail ("unction"))
4818                 get_tagname = TRUE;
4819               continue;
4820             }
4821         }
4822     } /* while not eof */
4823
4824   free (tline.buffer);
4825 }
4826
4827 \f
4828 /*
4829  * Lisp tag functions
4830  *  look for (def or (DEF, quote or QUOTE
4831  */
4832
4833 static void L_getit __P((void));
4834
4835 static void
4836 L_getit ()
4837 {
4838   if (*dbp == '\'')             /* Skip prefix quote */
4839     dbp++;
4840   else if (*dbp == '(')
4841   {
4842     dbp++;
4843     /* Try to skip "(quote " */
4844     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4845       /* Ok, then skip "(" before name in (defstruct (foo)) */
4846       dbp = skip_spaces (dbp);
4847   }
4848   get_tag (dbp, NULL);
4849 }
4850
4851 static void
4852 Lisp_functions (inf)
4853      FILE *inf;
4854 {
4855   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4856     {
4857       if (dbp[0] != '(')
4858         continue;
4859
4860       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4861         {
4862           dbp = skip_non_spaces (dbp);
4863           dbp = skip_spaces (dbp);
4864           L_getit ();
4865         }
4866       else
4867         {
4868           /* Check for (foo::defmumble name-defined ... */
4869           do
4870             dbp++;
4871           while (!notinname (*dbp) && *dbp != ':');
4872           if (*dbp == ':')
4873             {
4874               do
4875                 dbp++;
4876               while (*dbp == ':');
4877
4878               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4879                 {
4880                   dbp = skip_non_spaces (dbp);
4881                   dbp = skip_spaces (dbp);
4882                   L_getit ();
4883                 }
4884             }
4885         }
4886     }
4887 }
4888
4889 \f
4890 /*
4891  * Lua script language parsing
4892  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4893  *
4894  *  "function" and "local function" are tags if they start at column 1.
4895  */
4896 static void
4897 Lua_functions (inf)
4898      FILE *inf;
4899 {
4900   register char *bp;
4901
4902   LOOP_ON_INPUT_LINES (inf, lb, bp)
4903     {
4904       if (bp[0] != 'f' && bp[0] != 'l')
4905         continue;
4906
4907       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4908
4909       if (LOOKING_AT (bp, "function"))
4910         get_tag (bp, NULL);
4911     }
4912 }
4913
4914 \f
4915 /*
4916  * Postscript tags
4917  * Just look for lines where the first character is '/'
4918  * Also look at "defineps" for PSWrap
4919  * Ideas by:
4920  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4921  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4922  */
4923 static void
4924 PS_functions (inf)
4925      FILE *inf;
4926 {
4927   register char *bp, *ep;
4928
4929   LOOP_ON_INPUT_LINES (inf, lb, bp)
4930     {
4931       if (bp[0] == '/')
4932         {
4933           for (ep = bp+1;
4934                *ep != '\0' && *ep != ' ' && *ep != '{';
4935                ep++)
4936             continue;
4937           make_tag (bp, ep - bp, TRUE,
4938                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4939         }
4940       else if (LOOKING_AT (bp, "defineps"))
4941         get_tag (bp, NULL);
4942     }
4943 }
4944
4945 \f
4946 /*
4947  * Forth tags
4948  * Ignore anything after \ followed by space or in ( )
4949  * Look for words defined by :
4950  * Look for constant, code, create, defer, value, and variable
4951  * OBP extensions:  Look for buffer:, field,
4952  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4953  */
4954 static void
4955 Forth_words (inf)
4956      FILE *inf;
4957 {
4958   register char *bp;
4959
4960   LOOP_ON_INPUT_LINES (inf, lb, bp)
4961     while ((bp = skip_spaces (bp))[0] != '\0')
4962       if (bp[0] == '\\' && iswhite(bp[1]))
4963         break;                  /* read next line */
4964       else if (bp[0] == '(' && iswhite(bp[1]))
4965         do                      /* skip to ) or eol */
4966           bp++;
4967         while (*bp != ')' && *bp != '\0');
4968       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4969                || LOOKING_AT_NOCASE (bp, "constant")
4970                || LOOKING_AT_NOCASE (bp, "code")
4971                || LOOKING_AT_NOCASE (bp, "create")
4972                || LOOKING_AT_NOCASE (bp, "defer")
4973                || LOOKING_AT_NOCASE (bp, "value")
4974                || LOOKING_AT_NOCASE (bp, "variable")
4975                || LOOKING_AT_NOCASE (bp, "buffer:")
4976                || LOOKING_AT_NOCASE (bp, "field"))
4977         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4978       else
4979         bp = skip_non_spaces (bp);
4980 }
4981
4982 \f
4983 /*
4984  * Scheme tag functions
4985  * look for (def... xyzzy
4986  *          (def... (xyzzy
4987  *          (def ... ((...(xyzzy ....
4988  *          (set! xyzzy
4989  * Original code by Ken Haase (1985?)
4990  */
4991 static void
4992 Scheme_functions (inf)
4993      FILE *inf;
4994 {
4995   register char *bp;
4996
4997   LOOP_ON_INPUT_LINES (inf, lb, bp)
4998     {
4999       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5000         {
5001           bp = skip_non_spaces (bp+4);
5002           /* Skip over open parens and white space */
5003           while (notinname (*bp))
5004             bp++;
5005           get_tag (bp, NULL);
5006         }
5007       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5008         get_tag (bp, NULL);
5009     }
5010 }
5011
5012 \f
5013 /* Find tags in TeX and LaTeX input files.  */
5014
5015 /* TEX_toktab is a table of TeX control sequences that define tags.
5016  * Each entry records one such control sequence.
5017  *
5018  * Original code from who knows whom.
5019  * Ideas by:
5020  *   Stefan Monnier (2002)
5021  */
5022
/* Table of the control sequences whose argument is tagged.  It is
   built by TEX_decode_env the first time TeX_commands runs, and its
   end is marked by an entry whose buffer is NULL.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are separated by colons.  */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape and grouping characters of the file being scanned.  TEX_mode
   sets them for each file: either \ { } or ! < >.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
5038
5039 /*
5040  * TeX/LaTeX scanning loop.
5041  */
5042 static void
5043 TeX_commands (inf)
5044      FILE *inf;
5045 {
5046   char *cp;
5047   linebuffer *key;
5048
5049   /* Select either \ or ! as escape character.  */
5050   TEX_mode (inf);
5051
5052   /* Initialize token table once from environment. */
5053   if (TEX_toktab == NULL)
5054     TEX_decode_env ("TEXTAGS", TEX_defenv);
5055
5056   LOOP_ON_INPUT_LINES (inf, lb, cp)
5057     {
5058       /* Look at each TEX keyword in line. */
5059       for (;;)
5060         {
5061           /* Look for a TEX escape. */
5062           while (*cp++ != TEX_esc)
5063             if (cp[-1] == '\0' || cp[-1] == '%')
5064               goto tex_next_line;
5065
5066           for (key = TEX_toktab; key->buffer != NULL; key++)
5067             if (strneq (cp, key->buffer, key->len))
5068               {
5069                 register char *p;
5070                 int namelen, linelen;
5071                 bool opgrp = FALSE;
5072
5073                 cp = skip_spaces (cp + key->len);
5074                 if (*cp == TEX_opgrp)
5075                   {
5076                     opgrp = TRUE;
5077                     cp++;
5078                   }
5079                 for (p = cp;
5080                      (!iswhite (*p) && *p != '#' &&
5081                       *p != TEX_opgrp && *p != TEX_clgrp);
5082                      p++)
5083                   continue;
5084                 namelen = p - cp;
5085                 linelen = lb.len;
5086                 if (!opgrp || *p == TEX_clgrp)
5087                   {
5088                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5089                       p++;
5090                     linelen = p - lb.buffer + 1;
5091                   }
5092                 make_tag (cp, namelen, TRUE,
5093                           lb.buffer, linelen, lineno, linecharno);
5094                 goto tex_next_line; /* We only tag a line once */
5095               }
5096         }
5097     tex_next_line:
5098       ;
5099     }
5100 }
5101
5102 #define TEX_LESC '\\'
5103 #define TEX_SESC '!'
5104
5105 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5106    chars accordingly. */
5107 static void
5108 TEX_mode (inf)
5109      FILE *inf;
5110 {
5111   int c;
5112
5113   while ((c = getc (inf)) != EOF)
5114     {
5115       /* Skip to next line if we hit the TeX comment char. */
5116       if (c == '%')
5117         while (c != '\n' && c != EOF)
5118           c = getc (inf);
5119       else if (c == TEX_LESC || c == TEX_SESC )
5120         break;
5121     }
5122
5123   if (c == TEX_LESC)
5124     {
5125       TEX_esc = TEX_LESC;
5126       TEX_opgrp = '{';
5127       TEX_clgrp = '}';
5128     }
5129   else
5130     {
5131       TEX_esc = TEX_SESC;
5132       TEX_opgrp = '<';
5133       TEX_clgrp = '>';
5134     }
5135   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5136      No attempt is made to correct the situation. */
5137   rewind (inf);
5138 }
5139
5140 /* Read environment and prepend it to the default string.
5141    Build token table. */
5142 static void
5143 TEX_decode_env (evarname, defenv)
5144      char *evarname;
5145      char *defenv;
5146 {
5147   register char *env, *p;
5148   int i, len;
5149
5150   /* Append default string to environment. */
5151   env = getenv (evarname);
5152   if (!env)
5153     env = defenv;
5154   else
5155     {
5156       char *oldenv = env;
5157       env = concat (oldenv, defenv, "");
5158     }
5159
5160   /* Allocate a token table */
5161   for (len = 1, p = env; p;)
5162     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5163       len++;
5164   TEX_toktab = xnew (len, linebuffer);
5165
5166   /* Unpack environment string into token table. Be careful about */
5167   /* zero-length strings (leading ':', "::" and trailing ':') */
5168   for (i = 0; *env != '\0';)
5169     {
5170       p = etags_strchr (env, ':');
5171       if (!p)                   /* End of environment string. */
5172         p = env + strlen (env);
5173       if (p - env > 0)
5174         {                       /* Only non-zero strings. */
5175           TEX_toktab[i].buffer = savenstr (env, p - env);
5176           TEX_toktab[i].len = p - env;
5177           i++;
5178         }
5179       if (*p)
5180         env = p + 1;
5181       else
5182         {
5183           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5184           TEX_toktab[i].len = 0;
5185           break;
5186         }
5187     }
5188 }
5189
5190 \f
5191 /* Texinfo support.  Dave Love, Mar. 2000.  */
5192 static void
5193 Texinfo_nodes (inf)
5194      FILE * inf;
5195 {
5196   char *cp, *start;
5197   LOOP_ON_INPUT_LINES (inf, lb, cp)
5198     if (LOOKING_AT (cp, "@node"))
5199       {
5200         start = cp;
5201         while (*cp != '\0' && *cp != ',')
5202           cp++;
5203         make_tag (start, cp - start, TRUE,
5204                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5205       }
5206 }
5207
5208 \f
5209 /*
5210  * HTML support.
5211  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5212  * Contents of <a name=xxx> are tags with name xxx.
5213  *
5214  * Francesco Potortì, 2002.
5215  */
5216 static void
5217 HTML_labels (inf)
5218      FILE * inf;
5219 {
5220   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5221   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5222   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5223   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5224   char *end;
5225
5226
5227   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5228
5229   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5230     for (;;)                    /* loop on the same line */
5231       {
5232         if (skiptag)            /* skip HTML tag */
5233           {
5234             while (*dbp != '\0' && *dbp != '>')
5235               dbp++;
5236             if (*dbp == '>')
5237               {
5238                 dbp += 1;
5239                 skiptag = FALSE;
5240                 continue;       /* look on the same line */
5241               }
5242             break;              /* go to next line */
5243           }
5244
5245         else if (intag) /* look for "name=" or "id=" */
5246           {
5247             while (*dbp != '\0' && *dbp != '>'
5248                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5249               dbp++;
5250             if (*dbp == '\0')
5251               break;            /* go to next line */
5252             if (*dbp == '>')
5253               {
5254                 dbp += 1;
5255                 intag = FALSE;
5256                 continue;       /* look on the same line */
5257               }
5258             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5259                 || LOOKING_AT_NOCASE (dbp, "id="))
5260               {
5261                 bool quoted = (dbp[0] == '"');
5262
5263                 if (quoted)
5264                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5265                     continue;
5266                 else
5267                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5268                     continue;
5269                 linebuffer_setlen (&token_name, end - dbp);
5270                 strncpy (token_name.buffer, dbp, end - dbp);
5271                 token_name.buffer[end - dbp] = '\0';
5272
5273                 dbp = end;
5274                 intag = FALSE;  /* we found what we looked for */
5275                 skiptag = TRUE; /* skip to the end of the tag */
5276                 getnext = TRUE; /* then grab the text */
5277                 continue;       /* look on the same line */
5278               }
5279             dbp += 1;
5280           }
5281
5282         else if (getnext)       /* grab next tokens and tag them */
5283           {
5284             dbp = skip_spaces (dbp);
5285             if (*dbp == '\0')
5286               break;            /* go to next line */
5287             if (*dbp == '<')
5288               {
5289                 intag = TRUE;
5290                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5291                 continue;       /* look on the same line */
5292               }
5293
5294             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5295               continue;
5296             make_tag (token_name.buffer, token_name.len, TRUE,
5297                       dbp, end - dbp, lineno, linecharno);
5298             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5299             getnext = FALSE;
5300             break;              /* go to next line */
5301           }
5302
5303         else                    /* look for an interesting HTML tag */
5304           {
5305             while (*dbp != '\0' && *dbp != '<')
5306               dbp++;
5307             if (*dbp == '\0')
5308               break;            /* go to next line */
5309             intag = TRUE;
5310             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5311               {
5312                 inanchor = TRUE;
5313                 continue;       /* look on the same line */
5314               }
5315             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5316                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5317                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5318                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5319               {
5320                 intag = FALSE;
5321                 getnext = TRUE;
5322                 continue;       /* look on the same line */
5323               }
5324             dbp += 1;
5325           }
5326       }
5327 }
5328
5329 \f
5330 /*
5331  * Prolog support
5332  *
5333  * Assumes that the predicate or rule starts at column 0.
5334  * Only the first clause of a predicate or rule is added.
5335  * Original code by Sunichirou Sugou (1989)
5336  * Rewritten by Anders Lindgren (1996)
5337  */
5338 static int prolog_pr __P((char *, char *));
5339 static void prolog_skip_comment __P((linebuffer *, FILE *));
5340 static int prolog_atom __P((char *, int));
5341
5342 static void
5343 Prolog_functions (inf)
5344      FILE *inf;
5345 {
5346   char *cp, *last;
5347   int len;
5348   int allocated;
5349
5350   allocated = 0;
5351   len = 0;
5352   last = NULL;
5353
5354   LOOP_ON_INPUT_LINES (inf, lb, cp)
5355     {
5356       if (cp[0] == '\0')        /* Empty line */
5357         continue;
5358       else if (iswhite (cp[0])) /* Not a predicate */
5359         continue;
5360       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5361         prolog_skip_comment (&lb, inf);
5362       else if ((len = prolog_pr (cp, last)) > 0)
5363         {
5364           /* Predicate or rule.  Store the function name so that we
5365              only generate a tag for the first clause.  */
5366           if (last == NULL)
5367             last = xnew(len + 1, char);
5368           else if (len + 1 > allocated)
5369             xrnew (last, len + 1, char);
5370           allocated = len + 1;
5371           strncpy (last, cp, len);
5372           last[len] = '\0';
5373         }
5374     }
5375   free (last);
5376 }
5377
5378
5379 static void
5380 prolog_skip_comment (plb, inf)
5381      linebuffer *plb;
5382      FILE *inf;
5383 {
5384   char *cp;
5385
5386   do
5387     {
5388       for (cp = plb->buffer; *cp != '\0'; cp++)
5389         if (cp[0] == '*' && cp[1] == '/')
5390           return;
5391       readline (plb, inf);
5392     }
5393   while (!feof(inf));
5394 }
5395
5396 /*
5397  * A predicate or rule definition is added if it matches:
5398  *     <beginning of line><Prolog Atom><whitespace>(
5399  * or  <beginning of line><Prolog Atom><whitespace>:-
5400  *
5401  * It is added to the tags database if it doesn't match the
5402  * name of the previous clause header.
5403  *
5404  * Return the size of the name of the predicate or rule, or 0 if no
5405  * header was found.
5406  */
5407 static int
5408 prolog_pr (s, last)
5409      char *s;
5410      char *last;                /* Name of last clause. */
5411 {
5412   int pos;
5413   int len;
5414
5415   pos = prolog_atom (s, 0);
5416   if (pos < 1)
5417     return 0;
5418
5419   len = pos;
5420   pos = skip_spaces (s + pos) - s;
5421
5422   if ((s[pos] == '.'
5423        || (s[pos] == '(' && (pos += 1))
5424        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5425       && (last == NULL          /* save only the first clause */
5426           || len != (int)strlen (last)
5427           || !strneq (s, last, len)))
5428         {
5429           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5430           return len;
5431         }
5432   else
5433     return 0;
5434 }
5435
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and underscores, starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  The surrounding quotes are counted
 *   in the returned length.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        p++;
        if (*p != '\'')
          return p - start;     /* That was the closing quote. */
        p++;                    /* A doubled quote stands for itself. */
        break;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* Skip the backslash and what it quotes. */
        break;

      default:
        p++;
        break;
      }
}
5494
5495 \f
5496 /*
5497  * Support for Erlang
5498  *
5499  * Generates tags for functions, defines, and records.
5500  * Assumes that Erlang functions start at column 0.
5501  * Original code by Anders Lindgren (1996)
5502  */
5503 static int erlang_func __P((char *, char *));
5504 static void erlang_attribute __P((char *));
5505 static int erlang_atom __P((char *));
5506
5507 static void
5508 Erlang_functions (inf)
5509      FILE *inf;
5510 {
5511   char *cp, *last;
5512   int len;
5513   int allocated;
5514
5515   allocated = 0;
5516   len = 0;
5517   last = NULL;
5518
5519   LOOP_ON_INPUT_LINES (inf, lb, cp)
5520     {
5521       if (cp[0] == '\0')        /* Empty line */
5522         continue;
5523       else if (iswhite (cp[0])) /* Not function nor attribute */
5524         continue;
5525       else if (cp[0] == '%')    /* comment */
5526         continue;
5527       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5528         continue;
5529       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5530         {
5531           erlang_attribute (cp);
5532           if (last != NULL)
5533             {
5534               free (last);
5535               last = NULL;
5536             }
5537         }
5538       else if ((len = erlang_func (cp, last)) > 0)
5539         {
5540           /*
5541            * Function.  Store the function name so that we only
5542            * generates a tag for the first clause.
5543            */
5544           if (last == NULL)
5545             last = xnew (len + 1, char);
5546           else if (len + 1 > allocated)
5547             xrnew (last, len + 1, char);
5548           allocated = len + 1;
5549           strncpy (last, cp, len);
5550           last[len] = '\0';
5551         }
5552     }
5553   free (last);
5554 }
5555
5556
5557 /*
5558  * A function definition is added if it matches:
5559  *     <beginning of line><Erlang Atom><whitespace>(
5560  *
5561  * It is added to the tags database if it doesn't match the
5562  * name of the previous clause header.
5563  *
5564  * Return the size of the name of the function, or 0 if no function
5565  * was found.
5566  */
5567 static int
5568 erlang_func (s, last)
5569      char *s;
5570      char *last;                /* Name of last clause. */
5571 {
5572   int pos;
5573   int len;
5574
5575   pos = erlang_atom (s);
5576   if (pos < 1)
5577     return 0;
5578
5579   len = pos;
5580   pos = skip_spaces (s + pos) - s;
5581
5582   /* Save only the first clause. */
5583   if (s[pos++] == '('
5584       && (last == NULL
5585           || len != (int)strlen (last)
5586           || !strneq (s, last, len)))
5587         {
5588           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5589           return len;
5590         }
5591
5592   return 0;
5593 }
5594
5595
5596 /*
5597  * Handle attributes.  Currently, tags are generated for defines
5598  * and records.
5599  *
5600  * They are on the form:
5601  * -define(foo, bar).
5602  * -define(Foo(M, N), M+N).
5603  * -record(graph, {vtab = notable, cyclic = true}).
5604  */
5605 static void
5606 erlang_attribute (s)
5607      char *s;
5608 {
5609   char *cp = s;
5610
5611   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5612       && *cp++ == '(')
5613     {
5614       int len = erlang_atom (skip_spaces (cp));
5615       if (len > 0)
5616         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5617     }
5618   return;
5619 }
5620
5621
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with an atom and also when a quoted atom is not closed
 * on this line.
 *
 * An unquoted atom starts with a letter or an underscore and goes on
 * with alphanumerics and underscores.  A quoted atom runs from a
 * single quote to the next single quote that is not preceded by a
 * backslash; both quotes are counted in the returned length.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }
  else if (*p == '\'')
    {
      p++;
      while (*p != '\'')
        {
          if (*p == '\\')
            p++;                /* step onto the quoted character */
          if (*p == '\0')
            return 0;           /* multiline quoted atoms are ignored */
          p++;
        }
      p++;                      /* count the closing quote */
    }

  return p - s;
}
5650
5651 \f
5652 static char *scan_separators __P((char *));
5653 static void add_regex __P((char *, language *));
5654 static char *substitute __P((char *, char *, struct re_registers *));
5655
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * occurrence of the separator that is not preceded by a backslash.
 *
 * Backslash sequences are processed on the way: \a \b \d \e \f \n \r
 * \t \v become the corresponding control character, a quoted
 * separator becomes a plain separator, and any other quoted character
 * keeps its backslash.  A backslash that ends the string is dropped.
 *
 * Works in place: the result is written at the start of NAME and is
 * null terminated.  Returns a pointer to the terminating separator,
 * or NULL for unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  const char sep = name[0];
  char *out = name;             /* write cursor, always behind IN */
  char *in = name + 1;          /* read cursor, starts after the opener */

  while (*in != '\0')
    {
      char c = *in;

      if (c == '\\')
        {
          c = *++in;            /* the quoted character */
          if (c == '\0')
            break;              /* nothing left to quote */
          switch (c)
            {
            case 'a': *out++ = '\007'; break; /* BEL (bell)            */
            case 'b': *out++ = '\b'; break;   /* BS (back space)       */
            case 'd': *out++ = 0177; break;   /* DEL (delete)          */
            case 'e': *out++ = 033; break;    /* ESC (escape)          */
            case 'f': *out++ = '\f'; break;   /* FF (form feed)        */
            case 'n': *out++ = '\n'; break;   /* NL (new line)         */
            case 'r': *out++ = '\r'; break;   /* CR (carriage return)  */
            case 't': *out++ = '\t'; break;   /* TAB (horizontal tab)  */
            case 'v': *out++ = '\v'; break;   /* VT (vertical tab)     */
            default:
              /* A quoted separator loses its quote; anything else
                 keeps it.  */
              if (c != sep)
                *out++ = '\\';
              *out++ = c;
              break;
            }
        }
      else if (c == sep)
        break;                  /* unquoted separator: end of the field */
      else
        *out++ = c;

      in++;
    }

  if (*in != sep)
    in = NULL;                  /* signal unterminated regexp */

  /* Terminate copied string. */
  *out = '\0';
  return in;
}
5715
5716 /* Look at the argument of --regex or --no-regex and do the right
5717    thing.  Same for each line of a regexp file. */
5718 static void
5719 analyse_regex (regex_arg)
5720      char *regex_arg;
5721 {
5722   if (regex_arg == NULL)
5723     {
5724       free_regexps ();          /* --no-regex: remove existing regexps */
5725       return;
5726     }
5727
5728   /* A real --regexp option or a line in a regexp file. */
5729   switch (regex_arg[0])
5730     {
5731       /* Comments in regexp file or null arg to --regex. */
5732     case '\0':
5733     case ' ':
5734     case '\t':
5735       break;
5736
5737       /* Read a regex file.  This is recursive and may result in a
5738          loop, which will stop when the file descriptors are exhausted. */
5739     case '@':
5740       {
5741         FILE *regexfp;
5742         linebuffer regexbuf;
5743         char *regexfile = regex_arg + 1;
5744
5745         /* regexfile is a file containing regexps, one per line. */
5746         regexfp = fopen (regexfile, "r");
5747         if (regexfp == NULL)
5748           {
5749             pfatal (regexfile);
5750             return;
5751           }
5752         linebuffer_init (&regexbuf);
5753         while (readline_internal (&regexbuf, regexfp) > 0)
5754           analyse_regex (regexbuf.buffer);
5755         free (regexbuf.buffer);
5756         fclose (regexfp);
5757       }
5758       break;
5759
5760       /* Regexp to be used for a specific language only. */
5761     case '{':
5762       {
5763         language *lang;
5764         char *lang_name = regex_arg + 1;
5765         char *cp;
5766
5767         for (cp = lang_name; *cp != '}'; cp++)
5768           if (*cp == '\0')
5769             {
5770               error ("unterminated language name in regex: %s", regex_arg);
5771               return;
5772             }
5773         *cp++ = '\0';
5774         lang = get_language_from_langname (lang_name);
5775         if (lang == NULL)
5776           return;
5777         add_regex (cp, lang);
5778       }
5779       break;
5780
5781       /* Regexp to be used for any language. */
5782     default:
5783       add_regex (regex_arg, NULL);
5784       break;
5785     }
5786 }
5787
5788 /* Separate the regexp pattern, compile it,
5789    and care for optional name and modifiers. */
5790 static void
5791 add_regex (regexp_pattern, lang)
5792      char *regexp_pattern;
5793      language *lang;
5794 {
5795   static struct re_pattern_buffer zeropattern;
5796   char sep, *pat, *name, *modifiers;
5797   const char *err;
5798   struct re_pattern_buffer *patbuf;
5799   regexp *rp;
5800   bool
5801     force_explicit_name = TRUE, /* do not use implicit tag names */
5802     ignore_case = FALSE,        /* case is significant */
5803     multi_line = FALSE,         /* matches are done one line at a time */
5804     single_line = FALSE;        /* dot does not match newline */
5805
5806
5807   if (strlen(regexp_pattern) < 3)
5808     {
5809       error ("null regexp", (char *)NULL);
5810       return;
5811     }
5812   sep = regexp_pattern[0];
5813   name = scan_separators (regexp_pattern);
5814   if (name == NULL)
5815     {
5816       error ("%s: unterminated regexp", regexp_pattern);
5817       return;
5818     }
5819   if (name[1] == sep)
5820     {
5821       error ("null name for regexp \"%s\"", regexp_pattern);
5822       return;
5823     }
5824   modifiers = scan_separators (name);
5825   if (modifiers == NULL)        /* no terminating separator --> no name */
5826     {
5827       modifiers = name;
5828       name = "";
5829     }
5830   else
5831     modifiers += 1;             /* skip separator */
5832
5833   /* Parse regex modifiers. */
5834   for (; modifiers[0] != '\0'; modifiers++)
5835     switch (modifiers[0])
5836       {
5837       case 'N':
5838         if (modifiers == name)
5839           error ("forcing explicit tag name but no name, ignoring", NULL);
5840         force_explicit_name = TRUE;
5841         break;
5842       case 'i':
5843         ignore_case = TRUE;
5844         break;
5845       case 's':
5846         single_line = TRUE;
5847         /* FALLTHRU */
5848       case 'm':
5849         multi_line = TRUE;
5850         need_filebuf = TRUE;
5851         break;
5852       default:
5853         {
5854           char wrongmod [2];
5855           wrongmod[0] = modifiers[0];
5856           wrongmod[1] = '\0';
5857           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5858         }
5859         break;
5860       }
5861
5862   patbuf = xnew (1, struct re_pattern_buffer);
5863   *patbuf = zeropattern;
5864   if (ignore_case)
5865     {
5866       static char lc_trans[CHARS];
5867       int i;
5868       for (i = 0; i < CHARS; i++)
5869         lc_trans[i] = lowcase (i);
5870       patbuf->translate = lc_trans;     /* translation table to fold case  */
5871     }
5872
5873   if (multi_line)
5874     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5875   else
5876     pat = regexp_pattern;
5877
5878   if (single_line)
5879     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5880   else
5881     re_set_syntax (RE_SYNTAX_EMACS);
5882
5883   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5884   if (multi_line)
5885     free (pat);
5886   if (err != NULL)
5887     {
5888       error ("%s while compiling pattern", err);
5889       return;
5890     }
5891
5892   rp = p_head;
5893   p_head = xnew (1, regexp);
5894   p_head->pattern = savestr (regexp_pattern);
5895   p_head->p_next = rp;
5896   p_head->lang = lang;
5897   p_head->pat = patbuf;
5898   p_head->name = savestr (name);
5899   p_head->error_signaled = FALSE;
5900   p_head->force_explicit_name = force_explicit_name;
5901   p_head->ignore_case = ignore_case;
5902   p_head->multi_line = multi_line;
5903 }
5904
5905 /*
5906  * Do the substitutions indicated by the regular expression and
5907  * arguments.
5908  */
5909 static char *
5910 substitute (in, out, regs)
5911      char *in, *out;
5912      struct re_registers *regs;
5913 {
5914   char *result, *t;
5915   int size, dig, diglen;
5916
5917   result = NULL;
5918   size = strlen (out);
5919
5920   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5921   if (out[size - 1] == '\\')
5922     fatal ("pattern error in \"%s\"", out);
5923   for (t = etags_strchr (out, '\\');
5924        t != NULL;
5925        t = etags_strchr (t + 2, '\\'))
5926     if (ISDIGIT (t[1]))
5927       {
5928         dig = t[1] - '0';
5929         diglen = regs->end[dig] - regs->start[dig];
5930         size += diglen - 2;
5931       }
5932     else
5933       size -= 1;
5934
5935   /* Allocate space and do the substitutions. */
5936   assert (size >= 0);
5937   result = xnew (size + 1, char);
5938
5939   for (t = result; *out != '\0'; out++)
5940     if (*out == '\\' && ISDIGIT (*++out))
5941       {
5942         dig = *out - '0';
5943         diglen = regs->end[dig] - regs->start[dig];
5944         strncpy (t, in + regs->start[dig], diglen);
5945         t += diglen;
5946       }
5947     else
5948       *t++ = *out;
5949   *t = '\0';
5950
5951   assert (t <= result + size);
5952   assert (t - result == (int)strlen (result));
5953
5954   return result;
5955 }
5956
5957 /* Deallocate all regexps. */
5958 static void
5959 free_regexps ()
5960 {
5961   regexp *rp;
5962   while (p_head != NULL)
5963     {
5964       rp = p_head->p_next;
5965       free (p_head->pattern);
5966       free (p_head->name);
5967       free (p_head);
5968       p_head = rp;
5969     }
5970   return;
5971 }
5972
5973 /*
5974  * Reads the whole file as a single string from `filebuf' and looks for
5975  * multi-line regular expressions, creating tags on matches.
5976  * readline already dealt with normal regexps.
5977  *
5978  * Idea by Ben Wing <ben@666.com> (2002).
5979  */
5980 static void
5981 regex_tag_multiline ()
5982 {
5983   char *buffer = filebuf.buffer;
5984   regexp *rp;
5985   char *name;
5986
5987   for (rp = p_head; rp != NULL; rp = rp->p_next)
5988     {
5989       int match = 0;
5990
5991       if (!rp->multi_line)
5992         continue;               /* skip normal regexps */
5993
5994       /* Generic initialisations before parsing file from memory. */
5995       lineno = 1;               /* reset global line number */
5996       charno = 0;               /* reset global char number */
5997       linecharno = 0;           /* reset global char number of line start */
5998
5999       /* Only use generic regexps or those for the current language. */
6000       if (rp->lang != NULL && rp->lang != curfdp->lang)
6001         continue;
6002
6003       while (match >= 0 && match < filebuf.len)
6004         {
6005           match = re_search (rp->pat, buffer, filebuf.len, charno,
6006                              filebuf.len - match, &rp->regs);
6007           switch (match)
6008             {
6009             case -2:
6010               /* Some error. */
6011               if (!rp->error_signaled)
6012                 {
6013                   error ("regexp stack overflow while matching \"%s\"",
6014                          rp->pattern);
6015                   rp->error_signaled = TRUE;
6016                 }
6017               break;
6018             case -1:
6019               /* No match. */
6020               break;
6021             default:
6022               if (match == rp->regs.end[0])
6023                 {
6024                   if (!rp->error_signaled)
6025                     {
6026                       error ("regexp matches the empty string: \"%s\"",
6027                              rp->pattern);
6028                       rp->error_signaled = TRUE;
6029                     }
6030                   match = -3;   /* exit from while loop */
6031                   break;
6032                 }
6033
6034               /* Match occurred.  Construct a tag. */
6035               while (charno < rp->regs.end[0])
6036                 if (buffer[charno++] == '\n')
6037                   lineno++, linecharno = charno;
6038               name = rp->name;
6039               if (name[0] == '\0')
6040                 name = NULL;
6041               else /* make a named tag */
6042                 name = substitute (buffer, rp->name, &rp->regs);
6043               if (rp->force_explicit_name)
6044                 /* Force explicit tag name, if a name is there. */
6045                 pfnote (name, TRUE, buffer + linecharno,
6046                         charno - linecharno + 1, lineno, linecharno);
6047               else
6048                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6049                           charno - linecharno + 1, lineno, linecharno);
6050               break;
6051             }
6052         }
6053     }
6054 }
6055
6056 \f
6057 static bool
6058 nocase_tail (cp)
6059      char *cp;
6060 {
6061   register int len = 0;
6062
6063   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6064     cp++, len++;
6065   if (*cp == '\0' && !intoken (dbp[len]))
6066     {
6067       dbp += len;
6068       return TRUE;
6069     }
6070   return FALSE;
6071 }
6072
6073 static void
6074 get_tag (bp, namepp)
6075      register char *bp;
6076      char **namepp;
6077 {
6078   register char *cp = bp;
6079
6080   if (*bp != '\0')
6081     {
6082       /* Go till you get to white space or a syntactic break */
6083       for (cp = bp + 1; !notinname (*cp); cp++)
6084         continue;
6085       make_tag (bp, cp - bp, TRUE,
6086                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6087     }
6088
6089   if (namepp != NULL)
6090     *namepp = savenstr (bp, cp - bp);
6091 }
6092
6093 /*
6094  * Read a line of text from `stream' into `lbp', excluding the
6095  * newline or CR-NL, if any.  Return the number of characters read from
6096  * `stream', which is the length of the line including the newline.
6097  *
6098  * On DOS or Windows we do not count the CR character, if any before the
6099  * NL, in the returned length; this mirrors the behavior of Emacs on those
6100  * platforms (for text files, it translates CR-NL to NL as it reads in the
6101  * file).
6102  *
6103  * If multi-line regular expressions are requested, each line read is
6104  * appended to `filebuf'.
6105  */
6106 static long
6107 readline_internal (lbp, stream)
6108      linebuffer *lbp;
6109      register FILE *stream;
6110 {
6111   char *buffer = lbp->buffer;
6112   register char *p = lbp->buffer;
6113   register char *pend;
6114   int chars_deleted;
6115
6116   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6117
6118   for (;;)
6119     {
6120       register int c = getc (stream);
6121       if (p == pend)
6122         {
6123           /* We're at the end of linebuffer: expand it. */
6124           lbp->size *= 2;
6125           xrnew (buffer, lbp->size, char);
6126           p += buffer - lbp->buffer;
6127           pend = buffer + lbp->size;
6128           lbp->buffer = buffer;
6129         }
6130       if (c == EOF)
6131         {
6132           *p = '\0';
6133           chars_deleted = 0;
6134           break;
6135         }
6136       if (c == '\n')
6137         {
6138           if (p > buffer && p[-1] == '\r')
6139             {
6140               p -= 1;
6141 #ifdef DOS_NT
6142              /* Assume CRLF->LF translation will be performed by Emacs
6143                 when loading this file, so CRs won't appear in the buffer.
6144                 It would be cleaner to compensate within Emacs;
6145                 however, Emacs does not know how many CRs were deleted
6146                 before any given point in the file.  */
6147               chars_deleted = 1;
6148 #else
6149               chars_deleted = 2;
6150 #endif
6151             }
6152           else
6153             {
6154               chars_deleted = 1;
6155             }
6156           *p = '\0';
6157           break;
6158         }
6159       *p++ = c;
6160     }
6161   lbp->len = p - buffer;
6162
6163   if (need_filebuf              /* we need filebuf for multi-line regexps */
6164       && chars_deleted > 0)     /* not at EOF */
6165     {
6166       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6167         {
6168           /* Expand filebuf. */
6169           filebuf.size *= 2;
6170           xrnew (filebuf.buffer, filebuf.size, char);
6171         }
6172       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6173       filebuf.len += lbp->len;
6174       filebuf.buffer[filebuf.len++] = '\n';
6175       filebuf.buffer[filebuf.len] = '\0';
6176     }
6177
6178   return lbp->len + chars_deleted;
6179 }
6180
6181 /*
6182  * Like readline_internal, above, but in addition try to match the
6183  * input line against relevant regular expressions and manage #line
6184  * directives.
6185  */
6186 static void
6187 readline (lbp, stream)
6188      linebuffer *lbp;
6189      FILE *stream;
6190 {
6191   long result;
6192
6193   linecharno = charno;          /* update global char number of line start */
6194   result = readline_internal (lbp, stream); /* read line */
6195   lineno += 1;                  /* increment global line number */
6196   charno += result;             /* increment global char number */
6197
6198   /* Honour #line directives. */
6199   if (!no_line_directive)
6200     {
6201       static bool discard_until_line_directive;
6202
6203       /* Check whether this is a #line directive. */
6204       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6205         {
6206           unsigned int lno;
6207           int start = 0;
6208
6209           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6210               && start > 0)     /* double quote character found */
6211             {
6212               char *endp = lbp->buffer + start;
6213
6214               while ((endp = etags_strchr (endp, '"')) != NULL
6215                      && endp[-1] == '\\')
6216                 endp++;
6217               if (endp != NULL)
6218                 /* Ok, this is a real #line directive.  Let's deal with it. */
6219                 {
6220                   char *taggedabsname;  /* absolute name of original file */
6221                   char *taggedfname;    /* name of original file as given */
6222                   char *name;           /* temp var */
6223
6224                   discard_until_line_directive = FALSE; /* found it */
6225                   name = lbp->buffer + start;
6226                   *endp = '\0';
6227                   canonicalize_filename (name);
6228                   taggedabsname = absolute_filename (name, tagfiledir);
6229                   if (filename_is_absolute (name)
6230                       || filename_is_absolute (curfdp->infname))
6231                     taggedfname = savestr (taggedabsname);
6232                   else
6233                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6234
6235                   if (streq (curfdp->taggedfname, taggedfname))
6236                     /* The #line directive is only a line number change.  We
6237                        deal with this afterwards. */
6238                     free (taggedfname);
6239                   else
6240                     /* The tags following this #line directive should be
6241                        attributed to taggedfname.  In order to do this, set
6242                        curfdp accordingly. */
6243                     {
6244                       fdesc *fdp; /* file description pointer */
6245
6246                       /* Go look for a file description already set up for the
6247                          file indicated in the #line directive.  If there is
6248                          one, use it from now until the next #line
6249                          directive. */
6250                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6251                         if (streq (fdp->infname, curfdp->infname)
6252                             && streq (fdp->taggedfname, taggedfname))
6253                           /* If we remove the second test above (after the &&)
6254                              then all entries pertaining to the same file are
6255                              coalesced in the tags file.  If we use it, then
6256                              entries pertaining to the same file but generated
6257                              from different files (via #line directives) will
6258                              go into separate sections in the tags file.  These
6259                              alternatives look equivalent.  The first one
6260                              destroys some apparently useless information. */
6261                           {
6262                             curfdp = fdp;
6263                             free (taggedfname);
6264                             break;
6265                           }
6266                       /* Else, if we already tagged the real file, skip all
6267                          input lines until the next #line directive. */
6268                       if (fdp == NULL) /* not found */
6269                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6270                           if (streq (fdp->infabsname, taggedabsname))
6271                             {
6272                               discard_until_line_directive = TRUE;
6273                               free (taggedfname);
6274                               break;
6275                             }
6276                       /* Else create a new file description and use that from
6277                          now on, until the next #line directive. */
6278                       if (fdp == NULL) /* not found */
6279                         {
6280                           fdp = fdhead;
6281                           fdhead = xnew (1, fdesc);
6282                           *fdhead = *curfdp; /* copy curr. file description */
6283                           fdhead->next = fdp;
6284                           fdhead->infname = savestr (curfdp->infname);
6285                           fdhead->infabsname = savestr (curfdp->infabsname);
6286                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6287                           fdhead->taggedfname = taggedfname;
6288                           fdhead->usecharno = FALSE;
6289                           fdhead->prop = NULL;
6290                           fdhead->written = FALSE;
6291                           curfdp = fdhead;
6292                         }
6293                     }
6294                   free (taggedabsname);
6295                   lineno = lno - 1;
6296                   readline (lbp, stream);
6297                   return;
6298                 } /* if a real #line directive */
6299             } /* if #line is followed by a a number */
6300         } /* if line begins with "#line " */
6301
6302       /* If we are here, no #line directive was found. */
6303       if (discard_until_line_directive)
6304         {
6305           if (result > 0)
6306             {
6307               /* Do a tail recursion on ourselves, thus discarding the contents
6308                  of the line buffer. */
6309               readline (lbp, stream);
6310               return;
6311             }
6312           /* End of file. */
6313           discard_until_line_directive = FALSE;
6314           return;
6315         }
6316     } /* if #line directives should be considered */
6317
6318   {
6319     int match;
6320     regexp *rp;
6321     char *name;
6322
6323     /* Match against relevant regexps. */
6324     if (lbp->len > 0)
6325       for (rp = p_head; rp != NULL; rp = rp->p_next)
6326         {
6327           /* Only use generic regexps or those for the current language.
6328              Also do not use multiline regexps, which is the job of
6329              regex_tag_multiline. */
6330           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6331               || rp->multi_line)
6332             continue;
6333
6334           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6335           switch (match)
6336             {
6337             case -2:
6338               /* Some error. */
6339               if (!rp->error_signaled)
6340                 {
6341                   error ("regexp stack overflow while matching \"%s\"",
6342                          rp->pattern);
6343                   rp->error_signaled = TRUE;
6344                 }
6345               break;
6346             case -1:
6347               /* No match. */
6348               break;
6349             case 0:
6350               /* Empty string matched. */
6351               if (!rp->error_signaled)
6352                 {
6353                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6354                   rp->error_signaled = TRUE;
6355                 }
6356               break;
6357             default:
6358               /* Match occurred.  Construct a tag. */
6359               name = rp->name;
6360               if (name[0] == '\0')
6361                 name = NULL;
6362               else /* make a named tag */
6363                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6364               if (rp->force_explicit_name)
6365                 /* Force explicit tag name, if a name is there. */
6366                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6367               else
6368                 make_tag (name, strlen (name), TRUE,
6369                           lbp->buffer, match, lineno, linecharno);
6370               break;
6371             }
6372         }
6373   }
6374 }
6375
6376 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy occupies
 * strlen(cp)+1 bytes obtained through savenstr, and is returned.
 */
static char *
savestr (cp)
     char *cp;
{
  int whole_length = strlen (cp);

  return savenstr (cp, whole_length);
}
6387
6388 /*
6389  * Return a pointer to a space of size LEN+1 allocated with xnew where
6390  * the string CP has been copied for at most the first LEN characters.
6391  */
6392 static char *
6393 savenstr (cp, len)
6394      char *cp;
6395      int len;
6396 {
6397   register char *dp;
6398
6399   dp = xnew (len + 1, char);
6400   strncpy (dp, cp, len);
6401   dp[len] = '\0';
6402   return dp;
6403 }
6404
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The
 * terminating NUL is part of the search, so C == '\0' yields a
 * pointer to the end of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* remember the most recent hit */
      if (*sp == '\0')
        break;                  /* terminator examined: done */
    }

  return (char *) last;
}
6426
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The
 * terminating NUL is part of the search, so C == '\0' yields a
 * pointer to the end of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;            /* whole string scanned without a hit */
    }
}
6445
/*
 * Case-insensitive comparison of the strings S1 and S2.  Two
 * characters are compared through lowcase only when both are
 * alphabetic; otherwise they are compared as they are.  The result is
 * the difference between the first pair of characters that do not
 * match, or between the characters reached when S1 ends.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6466
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * ignoring case for alphabetic characters.  Two characters are
 * compared through lowcase only when both are alphabetic; otherwise
 * they are compared as they are.
 *
 * Return 0 if the first N characters match (or if N is not positive),
 * otherwise the difference between the first pair of characters that
 * do not match.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* The count is tested first and decremented only after a pair has
     matched, so that N is exactly the number of characters still to
     be compared when the loop stops.  No character beyond the first N
     is ever examined. */
  while (n > 0 && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    /* All the requested characters matched, even if S1 ends right
       here and S2 goes on. */
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6492
/* Return a pointer to the first character at or after CP for which
   iswhite is false.  The end of the string is not white, so the scan
   stops there at the latest. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;

  return cp;
}
6502
/* Return a pointer to the first character at or after CP that is
   either white (per iswhite) or the end of the string. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;

  return cp;
}
6512
/* Report an error through `error' and terminate the program with
   EXIT_FAILURE.  `s1' and `s2' are handed to `error' unchanged. */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6521
/* Report the current errno condition through perror, with `s1' as the
   message prefix, and terminate the program with EXIT_FAILURE. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6529
6530 static void
6531 suggest_asking_for_help ()
6532 {
6533   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6534            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6535   exit (EXIT_FAILURE);
6536 }
6537
6538 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6539 static void
6540 error (s1, s2)
6541      const char *s1, *s2;
6542 {
6543   fprintf (stderr, "%s: ", progname);
6544   fprintf (stderr, s1, s2);
6545   fprintf (stderr, "\n");
6546 }
6547
6548 /* Return a newly-allocated string whose contents
6549    concatenate those of s1, s2, s3.  */
6550 static char *
6551 concat (s1, s2, s3)
6552      char *s1, *s2, *s3;
6553 {
6554   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6555   char *result = xnew (len1 + len2 + len3 + 1, char);
6556
6557   strcpy (result, s1);
6558   strcpy (result + len1, s2);
6559   strcpy (result + len1 + len2, s3);
6560   result[len1 + len2 + len3] = '\0';
6561
6562   return result;
6563 }
6564
6565 \f
6566 /* Does the same work as the system V getcwd, but does not need to
6567    guess the buffer size in advance. */
6568 static char *
6569 etags_getcwd ()
6570 {
6571 #ifdef HAVE_GETCWD
6572   int bufsize = 200;
6573   char *path = xnew (bufsize, char);
6574
6575   while (getcwd (path, bufsize) == NULL)
6576     {
6577       if (errno != ERANGE)
6578         pfatal ("getcwd");
6579       bufsize *= 2;
6580       free (path);
6581       path = xnew (bufsize, char);
6582     }
6583
6584   canonicalize_filename (path);
6585   return path;
6586
6587 #else /* not HAVE_GETCWD */
6588 #if MSDOS
6589
6590   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6591
6592   getwd (path);
6593
6594   for (p = path; *p != '\0'; p++)
6595     if (*p == '\\')
6596       *p = '/';
6597     else
6598       *p = lowcase (*p);
6599
6600   return strdup (path);
6601 #else /* not MSDOS */
6602   linebuffer path;
6603   FILE *pipe;
6604
6605   linebuffer_init (&path);
6606   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6607   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6608     pfatal ("pwd");
6609   pclose (pipe);
6610
6611   return path.buffer;
6612 #endif /* not MSDOS */
6613 #endif /* not HAVE_GETCWD */
6614 }
6615
6616 /* Return a newly allocated string containing the file name of FILE
6617    relative to the absolute directory DIR (which should end with a slash). */
6618 static char *
6619 relative_filename (file, dir)
6620      char *file, *dir;
6621 {
6622   char *fp, *dp, *afn, *res;
6623   int i;
6624
6625   /* Find the common root of file and dir (with a trailing slash). */
6626   afn = absolute_filename (file, cwd);
6627   fp = afn;
6628   dp = dir;
6629   while (*fp++ == *dp++)
6630     continue;
6631   fp--, dp--;                   /* back to the first differing char */
6632 #ifdef DOS_NT
6633   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6634     return afn;
6635 #endif
6636   do                            /* look at the equal chars until '/' */
6637     fp--, dp--;
6638   while (*fp != '/');
6639
6640   /* Build a sequence of "../" strings for the resulting relative file name. */
6641   i = 0;
6642   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6643     i += 1;
6644   res = xnew (3*i + strlen (fp + 1) + 1, char);
6645   res[0] = '\0';
6646   while (i-- > 0)
6647     strcat (res, "../");
6648
6649   /* Add the file name relative to the common root of file and dir. */
6650   strcat (res, fp + 1);
6651   free (afn);
6652
6653   return res;
6654 }
6655
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." substrings are then removed from
   the result in place.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the slash that starts the preceding
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination lie in the same buffer and
                 overlap, so memmove is required here; strcpy is
                 undefined for overlapping strings.  The +1 moves the
                 terminator as well. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping move again, see above. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6719
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end with
   a slash).  When FILE contains no slash the result is a copy of DIR.
   FILE is modified temporarily but is restored before returning. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash = etags_strrchr (file, '/');
  char *dirname;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, so that only the directory
     part is made absolute, then put the overwritten character back. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6740
/* Tell whether FN is an absolute file name, that is whether it begins
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   FN must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6753
/* Canonicalize the file name FN in place: every run of separators
   becomes a single slash and, under DOS_NT, a lower case drive letter
   is made upper case.  The separator is a slash, or a backslash under
   DOS_NT.  The result is never longer than the original. */
static void
canonicalize_filename (fn)
     register char *fn;
{
  register char *src = fn;      /* next character to read */
  register char *dst = fn;      /* next position to write */
  char sep;

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  sep = '\\';
#else
  sep = '/';
#endif

  while (*src != '\0')
    {
      if (*src != sep)
        *dst++ = *src++;
      else
        {
          /* Emit one slash for the whole run of separators. */
          *dst++ = '/';
          do
            src++;
          while (*src == sep);
        }
    }
  *dst = '\0';
}
6783
6784 \f
6785 /* Initialize a linebuffer for use. */
6786 static void
6787 linebuffer_init (lbp)
6788      linebuffer *lbp;
6789 {
6790   lbp->size = (DEBUG) ? 3 : 200;
6791   lbp->buffer = xnew (lbp->size, char);
6792   lbp->buffer[0] = '\0';
6793   lbp->len = 0;
6794 }
6795
6796 /* Set the minimum size of a string contained in a linebuffer. */
6797 static void
6798 linebuffer_setlen (lbp, toksize)
6799      linebuffer *lbp;
6800      int toksize;
6801 {
6802   while (lbp->size <= toksize)
6803     {
6804       lbp->size *= 2;
6805       xrnew (lbp->buffer, lbp->size, char);
6806     }
6807   lbp->len = toksize;
6808 }
6809
6810 /* Like malloc but get fatal error if memory is exhausted. */
6811 static PTR
6812 xmalloc (size)
6813      unsigned int size;
6814 {
6815   PTR result = (PTR) malloc (size);
6816   if (result == NULL)
6817     fatal ("virtual memory exhausted", (char *)NULL);
6818   return result;
6819 }
6820
6821 static PTR
6822 xrealloc (ptr, size)
6823      char *ptr;
6824      unsigned int size;
6825 {
6826   PTR result = (PTR) realloc (ptr, size);
6827   if (result == NULL)
6828     fatal ("virtual memory exhausted", (char *)NULL);
6829   return result;
6830 }
6831
6832 /*
6833  * Local Variables:
6834  * indent-tabs-mode: t
6835  * tab-width: 8
6836  * fill-column: 79
6837  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6838  * c-file-style: "gnu"
6839  * End:
6840  */
6841
6842 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6843    (do not change this comment) */
6844
6845 /* etags.c ends here */