lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software: you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation, either version 3 of the License, or
  40 (at your option) any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  49
  50
  51 /* NB To comply with the above BSD license, copyright information is
  52 reproduced in etc/ETAGS.README.  That file should be updated when the
  53 above notices are.
  54
  55 To the best of our knowledge, this code was originally based on the
  56 ctags.c distributed with BSD4.2, which was copyrighted by the
  57 University of California, as described above. */
  58
  59
  60 /*
  61  * Authors:
  62  * 1983 Ctags originally by Ken Arnold.
  63  * 1984 Fortran added by Jim Kleckner.
  64  * 1984 Ed Pelegri-Llopart added C typedefs.
  65  * 1985 Emacs TAGS format by Richard Stallman.
  66  * 1989 Sam Kendall added C++.
  67  * 1992 Joseph B. Wells improved C and C++ parsing.
  68  * 1993 Francesco Potortì reorganized C and C++.
  69  * 1994 Line-by-line regexp tags by Tom Tromey.
  70  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  71  * 2002 #line directives by Francesco Potortì.
  72  *
  73  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  74  */
  75
  76 /*
  77  * If you want to add support for a new language, start by looking at the LUA
  78  * language, which is the simplest.  Alternatively, consider shipping a
  79  * configuration file containing regexp definitions for etags.
  80  */
  81
  82 char pot_etags_version[] = "@(#) pot revision number is 17.38";
  83 /* Boolean constants; DEBUG, MSDOS and CTAGS are normalized to these.  */
  84 #define TRUE    1
  85 #define FALSE   0
  86 /* Make DEBUG always defined (TRUE or FALSE) so that `#if DEBUG' works.  */
  87 #ifdef DEBUG
  88 #  undef DEBUG
  89 #  define DEBUG TRUE
  90 #else
  91 #  define DEBUG  FALSE
  92 #  define NDEBUG                /* disable assert */
  93 #endif
  94
  95 #ifdef HAVE_CONFIG_H
  96 # include <config.h>
  97   /* On some systems, Emacs defines static as nothing for the sake
  98      of unexec.  We don't want that here since we don't use unexec. */
  99 # undef static
 100 # ifndef PTR                    /* for XEmacs */
 101 #   define PTR void *
 102 # endif
 103 # ifndef __P                    /* for XEmacs */
 104 #   define __P(args) args
 105 # endif
 106 #else  /* no config.h */
 107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 108 #   define __P(args) args       /* use prototypes */
 109 #   define PTR void *           /* for generic pointers */
 110 # else /* not standard C */
 111 #   define __P(args) ()         /* no prototypes */
 112 #   define const                /* remove const for old compilers' sake */
 113 #   define PTR long *           /* don't use void* */
 114 # endif
 115 #endif /* !HAVE_CONFIG_H */
 116
 117 #ifndef _GNU_SOURCE
 118 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 119 #endif
 120
 121 /* WIN32_NATIVE is for XEmacs.
 122    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 123 #ifdef WIN32_NATIVE
 124 # undef MSDOS
 125 # undef  WINDOWSNT
 126 # define WINDOWSNT
 127 #endif /* WIN32_NATIVE */
 128
 129 #ifdef MSDOS
 130 # undef MSDOS
 131 # define MSDOS TRUE
 132 # include <fcntl.h>
 133 # include <sys/param.h>
 134 # include <io.h>
 135 # ifndef HAVE_CONFIG_H
 136 #   define DOS_NT
 137 #   include <sys/config.h>
 138 # endif
 139 #else
 140 # define MSDOS FALSE
 141 #endif /* MSDOS */
 142
 143 #ifdef WINDOWSNT
 144 # include <stdlib.h>
 145 # include <fcntl.h>
 146 # include <string.h>
 147 # include <direct.h>
 148 # include <io.h>
 149 # define MAXPATHLEN _MAX_PATH
 150 # undef HAVE_NTGUI
 151 # undef  DOS_NT
 152 # define DOS_NT
 153 # ifndef HAVE_GETCWD
 154 #   define HAVE_GETCWD
 155 # endif /* undef HAVE_GETCWD */
 156 #else /* not WINDOWSNT */
 157 # ifdef STDC_HEADERS
 158 #  include <stdlib.h>
 159 #  include <string.h>
 160 # else /* no standard C headers */
 161    extern char *getenv __P((const char *));
 162    extern char *strcpy __P((char *, const char *));
 163    extern char *strncpy __P((char *, const char *, unsigned long));
 164    extern char *strcat __P((char *, const char *));
 165    extern char *strncat __P((char *, const char *, unsigned long));
 166    extern int strcmp __P((const char *, const char *));
 167    extern int strncmp __P((const char *, const char *, unsigned long));
 168    extern int system __P((const char *));
 169    extern unsigned long strlen __P((const char *));
 170    extern void *malloc __P((unsigned long));
 171    extern void *realloc __P((void *, unsigned long));
 172    extern void exit __P((int));
 173    extern void free __P((void *));
 174    extern void *memmove __P((void *, const void *, unsigned long));
 175 #  define EXIT_SUCCESS  0
 176 #  define EXIT_FAILURE  1
 177 # endif
 178 #endif /* !WINDOWSNT */
 179
 180 #ifdef HAVE_UNISTD_H
 181 # include <unistd.h>
 182 #else
 183 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 184     extern char *getcwd (char *buf, size_t size);
 185 # endif
 186 #endif /* HAVE_UNISTD_H */
 187
 188 #include <stdio.h>
 189 #include <ctype.h>
 190 #include <errno.h>
 191 #ifndef errno
 192   extern int errno;
 193 #endif
 194 #include <sys/types.h>
 195 #include <sys/stat.h>
 196
 197 #include <assert.h>
 198 #ifdef NDEBUG
 199 # undef  assert                 /* some systems have a buggy assert.h */
 200 # define assert(x) ((void) 0)
 201 #endif
 202
 203 #if !defined (S_ISREG) && defined (S_IFREG)
 204 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 205 #endif
 206
 207 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 208 # define NO_LONG_OPTIONS TRUE
 209 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 210   extern char *optarg;
 211   extern int optind, opterr;
 212 #else
 213 # define NO_LONG_OPTIONS FALSE
 214 # include <getopt.h>
 215 #endif /* NO_LONG_OPTIONS */
 216
 217 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 218 # ifdef __CYGWIN__              /* compiling on Cygwin */
 219                              !!! NOTICE !!!
 220  the regex.h distributed with Cygwin is not compatible with etags, alas!
 221 If you want regular expression support, you should delete this notice and
 222               arrange to use the GNU regex.h and regex.c.
 223 # endif
 224 #endif
 225 #include <regex.h>
 226
 227 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 228  Leave it undefined to make the program "etags", which makes emacs-style
 229  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 230 #ifdef CTAGS                    /* normalize to TRUE/FALSE for `#if CTAGS' */
 231 # undef  CTAGS
 232 # define CTAGS TRUE             /* building ctags */
 233 #else
 234 # define CTAGS FALSE            /* building etags */
 235 #endif
 236 /* String equality tests; neither argument may be NULL (asserted if DEBUG).  */
 237 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 238 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 239 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 240 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 241
 242 #define CHARS 256               /* 2^8: number of distinct 8-bit char values */
 243 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* 0..CHARS-1 index */
 244 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 245 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 246 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 247 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 248 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 249 /* <ctype.h> tests applied to CHAR(c), so a negative char is never passed.  */
 250 #define ISALNUM(c)      isalnum (CHAR(c))
 251 #define ISALPHA(c)      isalpha (CHAR(c))
 252 #define ISDIGIT(c)      isdigit (CHAR(c))
 253 #define ISLOWER(c)      islower (CHAR(c))
 254 /* Case conversion, likewise applied to CHAR(c).  */
 255 #define lowcase(c)      tolower (CHAR(c))
 256 #define upcase(c)       toupper (CHAR(c))
 257
 258
 259 /*
 260  *      xnew, xrnew -- allocate, reallocate storage
 261  * N is a count of Type objects, not bytes; xrnew assigns back to OldPointer.
 262  * SYNOPSIS:    Type *xnew (int n, Type);
 263  *              void xrnew (OldPointer, int n, Type);
 264  */
 265 #if DEBUG                       /* pass the call site to the trace_* allocators */
 266 # include "chkmalloc.h"
 267 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 268                                                   (n) * sizeof (Type)))
 269 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 270                                         (char *) (op), (n) * sizeof (Type)))
 271 #else                           /* go through xmalloc and xrealloc */
 272 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 273 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 274                                         (char *) (op), (n) * sizeof (Type)))
 275 #endif
 276
 277 #define bool int                /* booleans are ints holding TRUE or FALSE */
 278 /* Type of a language's parse function; it is passed a FILE *.  */
 279 typedef void Lang_function __P((FILE *));
 280 /* Pairs a file name suffix with the command that decompresses such files.  */
 281 typedef struct
 282 {
 283   char *suffix;                 /* file name suffix for this compressor */
 284   char *command;                /* takes one arg and decompresses to stdout */
 285 } compressor;
 286 /* Description of one source language: name, help, parser, how to detect it.  */
 287 typedef struct
 288 {
 289   char *name;                   /* language name */
 290   char *help;                   /* detailed help for the language */
 291   Lang_function *function;      /* parse function */
 292   char **suffixes;              /* name suffixes of this language's files */
 293   char **filenames;             /* names of this language's files */
 294   char **interpreters;          /* interpreters for this language */
 295   bool metasource;              /* source used to generate other sources */
 296 } language;
 297 /* Description of one input file; descriptions are chained through `next'.  */
 298 typedef struct fdesc
 299 {
 300   struct fdesc *next;           /* next description in the linked list */
 301   char *infname;                /* uncompressed input file name */
 302   char *infabsname;             /* absolute uncompressed input file name */
 303   char *infabsdir;              /* absolute dir of input file */
 304   char *taggedfname;            /* file name to write in tagfile */
 305   language *lang;               /* language of file */
 306   char *prop;                   /* file properties to write in tagfile */
 307   bool usecharno;               /* etags tags shall contain char number */
 308   bool written;                 /* entry written in the tags file */
 309 } fdesc;
 310 /* One tag, kept as a node of a binary tree.  */
 311 typedef struct node_st
 312 {                               /* sorting structure */
 313   struct node_st *left, *right; /* left and right sons */
 314   fdesc *fdp;                   /* description of file the tag belongs to */
 315   char *name;                   /* tag name */
 316   char *regex;                  /* search regexp */
 317   bool valid;                   /* write this tag on the tag file */
 318   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 319   bool been_warned;             /* warning already given for duplicated tag */
 320   int lno;                      /* line number tag is on */
 321   long cno;                     /* character number line starts on */
 322 } node;
 323
 324 /*
 325  * A `linebuffer' is a structure which holds a line of text.
 326  * `readline_internal' reads a line from a stream into a linebuffer
 327  * and works regardless of the length of the line.
 328  * SIZE is the size of BUFFER, LEN is the length of the string in
 329  * BUFFER after readline reads it.
 330  */
 331 typedef struct
 332 {
 333   long size;                    /* size of BUFFER */
 334   int len;                      /* length of the string in BUFFER */
 335   char *buffer;                 /* the text of the line */
 336 } linebuffer;
 337
 338 /* One element of the argument list; supports mixing --lang and file names. */
 339 typedef struct
 340 {
 341   enum {
 342     at_language,                /* a language specification */
 343     at_regexp,                  /* a regular expression */
 344     at_filename,                /* a file name */
 345     at_stdin,                   /* read from stdin here */
 346     at_end                      /* stop parsing the list */
 347   } arg_type;                   /* argument type */
 348   language *lang;               /* language associated with the argument */
 349   char *what;                   /* the argument itself */
 350 } argument;
 351
 352 /* Structure defining a regular expression; entries are linked via p_next. */
 353 typedef struct regexp
 354 {
 355   struct regexp *p_next;        /* pointer to next in list */
 356   language *lang;               /* if set, use only for this language */
 357   char *pattern;                /* the regexp pattern */
 358   char *name;                   /* tag name */
 359   struct re_pattern_buffer *pat; /* the compiled pattern */
 360   struct re_registers regs;     /* re registers */
 361   bool error_signaled;          /* already signaled for this regexp */
 362   bool force_explicit_name;     /* do not allow implicit tag name */
 363   bool ignore_case;             /* ignore case when matching */
 364   bool multi_line;              /* do a multi-line match on the whole file */
 365 } regexp;
 366
 367
 368 /* Per-language parse functions.  Many compilers barf on this:
 369         Lang_function Ada_funcs;
 370    so each one is declared with an explicit prototype instead.  */
 371 static void Ada_funcs __P((FILE *));
 372 static void Asm_labels __P((FILE *));
 373 static void C_entries __P((int c_ext, FILE *)); /* extra arg: not a Lang_function */
 374 static void default_C_entries __P((FILE *));
 375 static void plain_C_entries __P((FILE *));
 376 static void Cjava_entries __P((FILE *));
 377 static void Cobol_paragraphs __P((FILE *));
 378 static void Cplusplus_entries __P((FILE *));
 379 static void Cstar_entries __P((FILE *));
 380 static void Erlang_functions __P((FILE *));
 381 static void Forth_words __P((FILE *));
 382 static void Fortran_functions __P((FILE *));
 383 static void HTML_labels __P((FILE *));
 384 static void Lisp_functions __P((FILE *));
 385 static void Lua_functions __P((FILE *));
 386 static void Makefile_targets __P((FILE *));
 387 static void Pascal_functions __P((FILE *));
 388 static void Perl_functions __P((FILE *));
 389 static void PHP_functions __P((FILE *));
 390 static void PS_functions __P((FILE *));
 391 static void Prolog_functions __P((FILE *));
 392 static void Python_functions __P((FILE *));
 393 static void Scheme_functions __P((FILE *));
 394 static void TeX_commands __P((FILE *));
 395 static void Texinfo_nodes __P((FILE *));
 396 static void Yacc_entries __P((FILE *));
 397 static void just_read_file __P((FILE *));
 398 /* Forward declarations; judging by name, the print_* ones emit usage info.  */
 399 static void print_language_names __P((void));
 400 static void print_version __P((void));
 401 static void print_help __P((argument *));
 402 int main __P((int, char **));
 403 /* Lookups returning compressor/language descriptors; line and tag helpers.  */
 404 static compressor *get_compressor_from_suffix __P((char *, char **));
 405 static language *get_language_from_langname __P((const char *));
 406 static language *get_language_from_interpreter __P((char *));
 407 static language *get_language_from_filename __P((char *, bool));
 408 static void readline __P((linebuffer *, FILE *));
 409 static long readline_internal __P((linebuffer *, FILE *));
 410 static bool nocase_tail __P((char *));
 411 static void get_tag __P((char *, char **));
 412 /* Regexps, error reporting, tag-tree insertion (grouping inferred from names). */
 413 static void analyse_regex __P((char *));
 414 static void free_regexps __P((void));
 415 static void regex_tag_multiline __P((void));
 416 static void error __P((const char *, const char *));
 417 static void suggest_asking_for_help __P((void));
 418 void fatal __P((char *, char *)); /* NOTE(review): not static, unlike its neighbors */
 419 static void pfatal __P((char *));
 420 static void add_node __P((node *, node **));
 421 /* File processing and tag output (grouping inferred from names).  */
 422 static void init __P((void));
 423 static void process_file_name __P((char *, language *));
 424 static void process_file __P((FILE *, char *, language *));
 425 static void find_entries __P((FILE *));
 426 static void free_tree __P((node *));
 427 static void free_fdesc __P((fdesc *));
 428 static void pfnote __P((char *, bool, char *, int, int, long));
 429 static void make_tag __P((char *, int, bool, char *, int, int, long));
 430 static void invalidate_nodes __P((fdesc *, node **));
 431 static void put_entries __P((node *));
 432 /* String, file name, linebuffer and allocation helpers (inferred from names).  */
 433 static char *concat __P((char *, char *, char *));
 434 static char *skip_spaces __P((char *));
 435 static char *skip_non_spaces __P((char *));
 436 static char *savenstr __P((char *, int));
 437 static char *savestr __P((char *));
 438 static char *etags_strchr __P((const char *, int));
 439 static char *etags_strrchr __P((const char *, int));
 440 static int etags_strcasecmp __P((const char *, const char *));
 441 static int etags_strncasecmp __P((const char *, const char *, int));
 442 static char *etags_getcwd __P((void));
 443 static char *relative_filename __P((char *, char *));
 444 static char *absolute_filename __P((char *, char *));
 445 static char *absolute_dirname __P((char *, char *));
 446 static bool filename_is_absolute __P((char *f));
 447 static void canonicalize_filename __P((char *));
 448 static void linebuffer_init __P((linebuffer *));
 449 static void linebuffer_setlen __P((linebuffer *, int));
 450 static PTR xmalloc __P((unsigned int));          /* size is unsigned int */
 451 static PTR xrealloc __P((char *, unsigned int)); /* size is unsigned int */
 452
 453 \f
 454 static char searchar = '/';     /* search delimiter: use /.../ searches */
 455
 456 static char *tagfile;           /* output file */
 457 static char *progname;          /* name this program was invoked with */
 458 static char *cwd;               /* current working directory */
 459 static char *tagfiledir;        /* directory of tagfile */
 460 static FILE *tagf;              /* ioptr for tags file */
 461
 462 static fdesc *fdhead;           /* head of file description list */
 463 static fdesc *curfdp;           /* current file description */
 464 static int lineno;              /* line number of current line */
 465 static long charno;             /* current character number */
 466 static long linecharno;         /* charno of start of current line */
 467 static char *dbp;               /* pointer to start of current tag */
 468 /* NOTE(review): presumably a "no valid char number" sentinel -- confirm at uses. */
 469 static const int invalidcharno = -1;
 470
 471 static node *nodehead;          /* the head of the binary tree of tags */
 472 static node *last_node;         /* the last node created */
 473
 474 static linebuffer lb;           /* the current line */
 475 static linebuffer filebuf;      /* a buffer containing the whole file */
 476 static linebuffer token_name;   /* a buffer containing a tag name */
 477
 478 /* boolean "functions": tables indexed by CHAR(c) (see init and iswhite & co.) */
 479 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 480 static char
 481   /* white chars (see iswhite) */
 482   *white = " \f\t\n\r\v",
 483   /* not in a name (see notinname) */
 484   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 485   /* token ending chars (see endtoken) */
 486   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 487   /* token starting chars (see begtoken) */
 488   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 489   /* valid in-token chars (see intoken) */
 490   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 491
 492 static bool append_to_tagfile;  /* -a: append to tags */
 493 /* The next five default to TRUE in C and derived languages.  */
 494 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 495 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 496                                 /* 0 struct/enum/union decls, and C++ */
 497                                 /* member functions. */
 498 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 499                                 /* constants and variables. */
 500                                 /* -D: opposite of -d.  Default under ctags. */
 501 static bool globals;            /* create tags for global variables */
 502 static bool members;            /* create tags for C member variables */
 503 static bool declarations;       /* --declarations: tag them and externs in C */
 504 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 505 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 506 static bool update;             /* -u: update tags */
 507 static bool vgrind_style;       /* -v: create vgrind style index output */
 508 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 509 static bool cxref_style;        /* -x: create cxref style output */
 510 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 511 static bool ignoreindent;       /* -I: ignore indentation in C */
 512 static bool packages_only;      /* --packages-only: in Ada, tag only packages */
 513
 514 /* STDIN is defined in LynxOS system headers; drop any such definition */
 515 #ifdef STDIN
 516 # undef STDIN
 517 #endif
 518 /* 0x1001 is larger than any char, so it cannot equal an option letter.  */
 519 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 520 static bool parsing_stdin;      /* --parse-stdin used */
 521
 522 static regexp *p_head;          /* list of all regexps (linked by p_next) */
 523 static bool need_filebuf;       /* some regexes are multi-line */
 524 /* Long options: a non-NULL flag is set to val; otherwise val is returned.  */
 525 static struct option longopts[] =
 526 {
 527   { "append",             no_argument,       NULL,               'a'   },
 528   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 529   { "c++",                no_argument,       NULL,               'C'   },
 530   { "declarations",       no_argument,       &declarations,      TRUE  },
 531   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 532   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 533   { "help",               no_argument,       NULL,               'h'   },
 534   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): duplicate name; likely never matched */
 535   { "ignore-indentation", no_argument,       NULL,               'I'   },
 536   { "language",           required_argument, NULL,               'l'   },
 537   { "members",            no_argument,       &members,           TRUE  },
 538   { "no-members",         no_argument,       &members,           FALSE },
 539   { "output",             required_argument, NULL,               'o'   },
 540   { "regex",              required_argument, NULL,               'r'   },
 541   { "no-regex",           no_argument,       NULL,               'R'   },
 542   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 543   { "parse-stdin",        required_argument, NULL,               STDIN },
 544   { "version",            no_argument,       NULL,               'V'   },
 545
 546 #if CTAGS /* Ctags options */
 547   { "backward-search",    no_argument,       NULL,               'B'   },
 548   { "cxref",              no_argument,       NULL,               'x'   },
 549   { "defines",            no_argument,       NULL,               'd'   },
 550   { "globals",            no_argument,       &globals,           TRUE  },
 551   { "typedefs",           no_argument,       NULL,               't'   },
 552   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 553   { "update",             no_argument,       NULL,               'u'   },
 554   { "vgrind",             no_argument,       NULL,               'v'   },
 555   { "no-warn",            no_argument,       NULL,               'w'   },
 556
 557 #else /* Etags options */
 558   { "no-defines",         no_argument,       NULL,               'D'   },
 559   { "no-globals",         no_argument,       &globals,           FALSE },
 560   { "include",            required_argument, NULL,               'i'   },
 561 #endif
 562   { NULL }                      /* end of table */
 563 };
 564 /* Known compressed-file suffixes and the command that decompresses each.  */
 565 static compressor compressors[] =
 566 {
 567   { "z", "gzip -d -c"},
 568   { "Z", "gzip -d -c"},
 569   { "gz", "gzip -d -c"},
 570   { "GZ", "gzip -d -c"},
 571   { "bz2", "bzip2 -d -c" },
 572   { NULL }                      /* end of table */
 573 };
 574
 575 /*
 576  * Language stuff.
 577  */
 578
 579 /* Ada code: NULL-terminated file name suffixes, then the help text */
 580 static char *Ada_suffixes [] =
 581   { "ads", "adb", "ada", NULL };
 582 static char Ada_help [] =
 583 "In Ada code, functions, procedures, packages, tasks and types are\n\
 584 tags.  Use the `--packages-only' option to create tags for\n\
 585 packages only.\n\
 586 Ada tag names have suffixes indicating the type of entity:\n\
 587         Entity type:    Qualifier:\n\
 588         ------------    ----------\n\
 589         function        /f\n\
 590         procedure       /p\n\
 591         package spec    /s\n\
 592         package body    /b\n\
 593         type            /t\n\
 594         task            /k\n\
 595 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 596 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 597 will just search for any tag `bidule'.";
 598
 599 /* Assembly code: NULL-terminated file name suffixes, then the help text */
 600 static char *Asm_suffixes [] =
 601   { "a",        /* Unix assembler */
 602     "asm", /* Microcontroller assembly */
 603     "def", /* BSO/Tasking definition includes  */
 604     "inc", /* Microcontroller include files */
 605     "ins", /* Microcontroller include files */
 606     "s", "sa", /* Unix assembler */
 607     "S",   /* cpp-processed Unix assembler */
 608     "src", /* BSO/Tasking C compiler output */
 609     NULL
 610   };
 611 static char Asm_help [] =
 612 "In assembler code, labels appearing at the beginning of a line,\n\
 613 followed by a colon, are tags.";
 614
 615
 616 /* Note that .c and .h can be considered C++, if the --c++ flag was
 617    given, or if the `class' or `template' keywords are met inside the file.
 618    That is why default_C_entries is called for these. */
 619 static char *default_C_suffixes [] =    /* NULL-terminated suffix list */
 620   { "c", "h", NULL };
 621 #if CTAGS                               /* C help for Ctags */
 622 static char default_C_help [] =
 623 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 624 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 625 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 626 Use --globals to tag global variables.\n\
 627 You can tag function declarations and external variables by\n\
 628 using `--declarations', and struct members by using `--members'.";
 629 #else                                   /* C help for Etags */
 630 static char default_C_help [] =
 631 "In C code, any C function or typedef is a tag, and so are\n\
 632 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 633 definitions and `enum' constants are tags unless you specify\n\
 634 `--no-defines'.  Global variables are tags unless you specify\n\
 635 `--no-globals' and so are struct members unless you specify\n\
 636 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 637 `--no-members' can make the tags table file much smaller.\n\
 638 You can tag function declarations and external variables by\n\
 639 using `--declarations'.";
 640 #endif  /* C help for Ctags and Etags */
 641
 642 static char *Cplusplus_suffixes [] =    /* NULL-terminated suffix list */
 643   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 644     "M",                        /* Objective C++ */
 645     "pdb",                      /* Postscript with C syntax */
 646     NULL };
 647 static char Cplusplus_help [] =         /* help text for C++ */
 648 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 649 --help --lang=c --lang=c++ for full help.)\n\
 650 In addition to C tags, member functions are also recognized.  Member\n\
 651 variables are recognized unless you use the `--no-members' option.\n\
 652 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 653 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 654 `operator+'.";
 655
 656 static char *Cjava_suffixes [] =        /* NULL-terminated suffix list */
 657   { "java", NULL };
 658 static char Cjava_help [] =             /* help text for Java */
 659 "In Java code, all the tags constructs of C and C++ code are\n\
 660 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 661
 662
 663 static char *Cobol_suffixes [] =        /* NULL-terminated suffix list */
 664   { "COB", "cob", NULL };
 665 static char Cobol_help [] =             /* help text for Cobol */
 666 "In Cobol code, tags are paragraph names; that is, any word\n\
 667 starting in column 8 and followed by a period.";
 668
 669 static char *Cstar_suffixes [] =        /* no Cstar_help is defined nearby */
 670   { "cs", "hs", NULL };
 671
 672 static char *Erlang_suffixes [] =       /* NULL-terminated suffix list */
 673   { "erl", "hrl", NULL };
 674 static char Erlang_help [] =            /* help text for Erlang */
 675 "In Erlang code, the tags are the functions, records and macros\n\
 676 defined in the file.";
 677
 678 static char *Forth_suffixes [] =        /* NULL-terminated suffix list */
 679   { "fth", "tok", NULL };
 680 static char Forth_help [] =             /* help text for Forth */
 681 "In Forth code, tags are words defined by `:',\n\
 682 constant, code, create, defer, value, variable, buffer:, field.";
 683
 684 static char *Fortran_suffixes [] =      /* NULL-terminated suffix list */
 685   { "F", "f", "f90", "for", NULL };
 686 static char Fortran_help [] =           /* help text for Fortran */
 687 "In Fortran code, functions, subroutines and block data are tags.";
 688
 689 static char *HTML_suffixes [] =         /* NULL-terminated suffix list */
 690   { "htm", "html", "shtml", NULL };
 691 static char HTML_help [] =              /* help text for HTML */
 692 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 693 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 694 occurrences of `id='.";
 695
 696 static char *Lisp_suffixes [] =         /* NULL-terminated suffix list */
 697   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 698 static char Lisp_help [] =              /* help text for Lisp */
 699 "In Lisp code, any function defined with `defun', any variable\n\
 700 defined with `defvar' or `defconst', and in general the first\n\
 701 argument of any expression that starts with `(def' in column zero\n\
 702 is a tag.";
 703
 704 static char *Lua_suffixes [] =          /* NULL-terminated suffix list */
 705   { "lua", "LUA", NULL };
 706 static char Lua_help [] =               /* help text for Lua */
 707 "In Lua scripts, all functions are tags.";
 708
 709 static char *Makefile_filenames [] =    /* whole file names, not suffixes */
 710   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 711 static char Makefile_help [] =          /* help text for makefiles */
 712 "In makefiles, targets are tags; additionally, variables are tags\n\
 713 unless you specify `--no-globals'.";
 714
 715 static char *Objc_suffixes [] =         /* NULL-terminated suffix list */
 716   { "lm",                       /* Objective lex file */
 717     "m",                        /* Objective C file */
 718      NULL };
 719 static char Objc_help [] =              /* help text for Objective C */
 720 "In Objective C code, tags include Objective C definitions for classes,\n\
 721 class categories, methods and protocols.  Tags for variables and\n\
 722 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 723 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 724
 725 static char *Pascal_suffixes [] =       /* NULL-terminated suffix list */
 726   { "p", "pas", NULL };
 727 static char Pascal_help [] =            /* help text for Pascal */
 728 "In Pascal code, the tags are the functions and procedures defined\n\
 729 in the file.";
 730 /* " // this is for working around an Emacs highlighting bug... */
 731
 732 static char *Perl_suffixes [] =
 733   { "pl", "pm", NULL };
 734 static char *Perl_interpreters [] =
 735   { "perl", "@PERL@", NULL };
 736 static char Perl_help [] =
 737 "In Perl code, the tags are the packages, subroutines and variables\n\
 738 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 739 `--globals' if you want to tag global variables.  Tags for\n\
 740 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 741 defined in the default package is `main::SUB'.";
 742
 743 static char *PHP_suffixes [] =
 744   { "php", "php3", "php4", NULL };
 745 static char PHP_help [] =
 746 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 747 the `--no-members' option, vars are tags too.";
 748
 749 static char *plain_C_suffixes [] =
 750   { "pc",                       /* Pro*C file */
 751      NULL };
 752
 753 static char *PS_suffixes [] =
 754   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 755 static char PS_help [] =
 756 "In PostScript code, the tags are the functions.";
 757
 758 static char *Prolog_suffixes [] =
 759   { "prolog", NULL };
 760 static char Prolog_help [] =
 761 "In Prolog code, tags are predicates and rules at the beginning of\n\
 762 line.";
 763
 764 static char *Python_suffixes [] =
 765   { "py", NULL };
 766 static char Python_help [] =
 767 "In Python code, `def' or `class' at the beginning of a line\n\
 768 generate a tag.";
 769
 770 /* Can't do the `SCM' or `scm' prefix with a version number. */
 771 static char *Scheme_suffixes [] =
 772   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 773 static char Scheme_help [] =
 774 "In Scheme code, tags include anything defined with `def' or with a\n\
 775 construct whose name starts with `def'.  They also include\n\
 776 variables set with `set!' at top level in the file.";
 777
 778 static char *TeX_suffixes [] =
 779   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 780 static char TeX_help [] =
 781 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 782 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 783 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 784 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 785 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 786 \n\
 787 Other commands can be specified by setting the environment variable\n\
 788 `TEXTAGS' to a colon-separated list like, for example,\n\
 789      TEXTAGS=\"mycommand:myothercommand\".";
 790
 791
 792 static char *Texinfo_suffixes [] =
 793   { "texi", "texinfo", "txi", NULL };
 794 static char Texinfo_help [] =
 795 "for texinfo files, lines starting with @node are tagged.";
 796
 797 static char *Yacc_suffixes [] =
 798   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 799 static char Yacc_help [] =
 800 "In Bison or Yacc input files, each rule defines as a tag the\n\
 801 nonterminal it constructs.  The portions of the file that contain\n\
 802 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 803 for full help).";
 804
 805 static char auto_help [] =
 806 "`auto' is not a real language, it indicates to use\n\
 807 a default language for files base on file name suffix and file contents.";
 808
 809 static char none_help [] =
 810 "`none' is not a real language, it indicates to only do\n\
 811 regexp processing on files.";
 812
 813 static char no_lang_help [] =
 814 "No detailed help available for this language.";
 815
 816
 817 /*
 818  * Table of languages.
 819  *
 820  * It is ok for a given function to be listed under more than one
 821  * name.  I just didn't.
 822  */
 823
 824 static language lang_names [] =
 825 {
 826   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 827   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 828   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 829   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 830   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 831   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 832   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 833   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 834   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 835   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 836   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 837   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 838   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 839   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 840   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 841   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 842   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 843   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 844   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 845   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 846   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 847   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 848   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 849   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 850   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 851   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 852   { "auto",      auto_help },                      /* default guessing scheme */
 853   { "none",      none_help,      just_read_file }, /* regexp matching only */
 854   { NULL }                /* end of list */
 855 };
 856
 857 \f
 858 static void
 859 print_language_names ()
 860 {
 861   language *lang;
 862   char **name, **ext;
 863
 864   puts ("\nThese are the currently supported languages, along with the\n\
 865 default file names and dot suffixes:");
 866   for (lang = lang_names; lang->name != NULL; lang++)
 867     {
 868       printf ("  %-*s", 10, lang->name);
 869       if (lang->filenames != NULL)
 870         for (name = lang->filenames; *name != NULL; name++)
 871           printf (" %s", *name);
 872       if (lang->suffixes != NULL)
 873         for (ext = lang->suffixes; *ext != NULL; ext++)
 874           printf (" .%s", *ext);
 875       puts ("");
 876     }
 877   puts ("where `auto' means use default language for files based on file\n\
 878 name suffix, and `none' means only do regexp processing on files.\n\
 879 If no language is specified and no matching suffix is found,\n\
 880 the first line of the file is read for a sharp-bang (#!) sequence\n\
 881 followed by the name of an interpreter.  If no such sequence is found,\n\
 882 Fortran is tried first; if no tags are found, C is tried next.\n\
 883 When parsing any C file, a \"class\" or \"template\" keyword\n\
 884 switches to C++.");
 885   puts ("Compressed files are supported using gzip and bzip2.\n\
 886 \n\
 887 For detailed help on a given language use, for example,\n\
 888 etags --help --lang=ada.");
 889 }
 890
 891 #ifndef EMACS_NAME
 892 # define EMACS_NAME "standalone"
 893 #endif
 894 #ifndef VERSION
 895 # define VERSION "17.38"
 896 #endif
 897 static void
 898 print_version ()
 899 {
 900   /* Makes it easier to update automatically. */
 901   char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
 902
 903   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 904   puts (emacs_copyright);
 905   puts ("This program is distributed under the terms in ETAGS.README");
 906
 907   exit (EXIT_SUCCESS);
 908 }
 909
 910 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 911 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 912 #endif
 913
 914 static void
 915 print_help (argbuffer)
 916      argument *argbuffer;
 917 {
 918   bool help_for_lang = FALSE;
 919
 920   for (; argbuffer->arg_type != at_end; argbuffer++)
 921     if (argbuffer->arg_type == at_language)
 922       {
 923         if (help_for_lang)
 924           puts ("");
 925         puts (argbuffer->lang->help);
 926         help_for_lang = TRUE;
 927       }
 928
 929   if (help_for_lang)
 930     exit (EXIT_SUCCESS);
 931
 932   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 933 \n\
 934 These are the options accepted by %s.\n", progname, progname);
 935   if (NO_LONG_OPTIONS)
 936     puts ("WARNING: long option names do not work with this executable,\n\
 937 as it is not linked with GNU getopt.");
 938   else
 939     puts ("You may use unambiguous abbreviations for the long option names.");
 940   puts ("  A - as file name means read names from stdin (one per line).\n\
 941 Absolute names are stored in the output file as they are.\n\
 942 Relative ones are stored relative to the output file's directory.\n");
 943
 944   puts ("-a, --append\n\
 945         Append tag entries to existing tags file.");
 946
 947   puts ("--packages-only\n\
 948         For Ada files, only generate tags for packages.");
 949
 950   if (CTAGS)
 951     puts ("-B, --backward-search\n\
 952         Write the search commands for the tag entries using '?', the\n\
 953         backward-search command instead of '/', the forward-search command.");
 954
 955   /* This option is mostly obsolete, because etags can now automatically
 956      detect C++.  Retained for backward compatibility and for debugging and
 957      experimentation.  In principle, we could want to tag as C++ even
 958      before any "class" or "template" keyword.
 959   puts ("-C, --c++\n\
 960         Treat files whose name suffix defaults to C language as C++ files.");
 961   */
 962
 963   puts ("--declarations\n\
 964         In C and derived languages, create tags for function declarations,");
 965   if (CTAGS)
 966     puts ("\tand create tags for extern variables if --globals is used.");
 967   else
 968     puts
 969       ("\tand create tags for extern variables unless --no-globals is used.");
 970
 971   if (CTAGS)
 972     puts ("-d, --defines\n\
 973         Create tag entries for C #define constants and enum constants, too.");
 974   else
 975     puts ("-D, --no-defines\n\
 976         Don't create tag entries for C #define constants and enum constants.\n\
 977         This makes the tags file smaller.");
 978
 979   if (!CTAGS)
 980     puts ("-i FILE, --include=FILE\n\
 981         Include a note in tag file indicating that, when searching for\n\
 982         a tag, one should also consult the tags file FILE after\n\
 983         checking the current file.");
 984
 985   puts ("-l LANG, --language=LANG\n\
 986         Force the following files to be considered as written in the\n\
 987         named language up to the next --language=LANG option.");
 988
 989   if (CTAGS)
 990     puts ("--globals\n\
 991         Create tag entries for global variables in some languages.");
 992   else
 993     puts ("--no-globals\n\
 994         Do not create tag entries for global variables in some\n\
 995         languages.  This makes the tags file smaller.");
 996
 997   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
 998     puts ("--no-line-directive\n\
 999         Ignore #line preprocessor directives in C and derived languages.");
1000
1001   if (CTAGS)
1002     puts ("--members\n\
1003         Create tag entries for members of structures in some languages.");
1004   else
1005     puts ("--no-members\n\
1006         Do not create tag entries for members of structures\n\
1007         in some languages.");
1008
1009   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1010         Make a tag for each line matching a regular expression pattern\n\
1011         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1012         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
1013         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1014         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1015   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1016         For example Tcl named tags can be created with:\n\
1017           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1018         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1019         `m' means to allow multi-line matches, `s' implies `m' and\n\
1020         causes dot to match any character, including newline.");
1021
1022   puts ("-R, --no-regex\n\
1023         Don't create tags from regexps for the following files.");
1024
1025   puts ("-I, --ignore-indentation\n\
1026         In C and C++ do not assume that a closing brace in the first\n\
1027         column is the final brace of a function or structure definition.");
1028
1029   puts ("-o FILE, --output=FILE\n\
1030         Write the tags to FILE.");
1031
1032   puts ("--parse-stdin=NAME\n\
1033         Read from standard input and record tags as belonging to file NAME.");
1034
1035   if (CTAGS)
1036     {
1037       puts ("-t, --typedefs\n\
1038         Generate tag entries for C and Ada typedefs.");
1039       puts ("-T, --typedefs-and-c++\n\
1040         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1041         and C++ member functions.");
1042     }
1043
1044   if (CTAGS)
1045     puts ("-u, --update\n\
1046         Update the tag entries for the given files, leaving tag\n\
1047         entries for other files in place.  Currently, this is\n\
1048         implemented by deleting the existing entries for the given\n\
1049         files and then rewriting the new entries at the end of the\n\
1050         tags file.  It is often faster to simply rebuild the entire\n\
1051         tag file than to use this.");
1052
1053   if (CTAGS)
1054     {
1055       puts ("-v, --vgrind\n\
1056         Print on the standard output an index of items intended for\n\
1057         human consumption, similar to the output of vgrind.  The index\n\
1058         is sorted, and gives the page number of each item.");
1059
1060       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1061         puts ("-w, --no-duplicates\n\
1062         Do not create duplicate tag entries, for compatibility with\n\
1063         traditional ctags.");
1064
1065       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066         puts ("-w, --no-warn\n\
1067         Suppress warning messages about duplicate tag entries.");
1068
1069       puts ("-x, --cxref\n\
1070         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1071         The output uses line numbers instead of page numbers, but\n\
1072         beyond that the differences are cosmetic; try both to see\n\
1073         which you like.");
1074     }
1075
1076   puts ("-V, --version\n\
1077         Print the version of the program.\n\
1078 -h, --help\n\
1079         Print this help message.\n\
1080         Followed by one or more `--language' options prints detailed\n\
1081         help about tag generation for the specified languages.");
1082
1083   print_language_names ();
1084
1085   puts ("");
1086   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1087
1088   exit (EXIT_SUCCESS);
1089 }
1090
1091 \f
1092 int
1093 main (argc, argv)
1094      int argc;
1095      char *argv[];
1096 {
1097   int i;
1098   unsigned int nincluded_files;
1099   char **included_files;
1100   argument *argbuffer;
1101   int current_arg, file_count;
1102   linebuffer filename_lb;
1103   bool help_asked = FALSE;
1104  char *optstring;
1105  int opt;
1106
1107
1108 #ifdef DOS_NT
1109   _fmode = O_BINARY;   /* all of files are treated as binary files */
1110 #endif /* DOS_NT */
1111
1112   progname = argv[0];
1113   nincluded_files = 0;
1114   included_files = xnew (argc, char *);
1115   current_arg = 0;
1116   file_count = 0;
1117
1118   /* Allocate enough no matter what happens.  Overkill, but each one
1119      is small. */
1120   argbuffer = xnew (argc, argument);
1121
1122   /*
1123    * Always find typedefs and structure tags.
1124    * Also default to find macro constants, enum constants, struct
1125    * members and global variables.  Do it for both etags and ctags.
1126    */
1127   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1128   globals = members = TRUE;
1129
1130   /* When the optstring begins with a '-' getopt_long does not rearrange the
1131      non-options arguments to be at the end, but leaves them alone. */
1132   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1133                       "ac:Cf:Il:o:r:RSVhH",
1134                       (CTAGS) ? "BxdtTuvw" : "Di:");
1135
1136   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1137     switch (opt)
1138       {
1139       case 0:
1140         /* If getopt returns 0, then it has already processed a
1141            long-named option.  We should do nothing.  */
1142         break;
1143
1144       case 1:
1145         /* This means that a file name has been seen.  Record it. */
1146         argbuffer[current_arg].arg_type = at_filename;
1147         argbuffer[current_arg].what     = optarg;
1148         ++current_arg;
1149         ++file_count;
1150         break;
1151
1152       case STDIN:
1153         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1154         argbuffer[current_arg].arg_type = at_stdin;
1155         argbuffer[current_arg].what     = optarg;
1156         ++current_arg;
1157         ++file_count;
1158         if (parsing_stdin)
1159           fatal ("cannot parse standard input more than once", (char *)NULL);
1160         parsing_stdin = TRUE;
1161         break;
1162
1163         /* Common options. */
1164       case 'a': append_to_tagfile = TRUE;       break;
1165       case 'C': cplusplus = TRUE;               break;
1166       case 'f':         /* for compatibility with old makefiles */
1167       case 'o':
1168         if (tagfile)
1169           {
1170             error ("-o option may only be given once.", (char *)NULL);
1171             suggest_asking_for_help ();
1172             /* NOTREACHED */
1173           }
1174         tagfile = optarg;
1175         break;
1176       case 'I':
1177       case 'S':         /* for backward compatibility */
1178         ignoreindent = TRUE;
1179         break;
1180       case 'l':
1181         {
1182           language *lang = get_language_from_langname (optarg);
1183           if (lang != NULL)
1184             {
1185               argbuffer[current_arg].lang = lang;
1186               argbuffer[current_arg].arg_type = at_language;
1187               ++current_arg;
1188             }
1189         }
1190         break;
1191       case 'c':
1192         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1193         optarg = concat (optarg, "i", ""); /* memory leak here */
1194         /* FALLTHRU */
1195       case 'r':
1196         argbuffer[current_arg].arg_type = at_regexp;
1197         argbuffer[current_arg].what = optarg;
1198         ++current_arg;
1199         break;
1200       case 'R':
1201         argbuffer[current_arg].arg_type = at_regexp;
1202         argbuffer[current_arg].what = NULL;
1203         ++current_arg;
1204         break;
1205       case 'V':
1206         print_version ();
1207         break;
1208       case 'h':
1209       case 'H':
1210         help_asked = TRUE;
1211         break;
1212
1213         /* Etags options */
1214       case 'D': constantypedefs = FALSE;                        break;
1215       case 'i': included_files[nincluded_files++] = optarg;     break;
1216
1217         /* Ctags options. */
1218       case 'B': searchar = '?';                                 break;
1219       case 'd': constantypedefs = TRUE;                         break;
1220       case 't': typedefs = TRUE;                                break;
1221       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1222       case 'u': update = TRUE;                                  break;
1223       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1224       case 'x': cxref_style = TRUE;                             break;
1225       case 'w': no_warnings = TRUE;                             break;
1226       default:
1227         suggest_asking_for_help ();
1228         /* NOTREACHED */
1229       }
1230
1231   /* No more options.  Store the rest of arguments. */
1232   for (; optind < argc; optind++)
1233     {
1234       argbuffer[current_arg].arg_type = at_filename;
1235       argbuffer[current_arg].what = argv[optind];
1236       ++current_arg;
1237       ++file_count;
1238     }
1239
1240   argbuffer[current_arg].arg_type = at_end;
1241
1242   if (help_asked)
1243     print_help (argbuffer);
1244     /* NOTREACHED */
1245
1246   if (nincluded_files == 0 && file_count == 0)
1247     {
1248       error ("no input files specified.", (char *)NULL);
1249       suggest_asking_for_help ();
1250       /* NOTREACHED */
1251     }
1252
1253   if (tagfile == NULL)
1254     tagfile = CTAGS ? "tags" : "TAGS";
1255   cwd = etags_getcwd ();        /* the current working directory */
1256   if (cwd[strlen (cwd) - 1] != '/')
1257     {
1258       char *oldcwd = cwd;
1259       cwd = concat (oldcwd, "/", "");
1260       free (oldcwd);
1261     }
1262   /* Relative file names are made relative to the current directory. */
1263   if (streq (tagfile, "-")
1264       || strneq (tagfile, "/dev/", 5))
1265     tagfiledir = cwd;
1266   else
1267     tagfiledir = absolute_dirname (tagfile, cwd);
1268
1269   init ();                      /* set up boolean "functions" */
1270
1271   linebuffer_init (&lb);
1272   linebuffer_init (&filename_lb);
1273   linebuffer_init (&filebuf);
1274   linebuffer_init (&token_name);
1275
1276   if (!CTAGS)
1277     {
1278       if (streq (tagfile, "-"))
1279         {
1280           tagf = stdout;
1281 #ifdef DOS_NT
1282           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1283              doesn't take effect until after `stdout' is already open). */
1284           if (!isatty (fileno (stdout)))
1285             setmode (fileno (stdout), O_BINARY);
1286 #endif /* DOS_NT */
1287         }
1288       else
1289         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1290       if (tagf == NULL)
1291         pfatal (tagfile);
1292     }
1293
1294   /*
1295    * Loop through files finding functions.
1296    */
1297   for (i = 0; i < current_arg; i++)
1298     {
1299       static language *lang;    /* non-NULL if language is forced */
1300       char *this_file;
1301
1302       switch (argbuffer[i].arg_type)
1303         {
1304         case at_language:
1305           lang = argbuffer[i].lang;
1306           break;
1307         case at_regexp:
1308           analyse_regex (argbuffer[i].what);
1309           break;
1310         case at_filename:
1311               this_file = argbuffer[i].what;
1312               /* Input file named "-" means read file names from stdin
1313                  (one per line) and use them. */
1314               if (streq (this_file, "-"))
1315                 {
1316                   if (parsing_stdin)
1317                     fatal ("cannot parse standard input AND read file names from it",
1318                            (char *)NULL);
1319                   while (readline_internal (&filename_lb, stdin) > 0)
1320                     process_file_name (filename_lb.buffer, lang);
1321                 }
1322               else
1323                 process_file_name (this_file, lang);
1324           break;
1325         case at_stdin:
1326           this_file = argbuffer[i].what;
1327           process_file (stdin, this_file, lang);
1328           break;
1329         }
1330     }
1331
1332   free_regexps ();
1333   free (lb.buffer);
1334   free (filebuf.buffer);
1335   free (token_name.buffer);
1336
1337   if (!CTAGS || cxref_style)
1338     {
1339       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1340       put_entries (nodehead);
1341       free_tree (nodehead);
1342       nodehead = NULL;
1343       if (!CTAGS)
1344         {
1345           fdesc *fdp;
1346
1347           /* Output file entries that have no tags. */
1348           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1349             if (!fdp->written)
1350               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1351
1352           while (nincluded_files-- > 0)
1353             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1354
1355           if (fclose (tagf) == EOF)
1356             pfatal (tagfile);
1357         }
1358
1359       exit (EXIT_SUCCESS);
1360     }
1361
1362   /* From here on, we are in (CTAGS && !cxref_style) */
1363   if (update)
1364     {
1365       char cmd[BUFSIZ];
1366       for (i = 0; i < current_arg; ++i)
1367         {
1368           switch (argbuffer[i].arg_type)
1369             {
1370             case at_filename:
1371             case at_stdin:
1372               break;
1373             default:
1374               continue;         /* the for loop */
1375             }
1376           sprintf (cmd,
1377                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1378                    tagfile, argbuffer[i].what, tagfile);
1379           if (system (cmd) != EXIT_SUCCESS)
1380             fatal ("failed to execute shell command", (char *)NULL);
1381         }
1382       append_to_tagfile = TRUE;
1383     }
1384
1385   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1386   if (tagf == NULL)
1387     pfatal (tagfile);
1388   put_entries (nodehead);       /* write all the tags (CTAGS) */
1389   free_tree (nodehead);
1390   nodehead = NULL;
1391   if (fclose (tagf) == EOF)
1392     pfatal (tagfile);
1393
1394   if (CTAGS)
1395     if (append_to_tagfile || update)
1396       {
1397         char cmd[2*BUFSIZ+20];
1398         /* Maybe these should be used:
1399            setenv ("LC_COLLATE", "C", 1);
1400            setenv ("LC_ALL", "C", 1); */
1401         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1402         exit (system (cmd));
1403       }
1404   return EXIT_SUCCESS;
1405 }
1406
1407
1408 /*
1409  * Return a compressor given the file name.  If EXTPTR is non-zero,
1410  * return a pointer into FILE where the compressor-specific
1411  * extension begins.  If no compressor is found, NULL is returned
1412  * and EXTPTR is not significant.
1413  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1414  */
1415 static compressor *
1416 get_compressor_from_suffix (file, extptr)
1417      char *file;
1418      char **extptr;
1419 {
1420   compressor *compr;
1421   char *slash, *suffix;
1422
1423   /* This relies on FN to be after canonicalize_filename,
1424      so we don't need to consider backslashes on DOS_NT.  */
1425   slash = etags_strrchr (file, '/');
1426   suffix = etags_strrchr (file, '.');
1427   if (suffix == NULL || suffix < slash)
1428     return NULL;
1429   if (extptr != NULL)
1430     *extptr = suffix;
1431   suffix += 1;
1432   /* Let those poor souls who live with DOS 8+3 file name limits get
1433      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1434      Only the first do loop is run if not MSDOS */
1435   do
1436     {
1437       for (compr = compressors; compr->suffix != NULL; compr++)
1438         if (streq (compr->suffix, suffix))
1439           return compr;
1440       if (!MSDOS)
1441         break;                  /* do it only once: not really a loop */
1442       if (extptr != NULL)
1443         *extptr = ++suffix;
1444     } while (*suffix != '\0');
1445   return NULL;
1446 }
1447
1448
1449
1450 /*
1451  * Return a language given the name.
1452  */
1453 static language *
1454 get_language_from_langname (name)
1455      const char *name;
1456 {
1457   language *lang;
1458
1459   if (name == NULL)
1460     error ("empty language name", (char *)NULL);
1461   else
1462     {
1463       for (lang = lang_names; lang->name != NULL; lang++)
1464         if (streq (name, lang->name))
1465           return lang;
1466       error ("unknown language \"%s\"", name);
1467     }
1468
1469   return NULL;
1470 }
1471
1472
1473 /*
1474  * Return a language given the interpreter name.
1475  */
1476 static language *
1477 get_language_from_interpreter (interpreter)
1478      char *interpreter;
1479 {
1480   language *lang;
1481   char **iname;
1482
1483   if (interpreter == NULL)
1484     return NULL;
1485   for (lang = lang_names; lang->name != NULL; lang++)
1486     if (lang->interpreters != NULL)
1487       for (iname = lang->interpreters; *iname != NULL; iname++)
1488         if (streq (*iname, interpreter))
1489             return lang;
1490
1491   return NULL;
1492 }
1493
1494
1495
1496 /*
1497  * Return a language given the file name.
1498  */
1499 static language *
1500 get_language_from_filename (file, case_sensitive)
1501      char *file;
1502      bool case_sensitive;
1503 {
1504   language *lang;
1505   char **name, **ext, *suffix;
1506
1507   /* Try whole file name first. */
1508   for (lang = lang_names; lang->name != NULL; lang++)
1509     if (lang->filenames != NULL)
1510       for (name = lang->filenames; *name != NULL; name++)
1511         if ((case_sensitive)
1512             ? streq (*name, file)
1513             : strcaseeq (*name, file))
1514           return lang;
1515
1516   /* If not found, try suffix after last dot. */
1517   suffix = etags_strrchr (file, '.');
1518   if (suffix == NULL)
1519     return NULL;
1520   suffix += 1;
1521   for (lang = lang_names; lang->name != NULL; lang++)
1522     if (lang->suffixes != NULL)
1523       for (ext = lang->suffixes; *ext != NULL; ext++)
1524         if ((case_sensitive)
1525             ? streq (*ext, suffix)
1526             : strcaseeq (*ext, suffix))
1527           return lang;
1528   return NULL;
1529 }
1530
1531 \f
1532 /*
1533  * This routine is called on each file argument.
1534  */
1535 static void
1536 process_file_name (file, lang)
1537      char *file;
1538      language *lang;
1539 {
1540   struct stat stat_buf;
1541   FILE *inf;
1542   fdesc *fdp;
1543   compressor *compr;
1544   char *compressed_name, *uncompressed_name;
1545   char *ext, *real_name;
1546   int retval;
1547
1548   canonicalize_filename (file);
1549   if (streq (file, tagfile) && !streq (tagfile, "-"))
1550     {
1551       error ("skipping inclusion of %s in self.", file);
1552       return;
1553     }
1554   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1555     {
1556       compressed_name = NULL;
1557       real_name = uncompressed_name = savestr (file);
1558     }
1559   else
1560     {
1561       real_name = compressed_name = savestr (file);
1562       uncompressed_name = savenstr (file, ext - file);
1563     }
1564
1565   /* If the canonicalized uncompressed name
1566      has already been dealt with, skip it silently. */
1567   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1568     {
1569       assert (fdp->infname != NULL);
1570       if (streq (uncompressed_name, fdp->infname))
1571         goto cleanup;
1572     }
1573
1574   if (stat (real_name, &stat_buf) != 0)
1575     {
1576       /* Reset real_name and try with a different name. */
1577       real_name = NULL;
1578       if (compressed_name != NULL) /* try with the given suffix */
1579         {
1580           if (stat (uncompressed_name, &stat_buf) == 0)
1581             real_name = uncompressed_name;
1582         }
1583       else                      /* try all possible suffixes */
1584         {
1585           for (compr = compressors; compr->suffix != NULL; compr++)
1586             {
1587               compressed_name = concat (file, ".", compr->suffix);
1588               if (stat (compressed_name, &stat_buf) != 0)
1589                 {
1590                   if (MSDOS)
1591                     {
1592                       char *suf = compressed_name + strlen (file);
1593                       size_t suflen = strlen (compr->suffix) + 1;
1594                       for ( ; suf[1]; suf++, suflen--)
1595                         {
1596                           memmove (suf, suf + 1, suflen);
1597                           if (stat (compressed_name, &stat_buf) == 0)
1598                             {
1599                               real_name = compressed_name;
1600                               break;
1601                             }
1602                         }
1603                       if (real_name != NULL)
1604                         break;
1605                     } /* MSDOS */
1606                   free (compressed_name);
1607                   compressed_name = NULL;
1608                 }
1609               else
1610                 {
1611                   real_name = compressed_name;
1612                   break;
1613                 }
1614             }
1615         }
1616       if (real_name == NULL)
1617         {
1618           perror (file);
1619           goto cleanup;
1620         }
1621     } /* try with a different name */
1622
1623   if (!S_ISREG (stat_buf.st_mode))
1624     {
1625       error ("skipping %s: it is not a regular file.", real_name);
1626       goto cleanup;
1627     }
1628   if (real_name == compressed_name)
1629     {
1630       char *cmd = concat (compr->command, " ", real_name);
1631       inf = (FILE *) popen (cmd, "r");
1632       free (cmd);
1633     }
1634   else
1635     inf = fopen (real_name, "r");
1636   if (inf == NULL)
1637     {
1638       perror (real_name);
1639       goto cleanup;
1640     }
1641
1642   process_file (inf, uncompressed_name, lang);
1643
1644   if (real_name == compressed_name)
1645     retval = pclose (inf);
1646   else
1647     retval = fclose (inf);
1648   if (retval < 0)
1649     pfatal (file);
1650
1651  cleanup:
1652   free (compressed_name);
1653   free (uncompressed_name);
1654   last_node = NULL;
1655   curfdp = NULL;
1656   return;
1657 }
1658
1659 static void
1660 process_file (fh, fn, lang)
1661      FILE *fh;
1662      char *fn;
1663      language *lang;
1664 {
1665   static const fdesc emptyfdesc;
1666   fdesc *fdp;
1667
1668   /* Create a new input file description entry. */
1669   fdp = xnew (1, fdesc);
1670   *fdp = emptyfdesc;
1671   fdp->next = fdhead;
1672   fdp->infname = savestr (fn);
1673   fdp->lang = lang;
1674   fdp->infabsname = absolute_filename (fn, cwd);
1675   fdp->infabsdir = absolute_dirname (fn, cwd);
1676   if (filename_is_absolute (fn))
1677     {
1678       /* An absolute file name.  Canonicalize it. */
1679       fdp->taggedfname = absolute_filename (fn, NULL);
1680     }
1681   else
1682     {
1683       /* A file name relative to cwd.  Make it relative
1684          to the directory of the tags file. */
1685       fdp->taggedfname = relative_filename (fn, tagfiledir);
1686     }
1687   fdp->usecharno = TRUE;        /* use char position when making tags */
1688   fdp->prop = NULL;
1689   fdp->written = FALSE;         /* not written on tags file yet */
1690
1691   fdhead = fdp;
1692   curfdp = fdhead;              /* the current file description */
1693
1694   find_entries (fh);
1695
1696   /* If not Ctags, and if this is not metasource and if it contained no #line
1697      directives, we can write the tags and free all nodes pointing to
1698      curfdp. */
1699   if (!CTAGS
1700       && curfdp->usecharno      /* no #line directives in this file */
1701       && !curfdp->lang->metasource)
1702     {
1703       node *np, *prev;
1704
1705       /* Look for the head of the sublist relative to this file.  See add_node
1706          for the structure of the node tree. */
1707       prev = NULL;
1708       for (np = nodehead; np != NULL; prev = np, np = np->left)
1709         if (np->fdp == curfdp)
1710           break;
1711
1712       /* If we generated tags for this file, write and delete them. */
1713       if (np != NULL)
1714         {
1715           /* This is the head of the last sublist, if any.  The following
1716              instructions depend on this being true. */
1717           assert (np->left == NULL);
1718
1719           assert (fdhead == curfdp);
1720           assert (last_node->fdp == curfdp);
1721           put_entries (np);     /* write tags for file curfdp->taggedfname */
1722           free_tree (np);       /* remove the written nodes */
1723           if (prev == NULL)
1724             nodehead = NULL;    /* no nodes left */
1725           else
1726             prev->left = NULL;  /* delete the pointer to the sublist */
1727         }
1728     }
1729 }
1730
1731 /*
1732  * This routine sets up the boolean pseudo-functions which work
1733  * by setting boolean flags dependent upon the corresponding character.
1734  * Every char which is NOT in that string is not a white char.  Therefore,
1735  * all of the array "_wht" is set to FALSE, and then the elements
1736  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1737  * of a char is TRUE if it is the string "white", else FALSE.
1738  */
1739 static void
1740 init ()
1741 {
1742   register char *sp;
1743   register int i;
1744
1745   for (i = 0; i < CHARS; i++)
1746     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1747   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1748   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1749   notinname('\0') = notinname('\n');
1750   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1751   begtoken('\0') = begtoken('\n');
1752   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1753   intoken('\0') = intoken('\n');
1754   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1755   endtoken('\0') = endtoken('\n');
1756 }
1757
1758 /*
1759  * This routine opens the specified file and calls the function
1760  * which finds the function and type definitions.
1761  */
1762 static void
1763 find_entries (inf)
1764      FILE *inf;
1765 {
1766   char *cp;
1767   language *lang = curfdp->lang;
1768   Lang_function *parser = NULL;
1769
1770   /* If user specified a language, use it. */
1771   if (lang != NULL && lang->function != NULL)
1772     {
1773       parser = lang->function;
1774     }
1775
1776   /* Else try to guess the language given the file name. */
1777   if (parser == NULL)
1778     {
1779       lang = get_language_from_filename (curfdp->infname, TRUE);
1780       if (lang != NULL && lang->function != NULL)
1781         {
1782           curfdp->lang = lang;
1783           parser = lang->function;
1784         }
1785     }
1786
1787   /* Else look for sharp-bang as the first two characters. */
1788   if (parser == NULL
1789       && readline_internal (&lb, inf) > 0
1790       && lb.len >= 2
1791       && lb.buffer[0] == '#'
1792       && lb.buffer[1] == '!')
1793     {
1794       char *lp;
1795
1796       /* Set lp to point at the first char after the last slash in the
1797          line or, if no slashes, at the first nonblank.  Then set cp to
1798          the first successive blank and terminate the string. */
1799       lp = etags_strrchr (lb.buffer+2, '/');
1800       if (lp != NULL)
1801         lp += 1;
1802       else
1803         lp = skip_spaces (lb.buffer + 2);
1804       cp = skip_non_spaces (lp);
1805       *cp = '\0';
1806
1807       if (strlen (lp) > 0)
1808         {
1809           lang = get_language_from_interpreter (lp);
1810           if (lang != NULL && lang->function != NULL)
1811             {
1812               curfdp->lang = lang;
1813               parser = lang->function;
1814             }
1815         }
1816     }
1817
1818   /* We rewind here, even if inf may be a pipe.  We fail if the
1819      length of the first line is longer than the pipe block size,
1820      which is unlikely. */
1821   rewind (inf);
1822
1823   /* Else try to guess the language given the case insensitive file name. */
1824   if (parser == NULL)
1825     {
1826       lang = get_language_from_filename (curfdp->infname, FALSE);
1827       if (lang != NULL && lang->function != NULL)
1828         {
1829           curfdp->lang = lang;
1830           parser = lang->function;
1831         }
1832     }
1833
1834   /* Else try Fortran or C. */
1835   if (parser == NULL)
1836     {
1837       node *old_last_node = last_node;
1838
1839       curfdp->lang = get_language_from_langname ("fortran");
1840       find_entries (inf);
1841
1842       if (old_last_node == last_node)
1843         /* No Fortran entries found.  Try C. */
1844         {
1845           /* We do not tag if rewind fails.
1846              Only the file name will be recorded in the tags file. */
1847           rewind (inf);
1848           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1849           find_entries (inf);
1850         }
1851       return;
1852     }
1853
1854   if (!no_line_directive
1855       && curfdp->lang != NULL && curfdp->lang->metasource)
1856     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1857        file, or anyway we parsed a file that is automatically generated from
1858        this one.  If this is the case, the bingo.c file contained #line
1859        directives that generated tags pointing to this file.  Let's delete
1860        them all before parsing this file, which is the real source. */
1861     {
1862       fdesc **fdpp = &fdhead;
1863       while (*fdpp != NULL)
1864         if (*fdpp != curfdp
1865             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1866           /* We found one of those!  We must delete both the file description
1867              and all tags referring to it. */
1868           {
1869             fdesc *badfdp = *fdpp;
1870
1871             /* Delete the tags referring to badfdp->taggedfname
1872                that were obtained from badfdp->infname. */
1873             invalidate_nodes (badfdp, &nodehead);
1874
1875             *fdpp = badfdp->next; /* remove the bad description from the list */
1876             free_fdesc (badfdp);
1877           }
1878         else
1879           fdpp = &(*fdpp)->next; /* advance the list pointer */
1880     }
1881
1882   assert (parser != NULL);
1883
1884   /* Generic initialisations before reading from file. */
1885   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1886
1887   /* Generic initialisations before parsing file with readline. */
1888   lineno = 0;                  /* reset global line number */
1889   charno = 0;                  /* reset global char number */
1890   linecharno = 0;              /* reset global char number of line start */
1891
1892   parser (inf);
1893
1894   regex_tag_multiline ();
1895 }
1896
1897 \f
1898 /*
1899  * Check whether an implicitly named tag should be created,
1900  * then call `pfnote'.
1901  * NAME is a string that is internally copied by this function.
1902  *
1903  * TAGS format specification
1904  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1905  * The following is explained in some more detail in etc/ETAGS.EBNF.
1906  *
1907  * make_tag creates tags with "implicit tag names" (unnamed tags)
1908  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1909  *  1. NAME does not contain any of the characters in NONAM;
1910  *  2. LINESTART contains name as either a rightmost, or rightmost but
1911  *     one character, substring;
1912  *  3. the character, if any, immediately before NAME in LINESTART must
1913  *     be a character in NONAM;
1914  *  4. the character, if any, immediately after NAME in LINESTART must
1915  *     also be a character in NONAM.
1916  *
1917  * The implementation uses the notinname() macro, which recognises the
1918  * characters stored in the string `nonam'.
1919  * etags.el needs to use the same characters that are in NONAM.
1920  */
1921 static void
1922 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1923      char *name;                /* tag name, or NULL if unnamed */
1924      int namelen;               /* tag length */
1925      bool is_func;              /* tag is a function */
1926      char *linestart;           /* start of the line where tag is */
1927      int linelen;               /* length of the line where tag is */
1928      int lno;                   /* line number */
1929      long cno;                  /* character number */
1930 {
1931   bool named = (name != NULL && namelen > 0);
1932
1933   if (!CTAGS && named)          /* maybe set named to false */
1934     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1935        such that etags.el can guess a name from it. */
1936     {
1937       int i;
1938       register char *cp = name;
1939
1940       for (i = 0; i < namelen; i++)
1941         if (notinname (*cp++))
1942           break;
1943       if (i == namelen)                         /* rule #1 */
1944         {
1945           cp = linestart + linelen - namelen;
1946           if (notinname (linestart[linelen-1]))
1947             cp -= 1;                            /* rule #4 */
1948           if (cp >= linestart                   /* rule #2 */
1949               && (cp == linestart
1950                   || notinname (cp[-1]))        /* rule #3 */
1951               && strneq (name, cp, namelen))    /* rule #2 */
1952             named = FALSE;      /* use implicit tag name */
1953         }
1954     }
1955
1956   if (named)
1957     name = savenstr (name, namelen);
1958   else
1959     name = NULL;
1960   pfnote (name, is_func, linestart, linelen, lno, cno);
1961 }
1962
1963 /* Record a tag. */
1964 static void
1965 pfnote (name, is_func, linestart, linelen, lno, cno)
1966      char *name;                /* tag name, or NULL if unnamed */
1967      bool is_func;              /* tag is a function */
1968      char *linestart;           /* start of the line where tag is */
1969      int linelen;               /* length of the line where tag is */
1970      int lno;                   /* line number */
1971      long cno;                  /* character number */
1972 {
1973   register node *np;
1974
1975   assert (name == NULL || name[0] != '\0');
1976   if (CTAGS && name == NULL)
1977     return;
1978
1979   np = xnew (1, node);
1980
1981   /* If ctags mode, change name "main" to M<thisfilename>. */
1982   if (CTAGS && !cxref_style && streq (name, "main"))
1983     {
1984       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1985       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1986       fp = etags_strrchr (np->name, '.');
1987       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1988         fp[0] = '\0';
1989     }
1990   else
1991     np->name = name;
1992   np->valid = TRUE;
1993   np->been_warned = FALSE;
1994   np->fdp = curfdp;
1995   np->is_func = is_func;
1996   np->lno = lno;
1997   if (np->fdp->usecharno)
1998     /* Our char numbers are 0-base, because of C language tradition?
1999        ctags compatibility?  old versions compatibility?   I don't know.
2000        Anyway, since emacs's are 1-base we expect etags.el to take care
2001        of the difference.  If we wanted to have 1-based numbers, we would
2002        uncomment the +1 below. */
2003     np->cno = cno /* + 1 */ ;
2004   else
2005     np->cno = invalidcharno;
2006   np->left = np->right = NULL;
2007   if (CTAGS && !cxref_style)
2008     {
2009       if (strlen (linestart) < 50)
2010         np->regex = concat (linestart, "$", "");
2011       else
2012         np->regex = savenstr (linestart, 50);
2013     }
2014   else
2015     np->regex = savenstr (linestart, linelen);
2016
2017   add_node (np, &nodehead);
2018 }
2019
2020 /*
2021  * free_tree ()
2022  *      recurse on left children, iterate on right children.
2023  */
2024 static void
2025 free_tree (np)
2026      register node *np;
2027 {
2028   while (np)
2029     {
2030       register node *node_right = np->right;
2031       free_tree (np->left);
2032       free (np->name);
2033       free (np->regex);
2034       free (np);
2035       np = node_right;
2036     }
2037 }
2038
2039 /*
2040  * free_fdesc ()
2041  *      delete a file description
2042  */
2043 static void
2044 free_fdesc (fdp)
2045      register fdesc *fdp;
2046 {
2047   free (fdp->infname);
2048   free (fdp->infabsname);
2049   free (fdp->infabsdir);
2050   free (fdp->taggedfname);
2051   free (fdp->prop);
2052   free (fdp);
2053 }
2054
2055 /*
2056  * add_node ()
2057  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2058  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2059  *      balancing.
2060  *
2061  *      add_node is the only function allowed to add nodes, so it can
2062  *      maintain state.
2063  */
2064 static void
2065 add_node (np, cur_node_p)
2066      node *np, **cur_node_p;
2067 {
2068   register int dif;
2069   register node *cur_node = *cur_node_p;
2070
2071   if (cur_node == NULL)
2072     {
2073       *cur_node_p = np;
2074       last_node = np;
2075       return;
2076     }
2077
2078   if (!CTAGS)
2079     /* Etags Mode */
2080     {
2081       /* For each file name, tags are in a linked sublist on the right
2082          pointer.  The first tags of different files are a linked list
2083          on the left pointer.  last_node points to the end of the last
2084          used sublist. */
2085       if (last_node != NULL && last_node->fdp == np->fdp)
2086         {
2087           /* Let's use the same sublist as the last added node. */
2088           assert (last_node->right == NULL);
2089           last_node->right = np;
2090           last_node = np;
2091         }
2092       else if (cur_node->fdp == np->fdp)
2093         {
2094           /* Scanning the list we found the head of a sublist which is
2095              good for us.  Let's scan this sublist. */
2096           add_node (np, &cur_node->right);
2097         }
2098       else
2099         /* The head of this sublist is not good for us.  Let's try the
2100            next one. */
2101         add_node (np, &cur_node->left);
2102     } /* if ETAGS mode */
2103
2104   else
2105     {
2106       /* Ctags Mode */
2107       dif = strcmp (np->name, cur_node->name);
2108
2109       /*
2110        * If this tag name matches an existing one, then
2111        * do not add the node, but maybe print a warning.
2112        */
2113       if (no_duplicates && !dif)
2114         {
2115           if (np->fdp == cur_node->fdp)
2116             {
2117               if (!no_warnings)
2118                 {
2119                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2120                            np->fdp->infname, lineno, np->name);
2121                   fprintf (stderr, "Second entry ignored\n");
2122                 }
2123             }
2124           else if (!cur_node->been_warned && !no_warnings)
2125             {
2126               fprintf
2127                 (stderr,
2128                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2129                  np->fdp->infname, cur_node->fdp->infname, np->name);
2130               cur_node->been_warned = TRUE;
2131             }
2132           return;
2133         }
2134
2135       /* Actually add the node */
2136       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2137     } /* if CTAGS mode */
2138 }
2139
2140 /*
2141  * invalidate_nodes ()
2142  *      Scan the node tree and invalidate all nodes pointing to the
2143  *      given file description (CTAGS case) or free them (ETAGS case).
2144  */
2145 static void
2146 invalidate_nodes (badfdp, npp)
2147      fdesc *badfdp;
2148      node **npp;
2149 {
2150   node *np = *npp;
2151
2152   if (np == NULL)
2153     return;
2154
2155   if (CTAGS)
2156     {
2157       if (np->left != NULL)
2158         invalidate_nodes (badfdp, &np->left);
2159       if (np->fdp == badfdp)
2160         np->valid = FALSE;
2161       if (np->right != NULL)
2162         invalidate_nodes (badfdp, &np->right);
2163     }
2164   else
2165     {
2166       assert (np->fdp != NULL);
2167       if (np->fdp == badfdp)
2168         {
2169           *npp = np->left;      /* detach the sublist from the list */
2170           np->left = NULL;      /* isolate it */
2171           free_tree (np);       /* free it */
2172           invalidate_nodes (badfdp, npp);
2173         }
2174       else
2175         invalidate_nodes (badfdp, &np->left);
2176     }
2177 }
2178
2179 \f
2180 static int total_size_of_entries __P((node *));
2181 static int number_len __P((long));
2182
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero takes one digit. */
static int
number_len (num)
     long num;
{
  int digits;

  /* Every further power of ten adds one digit. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2193
2194 /*
2195  * Return total number of characters that put_entries will output for
2196  * the nodes in the linked list at the right of the specified node.
2197  * This count is irrelevant with etags.el since emacs 19.34 at least,
2198  * but is still supplied for backward compatibility.
2199  */
2200 static int
2201 total_size_of_entries (np)
2202      register node *np;
2203 {
2204   register int total = 0;
2205
2206   for (; np != NULL; np = np->right)
2207     if (np->valid)
2208       {
2209         total += strlen (np->regex) + 1;                /* pat\177 */
2210         if (np->name != NULL)
2211           total += strlen (np->name) + 1;               /* name\001 */
2212         total += number_len ((long) np->lno) + 1;       /* lno, */
2213         if (np->cno != invalidcharno)                   /* cno */
2214           total += number_len (np->cno);
2215         total += 1;                                     /* newline */
2216       }
2217
2218   return total;
2219 }
2220
2221 static void
2222 put_entries (np)
2223      register node *np;
2224 {
2225   register char *sp;
2226   static fdesc *fdp = NULL;
2227
2228   if (np == NULL)
2229     return;
2230
2231   /* Output subentries that precede this one */
2232   if (CTAGS)
2233     put_entries (np->left);
2234
2235   /* Output this entry */
2236   if (np->valid)
2237     {
2238       if (!CTAGS)
2239         {
2240           /* Etags mode */
2241           if (fdp != np->fdp)
2242             {
2243               fdp = np->fdp;
2244               fprintf (tagf, "\f\n%s,%d\n",
2245                        fdp->taggedfname, total_size_of_entries (np));
2246               fdp->written = TRUE;
2247             }
2248           fputs (np->regex, tagf);
2249           fputc ('\177', tagf);
2250           if (np->name != NULL)
2251             {
2252               fputs (np->name, tagf);
2253               fputc ('\001', tagf);
2254             }
2255           fprintf (tagf, "%d,", np->lno);
2256           if (np->cno != invalidcharno)
2257             fprintf (tagf, "%ld", np->cno);
2258           fputs ("\n", tagf);
2259         }
2260       else
2261         {
2262           /* Ctags mode */
2263           if (np->name == NULL)
2264             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2265
2266           if (cxref_style)
2267             {
2268               if (vgrind_style)
2269                 fprintf (stdout, "%s %s %d\n",
2270                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2271               else
2272                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2273                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2274             }
2275           else
2276             {
2277               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2278
2279               if (np->is_func)
2280                 {               /* function or #define macro with args */
2281                   putc (searchar, tagf);
2282                   putc ('^', tagf);
2283
2284                   for (sp = np->regex; *sp; sp++)
2285                     {
2286                       if (*sp == '\\' || *sp == searchar)
2287                         putc ('\\', tagf);
2288                       putc (*sp, tagf);
2289                     }
2290                   putc (searchar, tagf);
2291                 }
2292               else
2293                 {               /* anything else; text pattern inadequate */
2294                   fprintf (tagf, "%d", np->lno);
2295                 }
2296               putc ('\n', tagf);
2297             }
2298         }
2299     } /* if this node contains a valid tag */
2300
2301   /* Output subentries that follow this one */
2302   put_entries (np->right);
2303   if (!CTAGS)
2304     put_entries (np->left);
2305 }
2306
2307 \f
/* C extensions.
   These values appear in the c_ext column of the keyword table below.
   Note that C_STAR and C_JAVA both have the C_PLPL bit set; keyword
   entries written as (C_JAVA & ~C_PLPL) mask that bit out.
   NOTE(review): how the values are combined and tested at run time is
   decided by code outside this section -- confirm there before
   changing any of them. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2316
/*
 * The C symbol tables.
 *
 * sym_type is the classification stored with each keyword of the
 * gperf keyword table below; the comments list the keywords that the
 * table maps to each value.
 */
enum sym_type
{
  st_none,
  /* @interface and @protocol, @implementation, @end */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_gnumacro,
  /* if, for, while, switch, return, import, package, friend;
     __attribute__ and GTY */
  st_C_ignore, st_C_attribute,
  /* extends, implements */
  st_C_javastruct,
  /* operator */
  st_C_operator,
  /* class; template */
  st_C_class, st_C_template,
  /* struct, union, namespace, domain, interface; extern; enum;
     define and undef; typedef */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2331
2332 static unsigned int hash __P((const char *, unsigned int));
2333 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2334 static enum sym_type C_symtype __P((char *, int, int));
2335
2336 /* Feed stuff between (but not including) %[ and %] lines to:
2337      gperf -m 5
2338 %[
2339 %compare-strncmp
2340 %enum
2341 %struct-type
2342 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2343 %%
2344 if,             0,                      st_C_ignore
2345 for,            0,                      st_C_ignore
2346 while,          0,                      st_C_ignore
2347 switch,         0,                      st_C_ignore
2348 return,         0,                      st_C_ignore
2349 __attribute__,  0,                      st_C_attribute
2350 GTY,            0,                      st_C_attribute
2351 @interface,     0,                      st_C_objprot
2352 @protocol,      0,                      st_C_objprot
2353 @implementation,0,                      st_C_objimpl
2354 @end,           0,                      st_C_objend
2355 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2356 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2357 friend,         C_PLPL,                 st_C_ignore
2358 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2359 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2360 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2361 class,          0,                      st_C_class
2362 namespace,      C_PLPL,                 st_C_struct
2363 domain,         C_STAR,                 st_C_struct
2364 union,          0,                      st_C_struct
2365 struct,         0,                      st_C_struct
2366 extern,         0,                      st_C_extern
2367 enum,           0,                      st_C_enum
2368 typedef,        0,                      st_C_typedef
2369 define,         0,                      st_C_define
2370 undef,          0,                      st_C_define
2371 operator,       C_PLPL,                 st_C_operator
2372 template,       0,                      st_C_template
2373 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2374 DEFUN,          0,                      st_C_gnumacro
2375 SYSCALL,        0,                      st_C_gnumacro
2376 ENTRY,          0,                      st_C_gnumacro
2377 PSEUDO,         0,                      st_C_gnumacro
2378 # These are defined inside C functions, so currently they are not met.
2379 # EXFUN used in glibc, DEFVAR_* in emacs.
2380 #EXFUN,         0,                      st_C_gnumacro
2381 #DEFVAR_,       0,                      st_C_gnumacro
2382 %]
2383 and replace lines between %< and %> with its output, then:
2384  - remove the #if characterset check
2385  - make in_word_set static and not inline. */
2386 /*%<*/
2387 /* C code produced by gperf version 3.0.1 */
2388 /* Command-line: gperf -m 5  */
2389 /* Computed positions: -k'2-3' */
2390
/* One keyword table entry: the keyword text, its c_ext value (built
   from the C_* constants) and its symbol type. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2392 /* maximum key range = 33, duplicates = 0 */
2393
/* Hash function generated by gperf for the keyword table; keep the code
   identical to the generator output.
   The value is LEN plus the asso_values entry of the second character
   of STR, plus the entry of the third character whenever LEN is not 2.
   STR[1] is always read and STR[2] is read for every LEN other than 2,
   so STR must be long enough for those reads.
   NOTE(review): presumably in_word_set rejects words shorter than its
   MIN_WORD_LENGTH before calling this -- confirm in its body. */
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Per-character contribution to the hash, indexed by unsigned char
     value.  Most entries are 35, which by itself pushes the result
     above the MAX_HASH_VALUE (34) declared in in_word_set. */
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  register int hval = len;

  /* Only a length of exactly 2 skips the third character. */
  switch (hval)
    {
      default:
        hval += asso_values[(unsigned char)str[2]];
      /*FALLTHROUGH*/
      case 2:
        hval += asso_values[(unsigned char)str[1]];
        break;
    }
  return hval;
}
2448
2449 static struct C_stab_entry *
2450 in_word_set (str, len)
2451      register const char *str;
2452      register unsigned int len;
2453 {
2454   enum
2455     {
2456       TOTAL_KEYWORDS = 33,
2457       MIN_WORD_LENGTH = 2,
2458       MAX_WORD_LENGTH = 15,
2459       MIN_HASH_VALUE = 2,
2460       MAX_HASH_VALUE = 34
2461     };
2462
2463   static struct C_stab_entry wordlist[] =
2464     {
2465       {""}, {""},
2466       {"if",            0,                      st_C_ignore},
2467       {"GTY",           0,                      st_C_attribute},
2468       {"@end",          0,                      st_C_objend},
2469       {"union",         0,                      st_C_struct},
2470       {"define",                0,                      st_C_define},
2471       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2472       {"template",      0,                      st_C_template},
2473       {"operator",      C_PLPL,                 st_C_operator},
2474       {"@interface",    0,                      st_C_objprot},
2475       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2476       {"friend",                C_PLPL,                 st_C_ignore},
2477       {"typedef",       0,                      st_C_typedef},
2478       {"return",                0,                      st_C_ignore},
2479       {"@implementation",0,                     st_C_objimpl},
2480       {"@protocol",     0,                      st_C_objprot},
2481       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2482       {"extern",                0,                      st_C_extern},
2483       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2484       {"struct",                0,                      st_C_struct},
2485       {"domain",                C_STAR,                 st_C_struct},
2486       {"switch",                0,                      st_C_ignore},
2487       {"enum",          0,                      st_C_enum},
2488       {"for",           0,                      st_C_ignore},
2489       {"namespace",     C_PLPL,                 st_C_struct},
2490       {"class",         0,                      st_C_class},
2491       {"while",         0,                      st_C_ignore},
2492       {"undef",         0,                      st_C_define},
2493       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2494       {"__attribute__", 0,                      st_C_attribute},
2495       {"SYSCALL",       0,                      st_C_gnumacro},
2496       {"ENTRY",         0,                      st_C_gnumacro},
2497       {"PSEUDO",                0,                      st_C_gnumacro},
2498       {"DEFUN",         0,                      st_C_gnumacro}
2499     };
2500
2501   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2502     {
2503       register int key = hash (str, len);
2504
2505       if (key <= MAX_HASH_VALUE && key >= 0)
2506         {
2507           register const char *s = wordlist[key].name;
2508
2509           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2510             return &wordlist[key];
2511         }
2512     }
2513   return 0;
2514 }
2515 /*%>*/
2516
2517 static enum sym_type
2518 C_symtype (str, len, c_ext)
2519      char *str;
2520      int len;
2521      int c_ext;
2522 {
2523   register struct C_stab_entry *se = in_word_set (str, len);
2524
2525   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2526     return st_none;
2527   return se->type;
2528 }
2529
2530 \f
/*
 * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it classifies a token as
 * st_C_attribute (the keyword table gives that type to both
 * "__attribute__" and "GTY").  While it is set, C_entries does not
 * pass tokens to consider_token.
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* Emacs DEFUN keyword seen (any token
                                   classified as st_C_gnumacro) */
  fdefunname,                   /* Emacs DEFUN name seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */

/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;

/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag
                                   (also entered on a token classified
                                   as st_C_javastruct) */
} structdef;

/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token assigns it a fresh copy made with savenstr each time
 * a class name is seen; the previous value is never freed.
 */
static char *objtag = "<uninited>";

/*
 * Yet another little state machine to deal with preprocessor lines.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen ('undef' is
                                   classified the same way) */
  dignorerest                   /* ignore rest of line */
} definedef;

/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2619
2620
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* when FALSE, do not create a tag
                                   (make_C_tag makes the regular tag only
                                   if this is TRUE, and clears it
                                   afterwards); the token should be
                                   invalidated whenever a state machine is
                                   reset prematurely */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2643
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 *
 * cstack is a stack of enclosing struct-like scopes kept as two
 * parallel arrays.  pushclass_above adds an entry (after discarding
 * entries at the same or a deeper brace level), popclass_above
 * discards entries, and write_classname joins the recorded names into
 * a qualified name.
 */
static void pushclass_above __P((int, char *, int));
static void popclass_above __P((int));
static void write_classname __P((linebuffer *, char *qualifier));

static struct {
  char **cname;                 /* nested class names; an entry is NULL
                                   when it was pushed without a name */
  int *bracelev;                /* nested class brace level; asserted to
                                   be strictly increasing on push */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array (both arrays are
                                   grown together) */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Note: `bracelev' here is not a global; the macro picks up whatever
   variable of that name is in scope where it is expanded. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2663
2664 static void
2665 pushclass_above (bracelev, str, len)
2666      int bracelev;
2667      char *str;
2668      int len;
2669 {
2670   int nl;
2671
2672   popclass_above (bracelev);
2673   nl = cstack.nl;
2674   if (nl >= cstack.size)
2675     {
2676       int size = cstack.size *= 2;
2677       xrnew (cstack.cname, size, char *);
2678       xrnew (cstack.bracelev, size, int);
2679     }
2680   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2681   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2682   cstack.bracelev[nl] = bracelev;
2683   cstack.nl = nl + 1;
2684 }
2685
2686 static void
2687 popclass_above (bracelev)
2688      int bracelev;
2689 {
2690   int nl;
2691
2692   for (nl = cstack.nl - 1;
2693        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2694        nl--)
2695     {
2696       free (cstack.cname[nl]);
2697       cstack.nl = nl;
2698     }
2699 }
2700
2701 static void
2702 write_classname (cn, qualifier)
2703      linebuffer *cn;
2704      char *qualifier;
2705 {
2706   int i, len;
2707   int qlen = strlen (qualifier);
2708
2709   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2710     {
2711       len = 0;
2712       cn->len = 0;
2713       cn->buffer[0] = '\0';
2714     }
2715   else
2716     {
2717       len = strlen (cstack.cname[0]);
2718       linebuffer_setlen (cn, len);
2719       strcpy (cn->buffer, cstack.cname[0]);
2720     }
2721   for (i = 1; i < cstack.nl; i++)
2722     {
2723       char *s;
2724       int slen;
2725
2726       s = cstack.cname[i];
2727       if (s == NULL)
2728         continue;
2729       slen = strlen (s);
2730       len += slen + qlen;
2731       linebuffer_setlen (cn, len);
2732       strncat (cn->buffer, qualifier, qlen);
2733       strncat (cn->buffer, s, slen);
2734     }
2735 }
2736
2737 \f
static bool consider_token __P((char *, int, int, int *, int, int, bool *));
static void make_C_tag __P((bool));

/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function or variable, or corresponds to a typedef, or
 *      is a struct/union/enum tag, or #define, or an enum constant.
 *
 *      Returns TRUE when the token is one of the above and FALSE
 *      otherwise.  In both cases the state machines listed under
 *      "Globals" may have been advanced.
 *
 *      *IS_FUNC_OR_VAR gets TRUE if the token is a function, a
 *      variable or a #define macro with args.  It is written only on
 *      some of the paths, so the caller must initialize it.
 *      C_EXTP points to which language we are looking at; it is
 *      updated when C++ is detected automatically (C_AUTO).
 *
 * Globals
 *      fvdef                   IN OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      inattribute             OUT
 *      fvextern                OUT
 *      objtag, token_name      OUT (Objective C class and method names)
 */

static bool
consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
     register char *str;        /* IN: token pointer */
     register int len;          /* IN: token length */
     register int c;            /* IN: first char after the token */
     int *c_extp;               /* IN, OUT: C extensions mask */
     int bracelev;              /* IN: brace level */
     int parlev;                /* IN: parenthesis level */
     bool *is_func_or_var;      /* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  static enum sym_type toktype;


  /* Classify the token: a keyword type, or st_none if it is not a
     keyword of the dialect selected by *C_EXTP. */
  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = TRUE;
      return FALSE;
     }

   /*
    * Advance the definedef state machine.
    */
   switch (definedef)
     {
     case dnone:
       /* We're not on a preprocessor line. */
       if (toktype == st_C_gnumacro)
         {
           fvdef = fdefunkey;
           return FALSE;
         }
       break;
     case dsharpseen:
       /* First token after '#': only a define-like directive is of
          interest, anything else makes us skip the rest of the line. */
       if (toktype == st_C_define)
         {
           definedef = ddefineseen;
         }
       else
         {
           definedef = dignorerest;
         }
       return FALSE;
     case ddefineseen:
       /*
        * Make a tag for any macro, unless it is a constant
        * and constantypedefs is FALSE.
        */
       definedef = dignorerest;
       /* A '(' right after the name means a macro with arguments. */
       *is_func_or_var = (c == '(');
       if (!*is_func_or_var && !constantypedefs)
         return FALSE;
       else
         return TRUE;
     case dignorerest:
       return FALSE;
     default:
       error ("internal error: definedef value.", (char *)NULL);
     }

   /*
    * Now typedefs
    */
   switch (typdef)
     {
     case tnone:
       if (toktype == st_C_typedef)
         {
           /* Start tracking the typedef only if typedefs are wanted;
              in any case the keyword resets the func/var machine. */
           if (typedefs)
             typdef = tkeyseen;
           fvextern = FALSE;
           fvdef = fvnone;
           return FALSE;
         }
       break;
     case tkeyseen:
       /* First word of the type being aliased: a plain identifier or
          a class/struct/enum keyword. */
       switch (toktype)
         {
         case st_none:
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           typdef = ttypeseen;
         }
       break;
     case ttypeseen:
       if (structdef == snone && fvdef == fvnone)
         {
           fvdef = fvnameseen;
           return TRUE;
         }
       break;
     case tend:
       /* Just before the typedef tag: anything but a struct-like
          keyword is taken as the tag. */
       switch (toktype)
         {
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           return FALSE;
         }
       return TRUE;
     }

   /* Keywords that introduce or extend struct-like definitions. */
   switch (toktype)
     {
     case st_C_javastruct:
       if (structdef == stagseen)
         structdef = scolonseen;
       return FALSE;
     case st_C_template:
     case st_C_class:
       if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
           && bracelev == 0
           && definedef == dnone && structdef == snone
           && typdef == tnone && fvdef == fvnone)
         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
       if (toktype == st_C_template)
         break;
       /* FALLTHRU */
     case st_C_struct:
     case st_C_enum:
       if (parlev == 0
           && fvdef != vignore
           && (typdef == tkeyseen
               || (typedefs_or_cplusplus && structdef == snone)))
         {
           /* Remember which keyword this was and at which brace level,
              for the enum-constant test further down. */
           structdef = skeyseen;
           structtype = toktype;
           structbracelev = bracelev;
           if (fvdef == fvnameseen)
             fvdef = fvnone;
         }
       return FALSE;
     }

   /* The token right after a struct-like keyword is its tag. */
   if (structdef == skeyseen)
     {
       structdef = stagseen;
       return TRUE;
     }

   /* NOTE(review): every definedef case other than dnone returns in the
      switch above, so for valid states definedef is already dnone when
      this point is reached. */
   if (typdef != tnone)
     definedef = dnone;

   /* Detect Objective C constructs. */
   switch (objdef)
     {
     case onone:
       switch (toktype)
         {
         case st_C_objprot:
           objdef = oprotocol;
           return FALSE;
         case st_C_objimpl:
           objdef = oimplementation;
           return FALSE;
         }
       break;
     case oimplementation:
       /* Save the class tag for functions or variables defined inside. */
       objtag = savenstr (str, len);
       objdef = oinbody;
       return FALSE;
     case oprotocol:
       /* Save the class tag for categories. */
       objtag = savenstr (str, len);
       objdef = otagseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oparenseen:
       objdef = ocatseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oinbody:
       break;
     case omethodsign:
       if (parlev == 0)
         {
           /* First part of a method name: start token_name with it. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, len);
           strncpy (token_name.buffer, str, len);
           token_name.buffer[len] = '\0';
           return TRUE;
         }
       return FALSE;
     case omethodcolon:
       if (parlev == 0)
         objdef = omethodparm;
       return FALSE;
     case omethodparm:
       if (parlev == 0)
         {
           /* A further part of the method name: append it to what
              token_name already holds. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, token_name.len + len);
           strncat (token_name.buffer, str, len);
           return TRUE;
         }
       return FALSE;
     case oignore:
       if (toktype == st_C_objend)
         {
           /* Memory leakage here: the string pointed by objtag is
              never released, because many tests would be needed to
              avoid breaking on incorrect input code.  The amount of
              memory leaked here is the sum of the lengths of the
              class tags.
           free (objtag); */
           objdef = onone;
         }
       return FALSE;
     }

   /* A function, variable or enum constant? */
   switch (toktype)
     {
     case st_C_extern:
       fvextern = TRUE;
       /* Reset the func/var machine unless it is inside or just past
          a parameter list, or already ignoring. */
       switch  (fvdef)
         {
         case finlist:
         case flistseen:
         case fignore:
         case vignore:
           break;
         default:
           fvdef = fvnone;
         }
       return FALSE;
     case st_C_ignore:
       fvextern = FALSE;
       fvdef = vignore;
       return FALSE;
     case st_C_operator:
       fvdef = foperator;
       *is_func_or_var = TRUE;
       return TRUE;
     case st_none:
       if (constantypedefs
           && structdef == snone
           && structtype == st_C_enum && bracelev > structbracelev)
         return TRUE;           /* enum constant */
       switch (fvdef)
         {
         case fdefunkey:
           if (bracelev > 0)
             break;
           fvdef = fdefunname;  /* GNU macro */
           *is_func_or_var = TRUE;
           return TRUE;
         case fvnone:
           switch (typdef)
             {
             case ttypeseen:
               return FALSE;
             case tnone:
               /* asm / __asm__ as a whole token: ignore what follows. */
               if ((strneq (str, "asm", 3) && endtoken (str[3]))
                   || (strneq (str, "__asm__", 7) && endtoken (str[7])))
                 {
                   fvdef = vignore;
                   return FALSE;
                 }
               break;
             }
          /* FALLTHRU */
          case fvnameseen:
          /* A name ending in "::operator" starts an operator function
             and is enough to conclude the input is C++. */
          if (len >= 10 && strneq (str+len-10, "::operator", 10))
            {
              if (*c_extp & C_AUTO) /* automatic detection of C++ */
                *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
              fvdef = foperator;
              *is_func_or_var = TRUE;
              return TRUE;
            }
          /* Inside braces, only names directly in a struct body count. */
          if (bracelev > 0 && !instruct)
            break;
          fvdef = fvnameseen;   /* function or variable */
          *is_func_or_var = TRUE;
          return TRUE;
        }
      break;
    }

  return FALSE;
}
3053
3054 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are not self-contained: they refer to variables
 * local to C_entries (curndx, newndx, lp, quotednl, savetoken, c_ext,
 * inf) and must only be expanded where those names are in scope.
 */
static struct
{
  long linepos;                 /* value of charno saved just before the
                                   line was read (see CNL_SAVE_DEFINEDEF) */
  linebuffer lb;                /* the line itself */
} lbs[2];

#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* The buffer being scanned and the one holding the newest line. */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the language mask c_ext. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)

/* Read the next input line into the current buffer and restart
   scanning at its beginning, leaving definedef untouched. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also put back a token saved in
   savetoken (if it is valid) and reset definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3097
3098
3099 static void
3100 make_C_tag (isfun)
3101      bool isfun;
3102 {
3103   /* This function is never called when token.valid is FALSE, but
3104      we must protect against invalid input or internal errors. */
3105   if (token.valid)
3106     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3107               token.offset+token.length+1, token.lineno, token.linepos);
3108   else if (DEBUG)
3109     {                             /* this branch is optimised away if !DEBUG */
3110       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3111                 token_name.len + 17, isfun, token.line,
3112                 token.offset+token.length+1, token.lineno, token.linepos);
3113       error ("INVALID TOKEN", NULL);
3114     }
3115
3116   token.valid = FALSE;
3117 }
3118
3119
3120 /*
3121  * C_entries ()
3122  *      This routine finds functions, variables, typedefs,
3123  *      #define's, enum constants and struct/union/enum definitions in
3124  *      C syntax and adds them to the list.
3125  */
3126 static void
3127 C_entries (c_ext, inf)
3128      int c_ext;                 /* extension of C */
3129      FILE *inf;                 /* input file */
3130 {
3131   register char c;              /* latest char read; '\0' for end of line */
3132   register char *lp;            /* pointer one beyond the character `c' */
3133   int curndx, newndx;           /* indices for current and new lb */
3134   register int tokoff;          /* offset in line of start of current token */
3135   register int toklen;          /* length of current token */
3136   char *qualifier;              /* string used to qualify names */
3137   int qlen;                     /* length of qualifier */
3138   int bracelev;                 /* current brace level */
3139   int bracketlev;               /* current bracket level */
3140   int parlev;                   /* current parenthesis level */
3141   int attrparlev;               /* __attribute__ parenthesis level */
3142   int templatelev;              /* current template level */
3143   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3144   bool incomm, inquote, inchar, quotednl, midtoken;
3145   bool yacc_rules;              /* in the rules part of a yacc file */
3146   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3147
3148
3149   linebuffer_init (&lbs[0].lb);
3150   linebuffer_init (&lbs[1].lb);
3151   if (cstack.size == 0)
3152     {
3153       cstack.size = (DEBUG) ? 1 : 4;
3154       cstack.nl = 0;
3155       cstack.cname = xnew (cstack.size, char *);
3156       cstack.bracelev = xnew (cstack.size, int);
3157     }
3158
3159   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3160   curndx = newndx = 0;
3161   lp = curlb.buffer;
3162   *lp = 0;
3163
3164   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3165   structdef = snone; definedef = dnone; objdef = onone;
3166   yacc_rules = FALSE;
3167   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3168   token.valid = savetoken.valid = FALSE;
3169   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3170   if (cjava)
3171     { qualifier = "."; qlen = 1; }
3172   else
3173     { qualifier = "::"; qlen = 2; }
3174
3175
3176   while (!feof (inf))
3177     {
3178       c = *lp++;
3179       if (c == '\\')
3180         {
3181           /* If we are at the end of the line, the next character is a
3182              '\0'; do not skip it, because it is what tells us
3183              to read the next line.  */
3184           if (*lp == '\0')
3185             {
3186               quotednl = TRUE;
3187               continue;
3188             }
3189           lp++;
3190           c = ' ';
3191         }
3192       else if (incomm)
3193         {
3194           switch (c)
3195             {
3196             case '*':
3197               if (*lp == '/')
3198                 {
3199                   c = *lp++;
3200                   incomm = FALSE;
3201                 }
3202               break;
3203             case '\0':
3204               /* Newlines inside comments do not end macro definitions in
3205                  traditional cpp. */
3206               CNL_SAVE_DEFINEDEF ();
3207               break;
3208             }
3209           continue;
3210         }
3211       else if (inquote)
3212         {
3213           switch (c)
3214             {
3215             case '"':
3216               inquote = FALSE;
3217               break;
3218             case '\0':
3219               /* Newlines inside strings do not end macro definitions
3220                  in traditional cpp, even though compilers don't
3221                  usually accept them. */
3222               CNL_SAVE_DEFINEDEF ();
3223               break;
3224             }
3225           continue;
3226         }
3227       else if (inchar)
3228         {
3229           switch (c)
3230             {
3231             case '\0':
3232               /* Hmmm, something went wrong. */
3233               CNL ();
3234               /* FALLTHRU */
3235             case '\'':
3236               inchar = FALSE;
3237               break;
3238             }
3239           continue;
3240         }
3241       else if (bracketlev > 0)
3242         {
3243           switch (c)
3244             {
3245             case ']':
3246               if (--bracketlev > 0)
3247                 continue;
3248               break;
3249             case '\0':
3250               CNL_SAVE_DEFINEDEF ();
3251               break;
3252             }
3253           continue;
3254         }
3255       else switch (c)
3256         {
3257         case '"':
3258           inquote = TRUE;
3259           if (inattribute)
3260             break;
3261           switch (fvdef)
3262             {
3263             case fdefunkey:
3264             case fstartlist:
3265             case finlist:
3266             case fignore:
3267             case vignore:
3268               break;
3269             default:
3270               fvextern = FALSE;
3271               fvdef = fvnone;
3272             }
3273           continue;
3274         case '\'':
3275           inchar = TRUE;
3276           if (inattribute)
3277             break;
3278           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3279             {
3280               fvextern = FALSE;
3281               fvdef = fvnone;
3282             }
3283           continue;
3284         case '/':
3285           if (*lp == '*')
3286             {
3287               incomm = TRUE;
3288               lp++;
3289               c = ' ';
3290             }
3291           else if (/* cplpl && */ *lp == '/')
3292             {
3293               c = '\0';
3294             }
3295           break;
3296         case '%':
3297           if ((c_ext & YACC) && *lp == '%')
3298             {
3299               /* Entering or exiting rules section in yacc file. */
3300               lp++;
3301               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3302               typdef = tnone; structdef = snone;
3303               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3304               bracelev = 0;
3305               yacc_rules = !yacc_rules;
3306               continue;
3307             }
3308           else
3309             break;
3310         case '#':
3311           if (definedef == dnone)
3312             {
3313               char *cp;
3314               bool cpptoken = TRUE;
3315
3316               /* Look back on this line.  If all blanks, or nonblanks
3317                  followed by an end of comment, this is a preprocessor
3318                  token. */
3319               for (cp = newlb.buffer; cp < lp-1; cp++)
3320                 if (!iswhite (*cp))
3321                   {
3322                     if (*cp == '*' && *(cp+1) == '/')
3323                       {
3324                         cp++;
3325                         cpptoken = TRUE;
3326                       }
3327                     else
3328                       cpptoken = FALSE;
3329                   }
3330               if (cpptoken)
3331                 definedef = dsharpseen;
3332             } /* if (definedef == dnone) */
3333           continue;
3334         case '[':
3335           bracketlev++;
3336             continue;
3337         } /* switch (c) */
3338
3339
3340       /* Consider token only if some involved conditions are satisfied. */
3341       if (typdef != tignore
3342           && definedef != dignorerest
3343           && fvdef != finlist
3344           && templatelev == 0
3345           && (definedef != dnone
3346               || structdef != scolonseen)
3347           && !inattribute)
3348         {
3349           if (midtoken)
3350             {
3351               if (endtoken (c))
3352                 {
3353                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3354                     /* This handles :: in the middle,
3355                        but not at the beginning of an identifier.
3356                        Also, space-separated :: is not recognised. */
3357                     {
3358                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3359                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3360                       lp += 2;
3361                       toklen += 2;
3362                       c = lp[-1];
3363                       goto still_in_token;
3364                     }
3365                   else
3366                     {
3367                       bool funorvar = FALSE;
3368
3369                       if (yacc_rules
3370                           || consider_token (newlb.buffer + tokoff, toklen, c,
3371                                              &c_ext, bracelev, parlev,
3372                                              &funorvar))
3373                         {
3374                           if (fvdef == foperator)
3375                             {
3376                               char *oldlp = lp;
3377                               lp = skip_spaces (lp-1);
3378                               if (*lp != '\0')
3379                                 lp += 1;
3380                               while (*lp != '\0'
3381                                      && !iswhite (*lp) && *lp != '(')
3382                                 lp += 1;
3383                               c = *lp++;
3384                               toklen += lp - oldlp;
3385                             }
3386                           token.named = FALSE;
3387                           if (!plainc
3388                               && nestlev > 0 && definedef == dnone)
3389                             /* in struct body */
3390                             {
3391                               write_classname (&token_name, qualifier);
3392                               linebuffer_setlen (&token_name,
3393                                                  token_name.len+qlen+toklen);
3394                               strcat (token_name.buffer, qualifier);
3395                               strncat (token_name.buffer,
3396                                        newlb.buffer + tokoff, toklen);
3397                               token.named = TRUE;
3398                             }
3399                           else if (objdef == ocatseen)
3400                             /* Objective C category */
3401                             {
3402                               int len = strlen (objtag) + 2 + toklen;
3403                               linebuffer_setlen (&token_name, len);
3404                               strcpy (token_name.buffer, objtag);
3405                               strcat (token_name.buffer, "(");
3406                               strncat (token_name.buffer,
3407                                        newlb.buffer + tokoff, toklen);
3408                               strcat (token_name.buffer, ")");
3409                               token.named = TRUE;
3410                             }
3411                           else if (objdef == omethodtag
3412                                    || objdef == omethodparm)
3413                             /* Objective C method */
3414                             {
3415                               token.named = TRUE;
3416                             }
3417                           else if (fvdef == fdefunname)
3418                             /* GNU DEFUN and similar macros */
3419                             {
3420                               bool defun = (newlb.buffer[tokoff] == 'F');
3421                               int off = tokoff;
3422                               int len = toklen;
3423
3424                               /* Rewrite the tag so that emacs lisp DEFUNs
3425                                  can be found by their elisp name */
3426                               if (defun)
3427                                 {
3428                                   off += 1;
3429                                   len -= 1;
3430                                 }
3431                               linebuffer_setlen (&token_name, len);
3432                               strncpy (token_name.buffer,
3433                                        newlb.buffer + off, len);
3434                               token_name.buffer[len] = '\0';
3435                               if (defun)
3436                                 while (--len >= 0)
3437                                   if (token_name.buffer[len] == '_')
3438                                     token_name.buffer[len] = '-';
3439                               token.named = defun;
3440                             }
3441                           else
3442                             {
3443                               linebuffer_setlen (&token_name, toklen);
3444                               strncpy (token_name.buffer,
3445                                        newlb.buffer + tokoff, toklen);
3446                               token_name.buffer[toklen] = '\0';
3447                               /* Name macros and members. */
3448                               token.named = (structdef == stagseen
3449                                              || typdef == ttypeseen
3450                                              || typdef == tend
3451                                              || (funorvar
3452                                                  && definedef == dignorerest)
3453                                              || (funorvar
3454                                                  && definedef == dnone
3455                                                  && structdef == snone
3456                                                  && bracelev > 0));
3457                             }
3458                           token.lineno = lineno;
3459                           token.offset = tokoff;
3460                           token.length = toklen;
3461                           token.line = newlb.buffer;
3462                           token.linepos = newlinepos;
3463                           token.valid = TRUE;
3464
3465                           if (definedef == dnone
3466                               && (fvdef == fvnameseen
3467                                   || fvdef == foperator
3468                                   || structdef == stagseen
3469                                   || typdef == tend
3470                                   || typdef == ttypeseen
3471                                   || objdef != onone))
3472                             {
3473                               if (current_lb_is_new)
3474                                 switch_line_buffers ();
3475                             }
3476                           else if (definedef != dnone
3477                                    || fvdef == fdefunname
3478                                    || instruct)
3479                             make_C_tag (funorvar);
3480                         }
3481                       else /* not yacc and consider_token failed */
3482                         {
3483                           if (inattribute && fvdef == fignore)
3484                             {
3485                               /* We have just met __attribute__ after a
3486                                  function parameter list: do not tag the
3487                                  function again. */
3488                               fvdef = fvnone;
3489                             }
3490                         }
3491                       midtoken = FALSE;
3492                     }
3493                 } /* if (endtoken (c)) */
3494               else if (intoken (c))
3495                 still_in_token:
3496                 {
3497                   toklen++;
3498                   continue;
3499                 }
3500             } /* if (midtoken) */
3501           else if (begtoken (c))
3502             {
3503               switch (definedef)
3504                 {
3505                 case dnone:
3506                   switch (fvdef)
3507                     {
3508                     case fstartlist:
3509                       /* This prevents tagging fb in
3510                          void (__attribute__((noreturn)) *fb) (void);
3511                          Fixing this is not easy and not very important. */
3512                       fvdef = finlist;
3513                       continue;
3514                     case flistseen:
3515                       if (plainc || declarations)
3516                         {
3517                           make_C_tag (TRUE); /* a function */
3518                           fvdef = fignore;
3519                         }
3520                       break;
3521                     }
3522                   if (structdef == stagseen && !cjava)
3523                     {
3524                       popclass_above (bracelev);
3525                       structdef = snone;
3526                     }
3527                   break;
3528                 case dsharpseen:
3529                   savetoken = token;
3530                   break;
3531                 }
3532               if (!yacc_rules || lp == newlb.buffer + 1)
3533                 {
3534                   tokoff = lp - 1 - newlb.buffer;
3535                   toklen = 1;
3536                   midtoken = TRUE;
3537                 }
3538               continue;
3539             } /* if (begtoken) */
3540         } /* if must look at token */
3541
3542
3543       /* Detect end of line, colon, comma, semicolon and various braces
3544          after having handled a token.*/
3545       switch (c)
3546         {
3547         case ':':
3548           if (inattribute)
3549             break;
3550           if (yacc_rules && token.offset == 0 && token.valid)
3551             {
3552               make_C_tag (FALSE); /* a yacc function */
3553               break;
3554             }
3555           if (definedef != dnone)
3556             break;
3557           switch (objdef)
3558             {
3559             case  otagseen:
3560               objdef = oignore;
3561               make_C_tag (TRUE); /* an Objective C class */
3562               break;
3563             case omethodtag:
3564             case omethodparm:
3565               objdef = omethodcolon;
3566               linebuffer_setlen (&token_name, token_name.len + 1);
3567               strcat (token_name.buffer, ":");
3568               break;
3569             }
3570           if (structdef == stagseen)
3571             {
3572               structdef = scolonseen;
3573               break;
3574             }
3575           /* Should be useless, but may be work as a safety net. */
3576           if (cplpl && fvdef == flistseen)
3577             {
3578               make_C_tag (TRUE); /* a function */
3579               fvdef = fignore;
3580               break;
3581             }
3582           break;
3583         case ';':
3584           if (definedef != dnone || inattribute)
3585             break;
3586           switch (typdef)
3587             {
3588             case tend:
3589             case ttypeseen:
3590               make_C_tag (FALSE); /* a typedef */
3591               typdef = tnone;
3592               fvdef = fvnone;
3593               break;
3594             case tnone:
3595             case tinbody:
3596             case tignore:
3597               switch (fvdef)
3598                 {
3599                 case fignore:
3600                   if (typdef == tignore || cplpl)
3601                     fvdef = fvnone;
3602                   break;
3603                 case fvnameseen:
3604                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3605                       || (members && instruct))
3606                     make_C_tag (FALSE); /* a variable */
3607                   fvextern = FALSE;
3608                   fvdef = fvnone;
3609                   token.valid = FALSE;
3610                   break;
3611                 case flistseen:
3612                   if ((declarations
3613                        && (cplpl || !instruct)
3614                        && (typdef == tnone || (typdef != tignore && instruct)))
3615                       || (members
3616                           && plainc && instruct))
3617                     make_C_tag (TRUE);  /* a function */
3618                   /* FALLTHRU */
3619                 default:
3620                   fvextern = FALSE;
3621                   fvdef = fvnone;
3622                   if (declarations
3623                        && cplpl && structdef == stagseen)
3624                     make_C_tag (FALSE); /* forward declaration */
3625                   else
3626                     token.valid = FALSE;
3627                 } /* switch (fvdef) */
3628               /* FALLTHRU */
3629             default:
3630               if (!instruct)
3631                 typdef = tnone;
3632             }
3633           if (structdef == stagseen)
3634             structdef = snone;
3635           break;
3636         case ',':
3637           if (definedef != dnone || inattribute)
3638             break;
3639           switch (objdef)
3640             {
3641             case omethodtag:
3642             case omethodparm:
3643               make_C_tag (TRUE); /* an Objective C method */
3644               objdef = oinbody;
3645               break;
3646             }
3647           switch (fvdef)
3648             {
3649             case fdefunkey:
3650             case foperator:
3651             case fstartlist:
3652             case finlist:
3653             case fignore:
3654             case vignore:
3655               break;
3656             case fdefunname:
3657               fvdef = fignore;
3658               break;
3659             case fvnameseen:
3660               if (parlev == 0
3661                   && ((globals
3662                        && bracelev == 0
3663                        && templatelev == 0
3664                        && (!fvextern || declarations))
3665                       || (members && instruct)))
3666                   make_C_tag (FALSE); /* a variable */
3667               break;
3668             case flistseen:
3669               if ((declarations && typdef == tnone && !instruct)
3670                   || (members && typdef != tignore && instruct))
3671                 {
3672                   make_C_tag (TRUE); /* a function */
3673                   fvdef = fvnameseen;
3674                 }
3675               else if (!declarations)
3676                 fvdef = fvnone;
3677               token.valid = FALSE;
3678               break;
3679             default:
3680               fvdef = fvnone;
3681             }
3682           if (structdef == stagseen)
3683             structdef = snone;
3684           break;
3685         case ']':
3686           if (definedef != dnone || inattribute)
3687             break;
3688           if (structdef == stagseen)
3689             structdef = snone;
3690           switch (typdef)
3691             {
3692             case ttypeseen:
3693             case tend:
3694               typdef = tignore;
3695               make_C_tag (FALSE);       /* a typedef */
3696               break;
3697             case tnone:
3698             case tinbody:
3699               switch (fvdef)
3700                 {
3701                 case foperator:
3702                 case finlist:
3703                 case fignore:
3704                 case vignore:
3705                   break;
3706                 case fvnameseen:
3707                   if ((members && bracelev == 1)
3708                       || (globals && bracelev == 0
3709                           && (!fvextern || declarations)))
3710                     make_C_tag (FALSE); /* a variable */
3711                   /* FALLTHRU */
3712                 default:
3713                   fvdef = fvnone;
3714                 }
3715               break;
3716             }
3717           break;
3718         case '(':
3719           if (inattribute)
3720             {
3721               attrparlev++;
3722               break;
3723             }
3724           if (definedef != dnone)
3725             break;
3726           if (objdef == otagseen && parlev == 0)
3727             objdef = oparenseen;
3728           switch (fvdef)
3729             {
3730             case fvnameseen:
3731               if (typdef == ttypeseen
3732                   && *lp != '*'
3733                   && !instruct)
3734                 {
3735                   /* This handles constructs like:
3736                      typedef void OperatorFun (int fun); */
3737                   make_C_tag (FALSE);
3738                   typdef = tignore;
3739                   fvdef = fignore;
3740                   break;
3741                 }
3742               /* FALLTHRU */
3743             case foperator:
3744               fvdef = fstartlist;
3745               break;
3746             case flistseen:
3747               fvdef = finlist;
3748               break;
3749             }
3750           parlev++;
3751           break;
3752         case ')':
3753           if (inattribute)
3754             {
3755               if (--attrparlev == 0)
3756                 inattribute = FALSE;
3757               break;
3758             }
3759           if (definedef != dnone)
3760             break;
3761           if (objdef == ocatseen && parlev == 1)
3762             {
3763               make_C_tag (TRUE); /* an Objective C category */
3764               objdef = oignore;
3765             }
3766           if (--parlev == 0)
3767             {
3768               switch (fvdef)
3769                 {
3770                 case fstartlist:
3771                 case finlist:
3772                   fvdef = flistseen;
3773                   break;
3774                 }
3775               if (!instruct
3776                   && (typdef == tend
3777                       || typdef == ttypeseen))
3778                 {
3779                   typdef = tignore;
3780                   make_C_tag (FALSE); /* a typedef */
3781                 }
3782             }
3783           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3784             parlev = 0;
3785           break;
3786         case '{':
3787           if (definedef != dnone)
3788             break;
3789           if (typdef == ttypeseen)
3790             {
3791               /* Whenever typdef is set to tinbody (currently only
3792                  here), typdefbracelev should be set to bracelev. */
3793               typdef = tinbody;
3794               typdefbracelev = bracelev;
3795             }
3796           switch (fvdef)
3797             {
3798             case flistseen:
3799               make_C_tag (TRUE);    /* a function */
3800               /* FALLTHRU */
3801             case fignore:
3802               fvdef = fvnone;
3803               break;
3804             case fvnone:
3805               switch (objdef)
3806                 {
3807                 case otagseen:
3808                   make_C_tag (TRUE); /* an Objective C class */
3809                   objdef = oignore;
3810                   break;
3811                 case omethodtag:
3812                 case omethodparm:
3813                   make_C_tag (TRUE); /* an Objective C method */
3814                   objdef = oinbody;
3815                   break;
3816                 default:
3817                   /* Neutralize `extern "C" {' grot. */
3818                   if (bracelev == 0 && structdef == snone && nestlev == 0
3819                       && typdef == tnone)
3820                     bracelev = -1;
3821                 }
3822               break;
3823             }
3824           switch (structdef)
3825             {
3826             case skeyseen:         /* unnamed struct */
3827               pushclass_above (bracelev, NULL, 0);
3828               structdef = snone;
3829               break;
3830             case stagseen:         /* named struct or enum */
3831             case scolonseen:       /* a class */
3832               pushclass_above (bracelev,token.line+token.offset, token.length);
3833               structdef = snone;
3834               make_C_tag (FALSE);  /* a struct or enum */
3835               break;
3836             }
3837           bracelev += 1;
3838           break;
3839         case '*':
3840           if (definedef != dnone)
3841             break;
3842           if (fvdef == fstartlist)
3843             {
3844               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3845               token.valid = FALSE;
3846             }
3847           break;
3848         case '}':
3849           if (definedef != dnone)
3850             break;
3851           bracelev -= 1;
3852           if (!ignoreindent && lp == newlb.buffer + 1)
3853             {
3854               if (bracelev != 0)
3855                 token.valid = FALSE; /* unexpected value, token unreliable */
3856               bracelev = 0;     /* reset brace level if first column */
3857               parlev = 0;       /* also reset paren level, just in case... */
3858             }
3859           else if (bracelev < 0)
3860             {
3861               token.valid = FALSE; /* something gone amiss, token unreliable */
3862               bracelev = 0;
3863             }
3864           if (bracelev == 0 && fvdef == vignore)
3865             fvdef = fvnone;             /* end of function */
3866           popclass_above (bracelev);
3867           structdef = snone;
3868           /* Only if typdef == tinbody is typdefbracelev significant. */
3869           if (typdef == tinbody && bracelev <= typdefbracelev)
3870             {
3871               assert (bracelev == typdefbracelev);
3872               typdef = tend;
3873             }
3874           break;
3875         case '=':
3876           if (definedef != dnone)
3877             break;
3878           switch (fvdef)
3879             {
3880             case foperator:
3881             case finlist:
3882             case fignore:
3883             case vignore:
3884               break;
3885             case fvnameseen:
3886               if ((members && bracelev == 1)
3887                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3888                 make_C_tag (FALSE); /* a variable */
3889               /* FALLTHRU */
3890             default:
3891               fvdef = vignore;
3892             }
3893           break;
3894         case '<':
3895           if (cplpl
3896               && (structdef == stagseen || fvdef == fvnameseen))
3897             {
3898               templatelev++;
3899               break;
3900             }
3901           goto resetfvdef;
3902         case '>':
3903           if (templatelev > 0)
3904             {
3905               templatelev--;
3906               break;
3907             }
3908           goto resetfvdef;
3909         case '+':
3910         case '-':
3911           if (objdef == oinbody && bracelev == 0)
3912             {
3913               objdef = omethodsign;
3914               break;
3915             }
3916           /* FALLTHRU */
3917         resetfvdef:
3918         case '#': case '~': case '&': case '%': case '/':
3919         case '|': case '^': case '!': case '.': case '?':
3920           if (definedef != dnone)
3921             break;
3922           /* These surely cannot follow a function tag in C. */
3923           switch (fvdef)
3924             {
3925             case foperator:
3926             case finlist:
3927             case fignore:
3928             case vignore:
3929               break;
3930             default:
3931               fvdef = fvnone;
3932             }
3933           break;
3934         case '\0':
3935           if (objdef == otagseen)
3936             {
3937               make_C_tag (TRUE); /* an Objective C class */
3938               objdef = oignore;
3939             }
3940           /* If a macro spans multiple lines don't reset its state. */
3941           if (quotednl)
3942             CNL_SAVE_DEFINEDEF ();
3943           else
3944             CNL ();
3945           break;
3946         } /* switch (c) */
3947
3948     } /* while not eof */
3949
3950   free (lbs[0].lb.buffer);
3951   free (lbs[1].lb.buffer);
3952 }
3953
3954 /*
3955  * Process either a C++ file or a C file depending on the setting
3956  * of a global flag.
3957  */
3958 static void
3959 default_C_entries (inf)
3960      FILE *inf;
3961 {
3962   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3963 }
3964
/* Parse INF as plain C: call C_entries with no extension flags set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  const int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3972
3973 /* Always do C++. */
3974 static void
3975 Cplusplus_entries (inf)
3976      FILE *inf;
3977 {
3978   C_entries (C_PLPL, inf);
3979 }
3980
3981 /* Always do Java. */
3982 static void
3983 Cjava_entries (inf)
3984      FILE *inf;
3985 {
3986   C_entries (C_JAVA, inf);
3987 }
3988
3989 /* Always do C*. */
3990 static void
3991 Cstar_entries (inf)
3992      FILE *inf;
3993 {
3994   C_entries (C_STAR, inf);
3995 }
3996
3997 /* Always do Yacc. */
3998 static void
3999 Yacc_entries (inf)
4000      FILE *inf;
4001 {
4002   C_entries (YACC, inf);
4003 }
4004
4005 \f
/* Useful macros. */

/* Loop header that runs the following statement once per input line.
   While feof (file_pointer) is false, each iteration first calls
   readline to fill line_buffer and then points char_pointer at the
   start of line_buffer.buffer.  There is no per-iteration step other
   than that, so `continue' in the body simply fetches the next line.
   line_buffer must be an lvalue; every argument may be evaluated on
   each iteration. */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* stop at end of file */       \
         && (readline (&line_buffer, file_pointer), /* fetch line */    \
             char_pointer = line_buffer.buffer,     /* rewind ptr */    \
             TRUE))                     /* always enter the body */
4015
/* True if CP points at the keyword KW and the character right after
   the keyword satisfies notinname.  KW must be a literal string.  On
   a match CP is moved past the keyword and then to wherever
   skip_spaces stops; otherwise CP is not modified.  CP must be an
   lvalue and is evaluated several times. */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), TRUE)       /* "" kw only parses for a literal */   \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* keyword matches */   \
   && notinname ((cp)[sizeof (kw) - 1])         /* keyword ends */      \
   && ((cp) = skip_spaces ((cp) + sizeof (kw) - 1)))  /* move past */
4021
/* Similar to LOOKING_AT, but compares with strncaseeq, does not use
   notinname to check what follows the keyword, and does not skip
   spaces: on a match CP is advanced just past the keyword.  KW must
   be a literal string; CP must be an lvalue and is evaluated more
   than once. */
#define LOOKING_AT_NOCASE(cp, kw)                                       \
  ((assert ("" kw), TRUE)       /* "" kw only parses for a literal */   \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* keyword matches */   \
   && ((cp) += sizeof (kw) - 1))                /* step over it */
4027
4028 /*
4029  * Read a file, but do no processing.  This is used to do regexp
4030  * matching on files that have no language defined.
4031  */
4032 static void
4033 just_read_file (inf)
4034      FILE *inf;
4035 {
4036   register char *dummy;
4037
4038   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4039     continue;
4040 }
4041
4042 \f
4043 /* Fortran parsing */
4044
4045 static void F_takeprec __P((void));
4046 static void F_getit __P((FILE *));
4047
4048 static void
4049 F_takeprec ()
4050 {
4051   dbp = skip_spaces (dbp);
4052   if (*dbp != '*')
4053     return;
4054   dbp++;
4055   dbp = skip_spaces (dbp);
4056   if (strneq (dbp, "(*)", 3))
4057     {
4058       dbp += 3;
4059       return;
4060     }
4061   if (!ISDIGIT (*dbp))
4062     {
4063       --dbp;                    /* force failure */
4064       return;
4065     }
4066   do
4067     dbp++;
4068   while (ISDIGIT (*dbp));
4069 }
4070
4071 static void
4072 F_getit (inf)
4073      FILE *inf;
4074 {
4075   register char *cp;
4076
4077   dbp = skip_spaces (dbp);
4078   if (*dbp == '\0')
4079     {
4080       readline (&lb, inf);
4081       dbp = lb.buffer;
4082       if (dbp[5] != '&')
4083         return;
4084       dbp += 6;
4085       dbp = skip_spaces (dbp);
4086     }
4087   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4088     return;
4089   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4090     continue;
4091   make_tag (dbp, cp-dbp, TRUE,
4092             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4093 }
4094
4095
4096 static void
4097 Fortran_functions (inf)
4098      FILE *inf;
4099 {
4100   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4101     {
4102       if (*dbp == '%')
4103         dbp++;                  /* Ratfor escape to fortran */
4104       dbp = skip_spaces (dbp);
4105       if (*dbp == '\0')
4106         continue;
4107       switch (lowcase (*dbp))
4108         {
4109         case 'i':
4110           if (nocase_tail ("integer"))
4111             F_takeprec ();
4112           break;
4113         case 'r':
4114           if (nocase_tail ("real"))
4115             F_takeprec ();
4116           break;
4117         case 'l':
4118           if (nocase_tail ("logical"))
4119             F_takeprec ();
4120           break;
4121         case 'c':
4122           if (nocase_tail ("complex") || nocase_tail ("character"))
4123             F_takeprec ();
4124           break;
4125         case 'd':
4126           if (nocase_tail ("double"))
4127             {
4128               dbp = skip_spaces (dbp);
4129               if (*dbp == '\0')
4130                 continue;
4131               if (nocase_tail ("precision"))
4132                 break;
4133               continue;
4134             }
4135           break;
4136         }
4137       dbp = skip_spaces (dbp);
4138       if (*dbp == '\0')
4139         continue;
4140       switch (lowcase (*dbp))
4141         {
4142         case 'f':
4143           if (nocase_tail ("function"))
4144             F_getit (inf);
4145           continue;
4146         case 's':
4147           if (nocase_tail ("subroutine"))
4148             F_getit (inf);
4149           continue;
4150         case 'e':
4151           if (nocase_tail ("entry"))
4152             F_getit (inf);
4153           continue;
4154         case 'b':
4155           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4156             {
4157               dbp = skip_spaces (dbp);
4158               if (*dbp == '\0') /* assume un-named */
4159                 make_tag ("blockdata", 9, TRUE,
4160                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4161               else
4162                 F_getit (inf);  /* look for name */
4163             }
4164           continue;
4165         }
4166     }
4167 }
4168
4169 \f
4170 /*
4171  * Ada parsing
4172  * Original code by
4173  * Philippe Waroquiers (1998)
4174  */
4175
4176 static void Ada_getit __P((FILE *, char *));
4177
4178 /* Once we are positioned after an "interesting" keyword, let's get
4179    the real tag value necessary. */
4180 static void
4181 Ada_getit (inf, name_qualifier)
4182      FILE *inf;
4183      char *name_qualifier;
4184 {
4185   register char *cp;
4186   char *name;
4187   char c;
4188
4189   while (!feof (inf))
4190     {
4191       dbp = skip_spaces (dbp);
4192       if (*dbp == '\0'
4193           || (dbp[0] == '-' && dbp[1] == '-'))
4194         {
4195           readline (&lb, inf);
4196           dbp = lb.buffer;
4197         }
4198       switch (lowcase(*dbp))
4199         {
4200         case 'b':
4201           if (nocase_tail ("body"))
4202             {
4203               /* Skipping body of   procedure body   or   package body or ....
4204                  resetting qualifier to body instead of spec. */
4205               name_qualifier = "/b";
4206               continue;
4207             }
4208           break;
4209         case 't':
4210           /* Skipping type of   task type   or   protected type ... */
4211           if (nocase_tail ("type"))
4212             continue;
4213           break;
4214         }
4215       if (*dbp == '"')
4216         {
4217           dbp += 1;
4218           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4219             continue;
4220         }
4221       else
4222         {
4223           dbp = skip_spaces (dbp);
4224           for (cp = dbp;
4225                (*cp != '\0'
4226                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4227                cp++)
4228             continue;
4229           if (cp == dbp)
4230             return;
4231         }
4232       c = *cp;
4233       *cp = '\0';
4234       name = concat (dbp, name_qualifier, "");
4235       *cp = c;
4236       make_tag (name, strlen (name), TRUE,
4237                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4238       free (name);
4239       if (c == '"')
4240         dbp = cp + 1;
4241       return;
4242     }
4243 }
4244
4245 static void
4246 Ada_funcs (inf)
4247      FILE *inf;
4248 {
4249   bool inquote = FALSE;
4250   bool skip_till_semicolumn = FALSE;
4251
4252   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4253     {
4254       while (*dbp != '\0')
4255         {
4256           /* Skip a string i.e. "abcd". */
4257           if (inquote || (*dbp == '"'))
4258             {
4259               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4260               if (dbp != NULL)
4261                 {
4262                   inquote = FALSE;
4263                   dbp += 1;
4264                   continue;     /* advance char */
4265                 }
4266               else
4267                 {
4268                   inquote = TRUE;
4269                   break;        /* advance line */
4270                 }
4271             }
4272
4273           /* Skip comments. */
4274           if (dbp[0] == '-' && dbp[1] == '-')
4275             break;              /* advance line */
4276
4277           /* Skip character enclosed in single quote i.e. 'a'
4278              and skip single quote starting an attribute i.e. 'Image. */
4279           if (*dbp == '\'')
4280             {
4281               dbp++ ;
4282               if (*dbp != '\0')
4283                 dbp++;
4284               continue;
4285             }
4286
4287           if (skip_till_semicolumn)
4288             {
4289               if (*dbp == ';')
4290                 skip_till_semicolumn = FALSE;
4291               dbp++;
4292               continue;         /* advance char */
4293             }
4294
4295           /* Search for beginning of a token.  */
4296           if (!begtoken (*dbp))
4297             {
4298               dbp++;
4299               continue;         /* advance char */
4300             }
4301
4302           /* We are at the beginning of a token. */
4303           switch (lowcase(*dbp))
4304             {
4305             case 'f':
4306               if (!packages_only && nocase_tail ("function"))
4307                 Ada_getit (inf, "/f");
4308               else
4309                 break;          /* from switch */
4310               continue;         /* advance char */
4311             case 'p':
4312               if (!packages_only && nocase_tail ("procedure"))
4313                 Ada_getit (inf, "/p");
4314               else if (nocase_tail ("package"))
4315                 Ada_getit (inf, "/s");
4316               else if (nocase_tail ("protected")) /* protected type */
4317                 Ada_getit (inf, "/t");
4318               else
4319                 break;          /* from switch */
4320               continue;         /* advance char */
4321
4322             case 'u':
4323               if (typedefs && !packages_only && nocase_tail ("use"))
4324                 {
4325                   /* when tagging types, avoid tagging  use type Pack.Typename;
4326                      for this, we will skip everything till a ; */
4327                   skip_till_semicolumn = TRUE;
4328                   continue;     /* advance char */
4329                 }
4330
4331             case 't':
4332               if (!packages_only && nocase_tail ("task"))
4333                 Ada_getit (inf, "/k");
4334               else if (typedefs && !packages_only && nocase_tail ("type"))
4335                 {
4336                   Ada_getit (inf, "/t");
4337                   while (*dbp != '\0')
4338                     dbp += 1;
4339                 }
4340               else
4341                 break;          /* from switch */
4342               continue;         /* advance char */
4343             }
4344
4345           /* Look for the end of the token. */
4346           while (!endtoken (*dbp))
4347             dbp++;
4348
4349         } /* advance char */
4350     } /* advance line */
4351 }
4352
4353 \f
4354 /*
4355  * Unix and microcontroller assembly tag handling
4356  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4357  * Idea by Bob Weiner, Motorola Inc. (1994)
4358  */
4359 static void
4360 Asm_labels (inf)
4361      FILE *inf;
4362 {
4363   register char *cp;
4364
4365   LOOP_ON_INPUT_LINES (inf, lb, cp)
4366     {
4367       /* If first char is alphabetic or one of [_.$], test for colon
4368          following identifier. */
4369       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4370         {
4371           /* Read past label. */
4372           cp++;
4373           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4374             cp++;
4375           if (*cp == ':' || iswhite (*cp))
4376             /* Found end of label, so copy it and add it to the table. */
4377             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4378                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4379         }
4380     }
4381 }
4382
4383 \f
4384 /*
4385  * Perl support
4386  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4387  * Perl variable names: /^(my|local).../
4388  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4389  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4390  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4391  */
4392 static void
4393 Perl_functions (inf)
4394      FILE *inf;
4395 {
4396   char *package = savestr ("main"); /* current package name */
4397   register char *cp;
4398
4399   LOOP_ON_INPUT_LINES (inf, lb, cp)
4400     {
4401       cp = skip_spaces (cp);
4402
4403       if (LOOKING_AT (cp, "package"))
4404         {
4405           free (package);
4406           get_tag (cp, &package);
4407         }
4408       else if (LOOKING_AT (cp, "sub"))
4409         {
4410           char *pos;
4411           char *sp = cp;
4412
4413           while (!notinname (*cp))
4414             cp++;
4415           if (cp == sp)
4416             continue;           /* nothing found */
4417           if ((pos = etags_strchr (sp, ':')) != NULL
4418               && pos < cp && pos[1] == ':')
4419             /* The name is already qualified. */
4420             make_tag (sp, cp - sp, TRUE,
4421                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4422           else
4423             /* Qualify it. */
4424             {
4425               char savechar, *name;
4426
4427               savechar = *cp;
4428               *cp = '\0';
4429               name = concat (package, "::", sp);
4430               *cp = savechar;
4431               make_tag (name, strlen(name), TRUE,
4432                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4433               free (name);
4434             }
4435         }
4436        else if (globals)        /* only if we are tagging global vars */
4437         {
4438           /* Skip a qualifier, if any. */
4439           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4440           /* After "my" or "local", but before any following paren or space. */
4441           char *varstart = cp;
4442
4443           if (qual              /* should this be removed?  If yes, how? */
4444               && (*cp == '$' || *cp == '@' || *cp == '%'))
4445             {
4446               varstart += 1;
4447               do
4448                 cp++;
4449               while (ISALNUM (*cp) || *cp == '_');
4450             }
4451           else if (qual)
4452             {
4453               /* Should be examining a variable list at this point;
4454                  could insist on seeing an open parenthesis. */
4455               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4456                 cp++;
4457             }
4458           else
4459             continue;
4460
4461           make_tag (varstart, cp - varstart, FALSE,
4462                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4463         }
4464     }
4465   free (package);
4466 }
4467
4468
4469 /*
4470  * Python support
4471  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4472  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4473  * More ideas by seb bacon <seb@jamkit.com> (2002)
4474  */
4475 static void
4476 Python_functions (inf)
4477      FILE *inf;
4478 {
4479   register char *cp;
4480
4481   LOOP_ON_INPUT_LINES (inf, lb, cp)
4482     {
4483       cp = skip_spaces (cp);
4484       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4485         {
4486           char *name = cp;
4487           while (!notinname (*cp) && *cp != ':')
4488             cp++;
4489           make_tag (name, cp - name, TRUE,
4490                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4491         }
4492     }
4493 }
4494
4495 \f
4496 /*
4497  * PHP support
4498  * Look for:
4499  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4500  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4501  *  - /^[ \t]*define\(\"[^\"]+/
4502  * Only with --members:
4503  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4504  * Idea by Diez B. Roggisch (2001)
4505  */
4506 static void
4507 PHP_functions (inf)
4508      FILE *inf;
4509 {
4510   register char *cp, *name;
4511   bool search_identifier = FALSE;
4512
4513   LOOP_ON_INPUT_LINES (inf, lb, cp)
4514     {
4515       cp = skip_spaces (cp);
4516       name = cp;
4517       if (search_identifier
4518           && *cp != '\0')
4519         {
4520           while (!notinname (*cp))
4521             cp++;
4522           make_tag (name, cp - name, TRUE,
4523                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4524           search_identifier = FALSE;
4525         }
4526       else if (LOOKING_AT (cp, "function"))
4527         {
4528           if(*cp == '&')
4529             cp = skip_spaces (cp+1);
4530           if(*cp != '\0')
4531             {
4532               name = cp;
4533               while (!notinname (*cp))
4534                 cp++;
4535               make_tag (name, cp - name, TRUE,
4536                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4537             }
4538           else
4539             search_identifier = TRUE;
4540         }
4541       else if (LOOKING_AT (cp, "class"))
4542         {
4543           if (*cp != '\0')
4544             {
4545               name = cp;
4546               while (*cp != '\0' && !iswhite (*cp))
4547                 cp++;
4548               make_tag (name, cp - name, FALSE,
4549                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4550             }
4551           else
4552             search_identifier = TRUE;
4553         }
4554       else if (strneq (cp, "define", 6)
4555                && (cp = skip_spaces (cp+6))
4556                && *cp++ == '('
4557                && (*cp == '"' || *cp == '\''))
4558         {
4559           char quote = *cp++;
4560           name = cp;
4561           while (*cp != quote && *cp != '\0')
4562             cp++;
4563           make_tag (name, cp - name, FALSE,
4564                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4565         }
4566       else if (members
4567                && LOOKING_AT (cp, "var")
4568                && *cp == '$')
4569         {
4570           name = cp;
4571           while (!notinname(*cp))
4572             cp++;
4573           make_tag (name, cp - name, FALSE,
4574                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4575         }
4576     }
4577 }
4578
4579 \f
4580 /*
4581  * Cobol tag functions
4582  * We could look for anything that could be a paragraph name.
4583  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4584  * Idea by Corny de Souza (1993)
4585  */
4586 static void
4587 Cobol_paragraphs (inf)
4588      FILE *inf;
4589 {
4590   register char *bp, *ep;
4591
4592   LOOP_ON_INPUT_LINES (inf, lb, bp)
4593     {
4594       if (lb.len < 9)
4595         continue;
4596       bp += 8;
4597
4598       /* If eoln, compiler option or comment ignore whole line. */
4599       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4600         continue;
4601
4602       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4603         continue;
4604       if (*ep++ == '.')
4605         make_tag (bp, ep - bp, TRUE,
4606                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4607     }
4608 }
4609
4610 \f
4611 /*
4612  * Makefile support
4613  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4614  */
4615 static void
4616 Makefile_targets (inf)
4617      FILE *inf;
4618 {
4619   register char *bp;
4620
4621   LOOP_ON_INPUT_LINES (inf, lb, bp)
4622     {
4623       if (*bp == '\t' || *bp == '#')
4624         continue;
4625       while (*bp != '\0' && *bp != '=' && *bp != ':')
4626         bp++;
4627       if (*bp == ':' || (globals && *bp == '='))
4628         {
4629           /* We should detect if there is more than one tag, but we do not.
4630              We just skip initial and final spaces. */
4631           char * namestart = skip_spaces (lb.buffer);
4632           while (--bp > namestart)
4633             if (!notinname (*bp))
4634               break;
4635           make_tag (namestart, bp - namestart + 1, TRUE,
4636                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4637         }
4638     }
4639 }
4640
4641 \f
4642 /*
4643  * Pascal parsing
4644  * Original code by Mosur K. Mohan (1989)
4645  *
4646  *  Locates tags for procedures & functions.  Doesn't do any type- or
4647  *  var-definitions.  It does look for the keyword "extern" or
4648  *  "forward" immediately following the procedure statement; if found,
4649  *  the tag is skipped.
4650  */
4651 static void
4652 Pascal_functions (inf)
4653      FILE *inf;
4654 {
4655   linebuffer tline;             /* mostly copied from C_entries */
4656   long save_lcno;
4657   int save_lineno, namelen, taglen;
4658   char c, *name;
4659
4660   bool                          /* each of these flags is TRUE if: */
4661     incomment,                  /* point is inside a comment */
4662     inquote,                    /* point is inside '..' string */
4663     get_tagname,                /* point is after PROCEDURE/FUNCTION
4664                                    keyword, so next item = potential tag */
4665     found_tag,                  /* point is after a potential tag */
4666     inparms,                    /* point is within parameter-list */
4667     verify_tag;                 /* point has passed the parm-list, so the
4668                                    next token will determine whether this
4669                                    is a FORWARD/EXTERN to be ignored, or
4670                                    whether it is a real tag */
4671
4672   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4673   name = NULL;                  /* keep compiler quiet */
4674   dbp = lb.buffer;
4675   *dbp = '\0';
4676   linebuffer_init (&tline);
4677
4678   incomment = inquote = FALSE;
4679   found_tag = FALSE;            /* have a proc name; check if extern */
4680   get_tagname = FALSE;          /* found "procedure" keyword         */
4681   inparms = FALSE;              /* found '(' after "proc"            */
4682   verify_tag = FALSE;           /* check if "extern" is ahead        */
4683
4684
4685   while (!feof (inf))           /* long main loop to get next char */
4686     {
4687       c = *dbp++;
4688       if (c == '\0')            /* if end of line */
4689         {
4690           readline (&lb, inf);
4691           dbp = lb.buffer;
4692           if (*dbp == '\0')
4693             continue;
4694           if (!((found_tag && verify_tag)
4695                 || get_tagname))
4696             c = *dbp++;         /* only if don't need *dbp pointing
4697                                    to the beginning of the name of
4698                                    the procedure or function */
4699         }
4700       if (incomment)
4701         {
4702           if (c == '}')         /* within { } comments */
4703             incomment = FALSE;
4704           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4705             {
4706               dbp++;
4707               incomment = FALSE;
4708             }
4709           continue;
4710         }
4711       else if (inquote)
4712         {
4713           if (c == '\'')
4714             inquote = FALSE;
4715           continue;
4716         }
4717       else
4718         switch (c)
4719           {
4720           case '\'':
4721             inquote = TRUE;     /* found first quote */
4722             continue;
4723           case '{':             /* found open { comment */
4724             incomment = TRUE;
4725             continue;
4726           case '(':
4727             if (*dbp == '*')    /* found open (* comment */
4728               {
4729                 incomment = TRUE;
4730                 dbp++;
4731               }
4732             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4733               inparms = TRUE;
4734             continue;
4735           case ')':             /* end of parms list */
4736             if (inparms)
4737               inparms = FALSE;
4738             continue;
4739           case ';':
4740             if (found_tag && !inparms) /* end of proc or fn stmt */
4741               {
4742                 verify_tag = TRUE;
4743                 break;
4744               }
4745             continue;
4746           }
4747       if (found_tag && verify_tag && (*dbp != ' '))
4748         {
4749           /* Check if this is an "extern" declaration. */
4750           if (*dbp == '\0')
4751             continue;
4752           if (lowcase (*dbp == 'e'))
4753             {
4754               if (nocase_tail ("extern")) /* superfluous, really! */
4755                 {
4756                   found_tag = FALSE;
4757                   verify_tag = FALSE;
4758                 }
4759             }
4760           else if (lowcase (*dbp) == 'f')
4761             {
4762               if (nocase_tail ("forward")) /* check for forward reference */
4763                 {
4764                   found_tag = FALSE;
4765                   verify_tag = FALSE;
4766                 }
4767             }
4768           if (found_tag && verify_tag) /* not external proc, so make tag */
4769             {
4770               found_tag = FALSE;
4771               verify_tag = FALSE;
4772               make_tag (name, namelen, TRUE,
4773                         tline.buffer, taglen, save_lineno, save_lcno);
4774               continue;
4775             }
4776         }
4777       if (get_tagname)          /* grab name of proc or fn */
4778         {
4779           char *cp;
4780
4781           if (*dbp == '\0')
4782             continue;
4783
4784           /* Find block name. */
4785           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4786             continue;
4787
4788           /* Save all values for later tagging. */
4789           linebuffer_setlen (&tline, lb.len);
4790           strcpy (tline.buffer, lb.buffer);
4791           save_lineno = lineno;
4792           save_lcno = linecharno;
4793           name = tline.buffer + (dbp - lb.buffer);
4794           namelen = cp - dbp;
4795           taglen = cp - lb.buffer + 1;
4796
4797           dbp = cp;             /* set dbp to e-o-token */
4798           get_tagname = FALSE;
4799           found_tag = TRUE;
4800           continue;
4801
4802           /* And proceed to check for "extern". */
4803         }
4804       else if (!incomment && !inquote && !found_tag)
4805         {
4806           /* Check for proc/fn keywords. */
4807           switch (lowcase (c))
4808             {
4809             case 'p':
4810               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4811                 get_tagname = TRUE;
4812               continue;
4813             case 'f':
4814               if (nocase_tail ("unction"))
4815                 get_tagname = TRUE;
4816               continue;
4817             }
4818         }
4819     } /* while not eof */
4820
4821   free (tline.buffer);
4822 }
4823
4824 \f
4825 /*
4826  * Lisp tag functions
4827  *  look for (def or (DEF, quote or QUOTE
4828  */
4829
4830 static void L_getit __P((void));
4831
4832 static void
4833 L_getit ()
4834 {
4835   if (*dbp == '\'')             /* Skip prefix quote */
4836     dbp++;
4837   else if (*dbp == '(')
4838   {
4839     dbp++;
4840     /* Try to skip "(quote " */
4841     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4842       /* Ok, then skip "(" before name in (defstruct (foo)) */
4843       dbp = skip_spaces (dbp);
4844   }
4845   get_tag (dbp, NULL);
4846 }
4847
4848 static void
4849 Lisp_functions (inf)
4850      FILE *inf;
4851 {
4852   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4853     {
4854       if (dbp[0] != '(')
4855         continue;
4856
4857       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4858         {
4859           dbp = skip_non_spaces (dbp);
4860           dbp = skip_spaces (dbp);
4861           L_getit ();
4862         }
4863       else
4864         {
4865           /* Check for (foo::defmumble name-defined ... */
4866           do
4867             dbp++;
4868           while (!notinname (*dbp) && *dbp != ':');
4869           if (*dbp == ':')
4870             {
4871               do
4872                 dbp++;
4873               while (*dbp == ':');
4874
4875               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4876                 {
4877                   dbp = skip_non_spaces (dbp);
4878                   dbp = skip_spaces (dbp);
4879                   L_getit ();
4880                 }
4881             }
4882         }
4883     }
4884 }
4885
4886 \f
4887 /*
4888  * Lua script language parsing
4889  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4890  *
4891  *  "function" and "local function" are tags if they start at column 1.
4892  */
4893 static void
4894 Lua_functions (inf)
4895      FILE *inf;
4896 {
4897   register char *bp;
4898
4899   LOOP_ON_INPUT_LINES (inf, lb, bp)
4900     {
4901       if (bp[0] != 'f' && bp[0] != 'l')
4902         continue;
4903
4904       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4905
4906       if (LOOKING_AT (bp, "function"))
4907         get_tag (bp, NULL);
4908     }
4909 }
4910
4911 \f
4912 /*
4913  * Postscript tags
4914  * Just look for lines where the first character is '/'
4915  * Also look at "defineps" for PSWrap
4916  * Ideas by:
4917  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4918  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4919  */
4920 static void
4921 PS_functions (inf)
4922      FILE *inf;
4923 {
4924   register char *bp, *ep;
4925
4926   LOOP_ON_INPUT_LINES (inf, lb, bp)
4927     {
4928       if (bp[0] == '/')
4929         {
4930           for (ep = bp+1;
4931                *ep != '\0' && *ep != ' ' && *ep != '{';
4932                ep++)
4933             continue;
4934           make_tag (bp, ep - bp, TRUE,
4935                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4936         }
4937       else if (LOOKING_AT (bp, "defineps"))
4938         get_tag (bp, NULL);
4939     }
4940 }
4941
4942 \f
4943 /*
4944  * Forth tags
4945  * Ignore anything after \ followed by space or in ( )
4946  * Look for words defined by :
4947  * Look for constant, code, create, defer, value, and variable
4948  * OBP extensions:  Look for buffer:, field,
4949  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4950  */
4951 static void
4952 Forth_words (inf)
4953      FILE *inf;
4954 {
4955   register char *bp;
4956
4957   LOOP_ON_INPUT_LINES (inf, lb, bp)
4958     while ((bp = skip_spaces (bp))[0] != '\0')
4959       if (bp[0] == '\\' && iswhite(bp[1]))
4960         break;                  /* read next line */
4961       else if (bp[0] == '(' && iswhite(bp[1]))
4962         do                      /* skip to ) or eol */
4963           bp++;
4964         while (*bp != ')' && *bp != '\0');
4965       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
4966                || LOOKING_AT_NOCASE (bp, "constant")
4967                || LOOKING_AT_NOCASE (bp, "code")
4968                || LOOKING_AT_NOCASE (bp, "create")
4969                || LOOKING_AT_NOCASE (bp, "defer")
4970                || LOOKING_AT_NOCASE (bp, "value")
4971                || LOOKING_AT_NOCASE (bp, "variable")
4972                || LOOKING_AT_NOCASE (bp, "buffer:")
4973                || LOOKING_AT_NOCASE (bp, "field"))
4974         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4975       else
4976         bp = skip_non_spaces (bp);
4977 }
4978
4979 \f
4980 /*
4981  * Scheme tag functions
4982  * look for (def... xyzzy
4983  *          (def... (xyzzy
4984  *          (def ... ((...(xyzzy ....
4985  *          (set! xyzzy
4986  * Original code by Ken Haase (1985?)
4987  */
4988 static void
4989 Scheme_functions (inf)
4990      FILE *inf;
4991 {
4992   register char *bp;
4993
4994   LOOP_ON_INPUT_LINES (inf, lb, bp)
4995     {
4996       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4997         {
4998           bp = skip_non_spaces (bp+4);
4999           /* Skip over open parens and white space */
5000           while (notinname (*bp))
5001             bp++;
5002           get_tag (bp, NULL);
5003         }
5004       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5005         get_tag (bp, NULL);
5006     }
5007 }
5008
5009 \f
5010 /* Find tags in TeX and LaTeX input files.  */
5011
5012 /* TEX_toktab is a table of TeX control sequences that define tags.
5013  * Each entry records one such control sequence.
5014  *
5015  * Original code from who knows whom.
5016  * Ideas by:
5017  *   Stefan Monnier (2002)
5018  */
5019
5020 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5021
5022 /* Default set of control sequences to put into TEX_toktab.
5023    The value of environment var TEXTAGS is prepended to this.  */
5024 static char *TEX_defenv = "\
5025 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5026 :part:appendix:entry:index:def\
5027 :newcommand:renewcommand:newenvironment:renewenvironment";
5028
5029 static void TEX_mode __P((FILE *));
5030 static void TEX_decode_env __P((char *, char *));
5031
5032 static char TEX_esc = '\\';
5033 static char TEX_opgrp = '{';
5034 static char TEX_clgrp = '}';
5035
5036 /*
5037  * TeX/LaTeX scanning loop.
5038  */
5039 static void
5040 TeX_commands (inf)
5041      FILE *inf;
5042 {
5043   char *cp;
5044   linebuffer *key;
5045
5046   /* Select either \ or ! as escape character.  */
5047   TEX_mode (inf);
5048
5049   /* Initialize token table once from environment. */
5050   if (TEX_toktab == NULL)
5051     TEX_decode_env ("TEXTAGS", TEX_defenv);
5052
5053   LOOP_ON_INPUT_LINES (inf, lb, cp)
5054     {
5055       /* Look at each TEX keyword in line. */
5056       for (;;)
5057         {
5058           /* Look for a TEX escape. */
5059           while (*cp++ != TEX_esc)
5060             if (cp[-1] == '\0' || cp[-1] == '%')
5061               goto tex_next_line;
5062
5063           for (key = TEX_toktab; key->buffer != NULL; key++)
5064             if (strneq (cp, key->buffer, key->len))
5065               {
5066                 register char *p;
5067                 int namelen, linelen;
5068                 bool opgrp = FALSE;
5069
5070                 cp = skip_spaces (cp + key->len);
5071                 if (*cp == TEX_opgrp)
5072                   {
5073                     opgrp = TRUE;
5074                     cp++;
5075                   }
5076                 for (p = cp;
5077                      (!iswhite (*p) && *p != '#' &&
5078                       *p != TEX_opgrp && *p != TEX_clgrp);
5079                      p++)
5080                   continue;
5081                 namelen = p - cp;
5082                 linelen = lb.len;
5083                 if (!opgrp || *p == TEX_clgrp)
5084                   {
5085                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5086                       p++;
5087                     linelen = p - lb.buffer + 1;
5088                   }
5089                 make_tag (cp, namelen, TRUE,
5090                           lb.buffer, linelen, lineno, linecharno);
5091                 goto tex_next_line; /* We only tag a line once */
5092               }
5093         }
5094     tex_next_line:
5095       ;
5096     }
5097 }
5098
5099 #define TEX_LESC '\\'
5100 #define TEX_SESC '!'
5101
5102 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5103    chars accordingly. */
5104 static void
5105 TEX_mode (inf)
5106      FILE *inf;
5107 {
5108   int c;
5109
5110   while ((c = getc (inf)) != EOF)
5111     {
5112       /* Skip to next line if we hit the TeX comment char. */
5113       if (c == '%')
5114         while (c != '\n' && c != EOF)
5115           c = getc (inf);
5116       else if (c == TEX_LESC || c == TEX_SESC )
5117         break;
5118     }
5119
5120   if (c == TEX_LESC)
5121     {
5122       TEX_esc = TEX_LESC;
5123       TEX_opgrp = '{';
5124       TEX_clgrp = '}';
5125     }
5126   else
5127     {
5128       TEX_esc = TEX_SESC;
5129       TEX_opgrp = '<';
5130       TEX_clgrp = '>';
5131     }
5132   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5133      No attempt is made to correct the situation. */
5134   rewind (inf);
5135 }
5136
5137 /* Read environment and prepend it to the default string.
5138    Build token table. */
5139 static void
5140 TEX_decode_env (evarname, defenv)
5141      char *evarname;
5142      char *defenv;
5143 {
5144   register char *env, *p;
5145   int i, len;
5146
5147   /* Append default string to environment. */
5148   env = getenv (evarname);
5149   if (!env)
5150     env = defenv;
5151   else
5152     {
5153       char *oldenv = env;
5154       env = concat (oldenv, defenv, "");
5155     }
5156
5157   /* Allocate a token table */
5158   for (len = 1, p = env; p;)
5159     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5160       len++;
5161   TEX_toktab = xnew (len, linebuffer);
5162
5163   /* Unpack environment string into token table. Be careful about */
5164   /* zero-length strings (leading ':', "::" and trailing ':') */
5165   for (i = 0; *env != '\0';)
5166     {
5167       p = etags_strchr (env, ':');
5168       if (!p)                   /* End of environment string. */
5169         p = env + strlen (env);
5170       if (p - env > 0)
5171         {                       /* Only non-zero strings. */
5172           TEX_toktab[i].buffer = savenstr (env, p - env);
5173           TEX_toktab[i].len = p - env;
5174           i++;
5175         }
5176       if (*p)
5177         env = p + 1;
5178       else
5179         {
5180           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5181           TEX_toktab[i].len = 0;
5182           break;
5183         }
5184     }
5185 }
5186
5187 \f
5188 /* Texinfo support.  Dave Love, Mar. 2000.  */
5189 static void
5190 Texinfo_nodes (inf)
5191      FILE * inf;
5192 {
5193   char *cp, *start;
5194   LOOP_ON_INPUT_LINES (inf, lb, cp)
5195     if (LOOKING_AT (cp, "@node"))
5196       {
5197         start = cp;
5198         while (*cp != '\0' && *cp != ',')
5199           cp++;
5200         make_tag (start, cp - start, TRUE,
5201                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5202       }
5203 }
5204
5205 \f
5206 /*
5207  * HTML support.
5208  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5209  * Contents of <a name=xxx> are tags with name xxx.
5210  *
5211  * Francesco Potortì, 2002.
5212  */
5213 static void
5214 HTML_labels (inf)
5215      FILE * inf;
5216 {
5217   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5218   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5219   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5220   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5221   char *end;
5222
5223
5224   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5225
5226   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5227     for (;;)                    /* loop on the same line */
5228       {
5229         if (skiptag)            /* skip HTML tag */
5230           {
5231             while (*dbp != '\0' && *dbp != '>')
5232               dbp++;
5233             if (*dbp == '>')
5234               {
5235                 dbp += 1;
5236                 skiptag = FALSE;
5237                 continue;       /* look on the same line */
5238               }
5239             break;              /* go to next line */
5240           }
5241
5242         else if (intag) /* look for "name=" or "id=" */
5243           {
5244             while (*dbp != '\0' && *dbp != '>'
5245                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5246               dbp++;
5247             if (*dbp == '\0')
5248               break;            /* go to next line */
5249             if (*dbp == '>')
5250               {
5251                 dbp += 1;
5252                 intag = FALSE;
5253                 continue;       /* look on the same line */
5254               }
5255             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5256                 || LOOKING_AT_NOCASE (dbp, "id="))
5257               {
5258                 bool quoted = (dbp[0] == '"');
5259
5260                 if (quoted)
5261                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5262                     continue;
5263                 else
5264                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5265                     continue;
5266                 linebuffer_setlen (&token_name, end - dbp);
5267                 strncpy (token_name.buffer, dbp, end - dbp);
5268                 token_name.buffer[end - dbp] = '\0';
5269
5270                 dbp = end;
5271                 intag = FALSE;  /* we found what we looked for */
5272                 skiptag = TRUE; /* skip to the end of the tag */
5273                 getnext = TRUE; /* then grab the text */
5274                 continue;       /* look on the same line */
5275               }
5276             dbp += 1;
5277           }
5278
5279         else if (getnext)       /* grab next tokens and tag them */
5280           {
5281             dbp = skip_spaces (dbp);
5282             if (*dbp == '\0')
5283               break;            /* go to next line */
5284             if (*dbp == '<')
5285               {
5286                 intag = TRUE;
5287                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5288                 continue;       /* look on the same line */
5289               }
5290
5291             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5292               continue;
5293             make_tag (token_name.buffer, token_name.len, TRUE,
5294                       dbp, end - dbp, lineno, linecharno);
5295             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5296             getnext = FALSE;
5297             break;              /* go to next line */
5298           }
5299
5300         else                    /* look for an interesting HTML tag */
5301           {
5302             while (*dbp != '\0' && *dbp != '<')
5303               dbp++;
5304             if (*dbp == '\0')
5305               break;            /* go to next line */
5306             intag = TRUE;
5307             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5308               {
5309                 inanchor = TRUE;
5310                 continue;       /* look on the same line */
5311               }
5312             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5313                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5314                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5315                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5316               {
5317                 intag = FALSE;
5318                 getnext = TRUE;
5319                 continue;       /* look on the same line */
5320               }
5321             dbp += 1;
5322           }
5323       }
5324 }
5325
5326 \f
5327 /*
5328  * Prolog support
5329  *
5330  * Assumes that the predicate or rule starts at column 0.
5331  * Only the first clause of a predicate or rule is added.
5332  * Original code by Sunichirou Sugou (1989)
5333  * Rewritten by Anders Lindgren (1996)
5334  */
5335 static int prolog_pr __P((char *, char *));
5336 static void prolog_skip_comment __P((linebuffer *, FILE *));
5337 static int prolog_atom __P((char *, int));
5338
5339 static void
5340 Prolog_functions (inf)
5341      FILE *inf;
5342 {
5343   char *cp, *last;
5344   int len;
5345   int allocated;
5346
5347   allocated = 0;
5348   len = 0;
5349   last = NULL;
5350
5351   LOOP_ON_INPUT_LINES (inf, lb, cp)
5352     {
5353       if (cp[0] == '\0')        /* Empty line */
5354         continue;
5355       else if (iswhite (cp[0])) /* Not a predicate */
5356         continue;
5357       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5358         prolog_skip_comment (&lb, inf);
5359       else if ((len = prolog_pr (cp, last)) > 0)
5360         {
5361           /* Predicate or rule.  Store the function name so that we
5362              only generate a tag for the first clause.  */
5363           if (last == NULL)
5364             last = xnew(len + 1, char);
5365           else if (len + 1 > allocated)
5366             xrnew (last, len + 1, char);
5367           allocated = len + 1;
5368           strncpy (last, cp, len);
5369           last[len] = '\0';
5370         }
5371     }
5372   free (last);
5373 }
5374
5375
5376 static void
5377 prolog_skip_comment (plb, inf)
5378      linebuffer *plb;
5379      FILE *inf;
5380 {
5381   char *cp;
5382
5383   do
5384     {
5385       for (cp = plb->buffer; *cp != '\0'; cp++)
5386         if (cp[0] == '*' && cp[1] == '/')
5387           return;
5388       readline (plb, inf);
5389     }
5390   while (!feof(inf));
5391 }
5392
5393 /*
5394  * A predicate or rule definition is added if it matches:
5395  *     <beginning of line><Prolog Atom><whitespace>(
5396  * or  <beginning of line><Prolog Atom><whitespace>:-
5397  *
5398  * It is added to the tags database if it doesn't match the
5399  * name of the previous clause header.
5400  *
5401  * Return the size of the name of the predicate or rule, or 0 if no
5402  * header was found.
5403  */
5404 static int
5405 prolog_pr (s, last)
5406      char *s;
5407      char *last;                /* Name of last clause. */
5408 {
5409   int pos;
5410   int len;
5411
5412   pos = prolog_atom (s, 0);
5413   if (pos < 1)
5414     return 0;
5415
5416   len = pos;
5417   pos = skip_spaces (s + pos) - s;
5418
5419   if ((s[pos] == '.'
5420        || (s[pos] == '(' && (pos += 1))
5421        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5422       && (last == NULL          /* save only the first clause */
5423           || len != (int)strlen (last)
5424           || !strneq (s, last, len)))
5425         {
5426           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5427           return len;
5428         }
5429   else
5430     return 0;
5431 }
5432
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  The returned length includes both
 *   enclosing quotes.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (*p == '\'')
    {
      /* The atom is quoted. */
      for (p++;;)
        switch (*p)
          {
          case '\'':
            if (*++p != '\'')
              return (int) (p - start); /* that was the closing quote */
            p++;                /* A double quote */
            break;
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return -1;
          case '\\':
            if (p[1] == '\0')
              return -1;
            p += 2;             /* skip the backslash and what it quotes */
            break;
          default:
            p++;
            break;
          }
    }

  if (!(ISLOWER(*p) || *p == '_'))
    return -1;

  /* The atom is unquoted. */
  for (p++; ISALNUM(*p) || *p == '_'; p++)
    continue;
  return (int) (p - start);
}
5491
5492 \f
5493 /*
5494  * Support for Erlang
5495  *
5496  * Generates tags for functions, defines, and records.
5497  * Assumes that Erlang functions start at column 0.
5498  * Original code by Anders Lindgren (1996)
5499  */
5500 static int erlang_func __P((char *, char *));
5501 static void erlang_attribute __P((char *));
5502 static int erlang_atom __P((char *));
5503
5504 static void
5505 Erlang_functions (inf)
5506      FILE *inf;
5507 {
5508   char *cp, *last;
5509   int len;
5510   int allocated;
5511
5512   allocated = 0;
5513   len = 0;
5514   last = NULL;
5515
5516   LOOP_ON_INPUT_LINES (inf, lb, cp)
5517     {
5518       if (cp[0] == '\0')        /* Empty line */
5519         continue;
5520       else if (iswhite (cp[0])) /* Not function nor attribute */
5521         continue;
5522       else if (cp[0] == '%')    /* comment */
5523         continue;
5524       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5525         continue;
5526       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5527         {
5528           erlang_attribute (cp);
5529           if (last != NULL)
5530             {
5531               free (last);
5532               last = NULL;
5533             }
5534         }
5535       else if ((len = erlang_func (cp, last)) > 0)
5536         {
5537           /*
5538            * Function.  Store the function name so that we only
5539            * generates a tag for the first clause.
5540            */
5541           if (last == NULL)
5542             last = xnew (len + 1, char);
5543           else if (len + 1 > allocated)
5544             xrnew (last, len + 1, char);
5545           allocated = len + 1;
5546           strncpy (last, cp, len);
5547           last[len] = '\0';
5548         }
5549     }
5550   free (last);
5551 }
5552
5553
5554 /*
5555  * A function definition is added if it matches:
5556  *     <beginning of line><Erlang Atom><whitespace>(
5557  *
5558  * It is added to the tags database if it doesn't match the
5559  * name of the previous clause header.
5560  *
5561  * Return the size of the name of the function, or 0 if no function
5562  * was found.
5563  */
5564 static int
5565 erlang_func (s, last)
5566      char *s;
5567      char *last;                /* Name of last clause. */
5568 {
5569   int pos;
5570   int len;
5571
5572   pos = erlang_atom (s);
5573   if (pos < 1)
5574     return 0;
5575
5576   len = pos;
5577   pos = skip_spaces (s + pos) - s;
5578
5579   /* Save only the first clause. */
5580   if (s[pos++] == '('
5581       && (last == NULL
5582           || len != (int)strlen (last)
5583           || !strneq (s, last, len)))
5584         {
5585           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5586           return len;
5587         }
5588
5589   return 0;
5590 }
5591
5592
5593 /*
5594  * Handle attributes.  Currently, tags are generated for defines
5595  * and records.
5596  *
5597  * They are on the form:
5598  * -define(foo, bar).
5599  * -define(Foo(M, N), M+N).
5600  * -record(graph, {vtab = notable, cyclic = true}).
5601  */
5602 static void
5603 erlang_attribute (s)
5604      char *s;
5605 {
5606   char *cp = s;
5607
5608   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5609       && *cp++ == '(')
5610     {
5611       int len = erlang_atom (skip_spaces (cp));
5612       if (len > 0)
5613         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5614     }
5615   return;
5616 }
5617
5618
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does
 * not start with a letter, an underscore or a single quote, and also
 * when a quoted atom is not closed on this line.
 *
 * Inside a quoted atom a backslash quotes the following character.
 * The length returned for a quoted atom includes both quotes.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (*p == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      for (p++; *p != '\''; p++)
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              p++;              /* step over the quoted character too */
              if (*p == '\0')
                return 0;
            }
        }
      return (int) (p + 1 - s);
    }

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      for (p++; ISALNUM (*p) || *p == '_'; p++)
        continue;
    }

  return (int) (p - s);
}
5647
5648 \f
5649 static char *scan_separators __P((char *));
5650 static void add_regex __P((char *, language *));
5651 static char *substitute __P((char *, char *, struct re_registers *));
5652
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * unquoted occurrence of it.  A backslash quotes the next character:
 * \a, \b, \d, \e, \f, \n, \r, \t and \v become the corresponding
 * control characters, a quoted separator becomes the separator, and
 * any other quoted character is kept together with its backslash.
 * Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Look at the quoted character right away. */
          ++name;
          if (*name == '\0')
            break;              /* a trailing backslash is dropped */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* Something else is quoted: unless it is the separator,
                 preserve the quote. */
              if (*name != sep)
                *copyto++ = '\\';
              *copyto++ = *name;
              break;
            }
        }
      else if (*name == sep)
        break;                  /* unquoted separator: we are done */
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5712
5713 /* Look at the argument of --regex or --no-regex and do the right
5714    thing.  Same for each line of a regexp file. */
5715 static void
5716 analyse_regex (regex_arg)
5717      char *regex_arg;
5718 {
5719   if (regex_arg == NULL)
5720     {
5721       free_regexps ();          /* --no-regex: remove existing regexps */
5722       return;
5723     }
5724
5725   /* A real --regexp option or a line in a regexp file. */
5726   switch (regex_arg[0])
5727     {
5728       /* Comments in regexp file or null arg to --regex. */
5729     case '\0':
5730     case ' ':
5731     case '\t':
5732       break;
5733
5734       /* Read a regex file.  This is recursive and may result in a
5735          loop, which will stop when the file descriptors are exhausted. */
5736     case '@':
5737       {
5738         FILE *regexfp;
5739         linebuffer regexbuf;
5740         char *regexfile = regex_arg + 1;
5741
5742         /* regexfile is a file containing regexps, one per line. */
5743         regexfp = fopen (regexfile, "r");
5744         if (regexfp == NULL)
5745           {
5746             pfatal (regexfile);
5747             return;
5748           }
5749         linebuffer_init (&regexbuf);
5750         while (readline_internal (&regexbuf, regexfp) > 0)
5751           analyse_regex (regexbuf.buffer);
5752         free (regexbuf.buffer);
5753         fclose (regexfp);
5754       }
5755       break;
5756
5757       /* Regexp to be used for a specific language only. */
5758     case '{':
5759       {
5760         language *lang;
5761         char *lang_name = regex_arg + 1;
5762         char *cp;
5763
5764         for (cp = lang_name; *cp != '}'; cp++)
5765           if (*cp == '\0')
5766             {
5767               error ("unterminated language name in regex: %s", regex_arg);
5768               return;
5769             }
5770         *cp++ = '\0';
5771         lang = get_language_from_langname (lang_name);
5772         if (lang == NULL)
5773           return;
5774         add_regex (cp, lang);
5775       }
5776       break;
5777
5778       /* Regexp to be used for any language. */
5779     default:
5780       add_regex (regex_arg, NULL);
5781       break;
5782     }
5783 }
5784
5785 /* Separate the regexp pattern, compile it,
5786    and care for optional name and modifiers. */
5787 static void
5788 add_regex (regexp_pattern, lang)
5789      char *regexp_pattern;
5790      language *lang;
5791 {
5792   static struct re_pattern_buffer zeropattern;
5793   char sep, *pat, *name, *modifiers;
5794   const char *err;
5795   struct re_pattern_buffer *patbuf;
5796   regexp *rp;
5797   bool
5798     force_explicit_name = TRUE, /* do not use implicit tag names */
5799     ignore_case = FALSE,        /* case is significant */
5800     multi_line = FALSE,         /* matches are done one line at a time */
5801     single_line = FALSE;        /* dot does not match newline */
5802
5803
5804   if (strlen(regexp_pattern) < 3)
5805     {
5806       error ("null regexp", (char *)NULL);
5807       return;
5808     }
5809   sep = regexp_pattern[0];
5810   name = scan_separators (regexp_pattern);
5811   if (name == NULL)
5812     {
5813       error ("%s: unterminated regexp", regexp_pattern);
5814       return;
5815     }
5816   if (name[1] == sep)
5817     {
5818       error ("null name for regexp \"%s\"", regexp_pattern);
5819       return;
5820     }
5821   modifiers = scan_separators (name);
5822   if (modifiers == NULL)        /* no terminating separator --> no name */
5823     {
5824       modifiers = name;
5825       name = "";
5826     }
5827   else
5828     modifiers += 1;             /* skip separator */
5829
5830   /* Parse regex modifiers. */
5831   for (; modifiers[0] != '\0'; modifiers++)
5832     switch (modifiers[0])
5833       {
5834       case 'N':
5835         if (modifiers == name)
5836           error ("forcing explicit tag name but no name, ignoring", NULL);
5837         force_explicit_name = TRUE;
5838         break;
5839       case 'i':
5840         ignore_case = TRUE;
5841         break;
5842       case 's':
5843         single_line = TRUE;
5844         /* FALLTHRU */
5845       case 'm':
5846         multi_line = TRUE;
5847         need_filebuf = TRUE;
5848         break;
5849       default:
5850         {
5851           char wrongmod [2];
5852           wrongmod[0] = modifiers[0];
5853           wrongmod[1] = '\0';
5854           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5855         }
5856         break;
5857       }
5858
5859   patbuf = xnew (1, struct re_pattern_buffer);
5860   *patbuf = zeropattern;
5861   if (ignore_case)
5862     {
5863       static char lc_trans[CHARS];
5864       int i;
5865       for (i = 0; i < CHARS; i++)
5866         lc_trans[i] = lowcase (i);
5867       patbuf->translate = lc_trans;     /* translation table to fold case  */
5868     }
5869
5870   if (multi_line)
5871     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5872   else
5873     pat = regexp_pattern;
5874
5875   if (single_line)
5876     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5877   else
5878     re_set_syntax (RE_SYNTAX_EMACS);
5879
5880   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5881   if (multi_line)
5882     free (pat);
5883   if (err != NULL)
5884     {
5885       error ("%s while compiling pattern", err);
5886       return;
5887     }
5888
5889   rp = p_head;
5890   p_head = xnew (1, regexp);
5891   p_head->pattern = savestr (regexp_pattern);
5892   p_head->p_next = rp;
5893   p_head->lang = lang;
5894   p_head->pat = patbuf;
5895   p_head->name = savestr (name);
5896   p_head->error_signaled = FALSE;
5897   p_head->force_explicit_name = force_explicit_name;
5898   p_head->ignore_case = ignore_case;
5899   p_head->multi_line = multi_line;
5900 }
5901
5902 /*
5903  * Do the substitutions indicated by the regular expression and
5904  * arguments.
5905  */
5906 static char *
5907 substitute (in, out, regs)
5908      char *in, *out;
5909      struct re_registers *regs;
5910 {
5911   char *result, *t;
5912   int size, dig, diglen;
5913
5914   result = NULL;
5915   size = strlen (out);
5916
5917   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5918   if (out[size - 1] == '\\')
5919     fatal ("pattern error in \"%s\"", out);
5920   for (t = etags_strchr (out, '\\');
5921        t != NULL;
5922        t = etags_strchr (t + 2, '\\'))
5923     if (ISDIGIT (t[1]))
5924       {
5925         dig = t[1] - '0';
5926         diglen = regs->end[dig] - regs->start[dig];
5927         size += diglen - 2;
5928       }
5929     else
5930       size -= 1;
5931
5932   /* Allocate space and do the substitutions. */
5933   assert (size >= 0);
5934   result = xnew (size + 1, char);
5935
5936   for (t = result; *out != '\0'; out++)
5937     if (*out == '\\' && ISDIGIT (*++out))
5938       {
5939         dig = *out - '0';
5940         diglen = regs->end[dig] - regs->start[dig];
5941         strncpy (t, in + regs->start[dig], diglen);
5942         t += diglen;
5943       }
5944     else
5945       *t++ = *out;
5946   *t = '\0';
5947
5948   assert (t <= result + size);
5949   assert (t - result == (int)strlen (result));
5950
5951   return result;
5952 }
5953
5954 /* Deallocate all regexps. */
5955 static void
5956 free_regexps ()
5957 {
5958   regexp *rp;
5959   while (p_head != NULL)
5960     {
5961       rp = p_head->p_next;
5962       free (p_head->pattern);
5963       free (p_head->name);
5964       free (p_head);
5965       p_head = rp;
5966     }
5967   return;
5968 }
5969
5970 /*
5971  * Reads the whole file as a single string from `filebuf' and looks for
5972  * multi-line regular expressions, creating tags on matches.
5973  * readline already dealt with normal regexps.
5974  *
5975  * Idea by Ben Wing <ben@666.com> (2002).
5976  */
5977 static void
5978 regex_tag_multiline ()
5979 {
5980   char *buffer = filebuf.buffer;
5981   regexp *rp;
5982   char *name;
5983
5984   for (rp = p_head; rp != NULL; rp = rp->p_next)
5985     {
5986       int match = 0;
5987
5988       if (!rp->multi_line)
5989         continue;               /* skip normal regexps */
5990
5991       /* Generic initialisations before parsing file from memory. */
5992       lineno = 1;               /* reset global line number */
5993       charno = 0;               /* reset global char number */
5994       linecharno = 0;           /* reset global char number of line start */
5995
5996       /* Only use generic regexps or those for the current language. */
5997       if (rp->lang != NULL && rp->lang != curfdp->lang)
5998         continue;
5999
6000       while (match >= 0 && match < filebuf.len)
6001         {
6002           match = re_search (rp->pat, buffer, filebuf.len, charno,
6003                              filebuf.len - match, &rp->regs);
6004           switch (match)
6005             {
6006             case -2:
6007               /* Some error. */
6008               if (!rp->error_signaled)
6009                 {
6010                   error ("regexp stack overflow while matching \"%s\"",
6011                          rp->pattern);
6012                   rp->error_signaled = TRUE;
6013                 }
6014               break;
6015             case -1:
6016               /* No match. */
6017               break;
6018             default:
6019               if (match == rp->regs.end[0])
6020                 {
6021                   if (!rp->error_signaled)
6022                     {
6023                       error ("regexp matches the empty string: \"%s\"",
6024                              rp->pattern);
6025                       rp->error_signaled = TRUE;
6026                     }
6027                   match = -3;   /* exit from while loop */
6028                   break;
6029                 }
6030
6031               /* Match occurred.  Construct a tag. */
6032               while (charno < rp->regs.end[0])
6033                 if (buffer[charno++] == '\n')
6034                   lineno++, linecharno = charno;
6035               name = rp->name;
6036               if (name[0] == '\0')
6037                 name = NULL;
6038               else /* make a named tag */
6039                 name = substitute (buffer, rp->name, &rp->regs);
6040               if (rp->force_explicit_name)
6041                 /* Force explicit tag name, if a name is there. */
6042                 pfnote (name, TRUE, buffer + linecharno,
6043                         charno - linecharno + 1, lineno, linecharno);
6044               else
6045                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6046                           charno - linecharno + 1, lineno, linecharno);
6047               break;
6048             }
6049         }
6050     }
6051 }
6052
6053 \f
6054 static bool
6055 nocase_tail (cp)
6056      char *cp;
6057 {
6058   register int len = 0;
6059
6060   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6061     cp++, len++;
6062   if (*cp == '\0' && !intoken (dbp[len]))
6063     {
6064       dbp += len;
6065       return TRUE;
6066     }
6067   return FALSE;
6068 }
6069
6070 static void
6071 get_tag (bp, namepp)
6072      register char *bp;
6073      char **namepp;
6074 {
6075   register char *cp = bp;
6076
6077   if (*bp != '\0')
6078     {
6079       /* Go till you get to white space or a syntactic break */
6080       for (cp = bp + 1; !notinname (*cp); cp++)
6081         continue;
6082       make_tag (bp, cp - bp, TRUE,
6083                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6084     }
6085
6086   if (namepp != NULL)
6087     *namepp = savenstr (bp, cp - bp);
6088 }
6089
6090 /*
6091  * Read a line of text from `stream' into `lbp', excluding the
6092  * newline or CR-NL, if any.  Return the number of characters read from
6093  * `stream', which is the length of the line including the newline.
6094  *
6095  * On DOS or Windows we do not count the CR character, if any before the
6096  * NL, in the returned length; this mirrors the behavior of Emacs on those
6097  * platforms (for text files, it translates CR-NL to NL as it reads in the
6098  * file).
6099  *
6100  * If multi-line regular expressions are requested, each line read is
6101  * appended to `filebuf'.
6102  */
6103 static long
6104 readline_internal (lbp, stream)
6105      linebuffer *lbp;
6106      register FILE *stream;
6107 {
6108   char *buffer = lbp->buffer;
6109   register char *p = lbp->buffer;
6110   register char *pend;
6111   int chars_deleted;
6112
6113   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6114
6115   for (;;)
6116     {
6117       register int c = getc (stream);
6118       if (p == pend)
6119         {
6120           /* We're at the end of linebuffer: expand it. */
6121           lbp->size *= 2;
6122           xrnew (buffer, lbp->size, char);
6123           p += buffer - lbp->buffer;
6124           pend = buffer + lbp->size;
6125           lbp->buffer = buffer;
6126         }
6127       if (c == EOF)
6128         {
6129           *p = '\0';
6130           chars_deleted = 0;
6131           break;
6132         }
6133       if (c == '\n')
6134         {
6135           if (p > buffer && p[-1] == '\r')
6136             {
6137               p -= 1;
6138 #ifdef DOS_NT
6139              /* Assume CRLF->LF translation will be performed by Emacs
6140                 when loading this file, so CRs won't appear in the buffer.
6141                 It would be cleaner to compensate within Emacs;
6142                 however, Emacs does not know how many CRs were deleted
6143                 before any given point in the file.  */
6144               chars_deleted = 1;
6145 #else
6146               chars_deleted = 2;
6147 #endif
6148             }
6149           else
6150             {
6151               chars_deleted = 1;
6152             }
6153           *p = '\0';
6154           break;
6155         }
6156       *p++ = c;
6157     }
6158   lbp->len = p - buffer;
6159
6160   if (need_filebuf              /* we need filebuf for multi-line regexps */
6161       && chars_deleted > 0)     /* not at EOF */
6162     {
6163       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6164         {
6165           /* Expand filebuf. */
6166           filebuf.size *= 2;
6167           xrnew (filebuf.buffer, filebuf.size, char);
6168         }
6169       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6170       filebuf.len += lbp->len;
6171       filebuf.buffer[filebuf.len++] = '\n';
6172       filebuf.buffer[filebuf.len] = '\0';
6173     }
6174
6175   return lbp->len + chars_deleted;
6176 }
6177
6178 /*
6179  * Like readline_internal, above, but in addition try to match the
6180  * input line against relevant regular expressions and manage #line
6181  * directives.
6182  */
6183 static void
6184 readline (lbp, stream)
6185      linebuffer *lbp;
6186      FILE *stream;
6187 {
6188   long result;
6189
6190   linecharno = charno;          /* update global char number of line start */
6191   result = readline_internal (lbp, stream); /* read line */
6192   lineno += 1;                  /* increment global line number */
6193   charno += result;             /* increment global char number */
6194
6195   /* Honour #line directives. */
6196   if (!no_line_directive)
6197     {
6198       static bool discard_until_line_directive;
6199
6200       /* Check whether this is a #line directive. */
6201       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6202         {
6203           unsigned int lno;
6204           int start = 0;
6205
6206           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6207               && start > 0)     /* double quote character found */
6208             {
6209               char *endp = lbp->buffer + start;
6210
6211               while ((endp = etags_strchr (endp, '"')) != NULL
6212                      && endp[-1] == '\\')
6213                 endp++;
6214               if (endp != NULL)
6215                 /* Ok, this is a real #line directive.  Let's deal with it. */
6216                 {
6217                   char *taggedabsname;  /* absolute name of original file */
6218                   char *taggedfname;    /* name of original file as given */
6219                   char *name;           /* temp var */
6220
6221                   discard_until_line_directive = FALSE; /* found it */
6222                   name = lbp->buffer + start;
6223                   *endp = '\0';
6224                   canonicalize_filename (name); /* for DOS */
6225                   taggedabsname = absolute_filename (name, tagfiledir);
6226                   if (filename_is_absolute (name)
6227                       || filename_is_absolute (curfdp->infname))
6228                     taggedfname = savestr (taggedabsname);
6229                   else
6230                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6231
6232                   if (streq (curfdp->taggedfname, taggedfname))
6233                     /* The #line directive is only a line number change.  We
6234                        deal with this afterwards. */
6235                     free (taggedfname);
6236                   else
6237                     /* The tags following this #line directive should be
6238                        attributed to taggedfname.  In order to do this, set
6239                        curfdp accordingly. */
6240                     {
6241                       fdesc *fdp; /* file description pointer */
6242
6243                       /* Go look for a file description already set up for the
6244                          file indicated in the #line directive.  If there is
6245                          one, use it from now until the next #line
6246                          directive. */
6247                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6248                         if (streq (fdp->infname, curfdp->infname)
6249                             && streq (fdp->taggedfname, taggedfname))
6250                           /* If we remove the second test above (after the &&)
6251                              then all entries pertaining to the same file are
6252                              coalesced in the tags file.  If we use it, then
6253                              entries pertaining to the same file but generated
6254                              from different files (via #line directives) will
6255                              go into separate sections in the tags file.  These
6256                              alternatives look equivalent.  The first one
6257                              destroys some apparently useless information. */
6258                           {
6259                             curfdp = fdp;
6260                             free (taggedfname);
6261                             break;
6262                           }
6263                       /* Else, if we already tagged the real file, skip all
6264                          input lines until the next #line directive. */
6265                       if (fdp == NULL) /* not found */
6266                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6267                           if (streq (fdp->infabsname, taggedabsname))
6268                             {
6269                               discard_until_line_directive = TRUE;
6270                               free (taggedfname);
6271                               break;
6272                             }
6273                       /* Else create a new file description and use that from
6274                          now on, until the next #line directive. */
6275                       if (fdp == NULL) /* not found */
6276                         {
6277                           fdp = fdhead;
6278                           fdhead = xnew (1, fdesc);
6279                           *fdhead = *curfdp; /* copy curr. file description */
6280                           fdhead->next = fdp;
6281                           fdhead->infname = savestr (curfdp->infname);
6282                           fdhead->infabsname = savestr (curfdp->infabsname);
6283                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6284                           fdhead->taggedfname = taggedfname;
6285                           fdhead->usecharno = FALSE;
6286                           fdhead->prop = NULL;
6287                           fdhead->written = FALSE;
6288                           curfdp = fdhead;
6289                         }
6290                     }
6291                   free (taggedabsname);
6292                   lineno = lno - 1;
6293                   readline (lbp, stream);
6294                   return;
6295                 } /* if a real #line directive */
6296             } /* if #line is followed by a a number */
6297         } /* if line begins with "#line " */
6298
6299       /* If we are here, no #line directive was found. */
6300       if (discard_until_line_directive)
6301         {
6302           if (result > 0)
6303             {
6304               /* Do a tail recursion on ourselves, thus discarding the contents
6305                  of the line buffer. */
6306               readline (lbp, stream);
6307               return;
6308             }
6309           /* End of file. */
6310           discard_until_line_directive = FALSE;
6311           return;
6312         }
6313     } /* if #line directives should be considered */
6314
6315   {
6316     int match;
6317     regexp *rp;
6318     char *name;
6319
6320     /* Match against relevant regexps. */
6321     if (lbp->len > 0)
6322       for (rp = p_head; rp != NULL; rp = rp->p_next)
6323         {
6324           /* Only use generic regexps or those for the current language.
6325              Also do not use multiline regexps, which is the job of
6326              regex_tag_multiline. */
6327           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6328               || rp->multi_line)
6329             continue;
6330
6331           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6332           switch (match)
6333             {
6334             case -2:
6335               /* Some error. */
6336               if (!rp->error_signaled)
6337                 {
6338                   error ("regexp stack overflow while matching \"%s\"",
6339                          rp->pattern);
6340                   rp->error_signaled = TRUE;
6341                 }
6342               break;
6343             case -1:
6344               /* No match. */
6345               break;
6346             case 0:
6347               /* Empty string matched. */
6348               if (!rp->error_signaled)
6349                 {
6350                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6351                   rp->error_signaled = TRUE;
6352                 }
6353               break;
6354             default:
6355               /* Match occurred.  Construct a tag. */
6356               name = rp->name;
6357               if (name[0] == '\0')
6358                 name = NULL;
6359               else /* make a named tag */
6360                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6361               if (rp->force_explicit_name)
6362                 /* Force explicit tag name, if a name is there. */
6363                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6364               else
6365                 make_tag (name, strlen (name), TRUE,
6366                           lbp->buffer, match, lineno, linecharno);
6367               break;
6368             }
6369         }
6370   }
6371 }
6372
6373 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy, strlen(cp)+1
 * bytes long, is obtained through savenstr and returned to the caller.
 */
static char *
savestr (cp)
     char *cp;
{
  char *copy = savenstr (cp, strlen (cp));

  return copy;
}
6384
6385 /*
6386  * Return a pointer to a space of size LEN+1 allocated with xnew where
6387  * the string CP has been copied for at most the first LEN characters.
6388  */
6389 static char *
6390 savenstr (cp, len)
6391      char *cp;
6392      int len;
6393 {
6394   register char *dp;
6395
6396   dp = xnew (len + 1, char);
6397   strncpy (dp, cp, len);
6398   dp[len] = '\0';
6399   return dp;
6400 }
6401
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found
 *
 * Identical to POSIX strrchr, included for portability.  As in
 * strrchr, C is converted to char before comparing (so a value such as
 * 0xE9 is found even where plain char is signed), and the terminating
 * NUL counts as part of the string, so searching for '\0' returns a
 * pointer to the terminator.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  const char wanted = (char) c;
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == wanted)
        last = sp;
      if (*sp == '\0')          /* terminator examined above, now stop */
        break;
    }
  return (char *) last;
}
6423
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found
 *
 * Identical to POSIX strchr, included for portability.  As in strchr,
 * C is converted to char before comparing (so a value such as 0xE9 is
 * found even where plain char is signed), and the terminating NUL
 * counts as part of the string, so searching for '\0' returns a
 * pointer to the terminator.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  const char wanted = (char) c;

  for (;; sp++)
    {
      if (*sp == wanted)
        return (char *) sp;
      if (*sp == '\0')          /* reached the end without a match */
        return NULL;
    }
}
6442
/*
 * Compare S1 and S2, treating upper and lower case letters as equal.
 * Two characters are compared through lowcase only when both are
 * alphabetic; otherwise they are compared as they are.  The result is
 * the difference between the first pair of characters that differ,
 * or zero if the strings match.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6463
/*
 * Compare two strings, ignoring case for alphabetic characters.
 * Stop after a given number of characters
 *
 * At most the first N characters of S1 and S2 are examined.  The
 * result is zero if those characters match (in particular whenever N
 * is not positive), otherwise the difference between the first pair
 * of characters that differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is decremented
     only after a pair has matched, so that N == 0 on exit means
     exactly "the first N characters were all equal", even when S1
     ends at that same position. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6489
/* Return a pointer to the first character at or after CP for which
   iswhite is false (the end of the string is not white space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6499
/* Return a pointer to the first character at or after CP that is
   either white space (per iswhite) or the end of the string. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
6509
/* Report a problem through error (S1 is the printf control string,
   S2 the argument for it), then terminate the program with
   EXIT_FAILURE.  Does not return.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6518
/* Print the system error message for the current errno, prefixed by
   S1, using perror; then terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6526
6527 static void
6528 suggest_asking_for_help ()
6529 {
6530   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6531            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6532   exit (EXIT_FAILURE);
6533 }
6534
6535 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6536 static void
6537 error (s1, s2)
6538      const char *s1, *s2;
6539 {
6540   fprintf (stderr, "%s: ", progname);
6541   fprintf (stderr, s1, s2);
6542   fprintf (stderr, "\n");
6543 }
6544
6545 /* Return a newly-allocated string whose contents
6546    concatenate those of s1, s2, s3.  */
6547 static char *
6548 concat (s1, s2, s3)
6549      char *s1, *s2, *s3;
6550 {
6551   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6552   char *result = xnew (len1 + len2 + len3 + 1, char);
6553
6554   strcpy (result, s1);
6555   strcpy (result + len1, s2);
6556   strcpy (result + len1 + len2, s3);
6557   result[len1 + len2 + len3] = '\0';
6558
6559   return result;
6560 }
6561
6562 \f
6563 /* Does the same work as the system V getcwd, but does not need to
6564    guess the buffer size in advance. */
6565 static char *
6566 etags_getcwd ()
6567 {
6568 #ifdef HAVE_GETCWD
6569   int bufsize = 200;
6570   char *path = xnew (bufsize, char);
6571
6572   while (getcwd (path, bufsize) == NULL)
6573     {
6574       if (errno != ERANGE)
6575         pfatal ("getcwd");
6576       bufsize *= 2;
6577       free (path);
6578       path = xnew (bufsize, char);
6579     }
6580
6581   canonicalize_filename (path);
6582   return path;
6583
6584 #else /* not HAVE_GETCWD */
6585 #if MSDOS
6586
6587   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6588
6589   getwd (path);
6590
6591   for (p = path; *p != '\0'; p++)
6592     if (*p == '\\')
6593       *p = '/';
6594     else
6595       *p = lowcase (*p);
6596
6597   return strdup (path);
6598 #else /* not MSDOS */
6599   linebuffer path;
6600   FILE *pipe;
6601
6602   linebuffer_init (&path);
6603   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6604   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6605     pfatal ("pwd");
6606   pclose (pipe);
6607
6608   return path.buffer;
6609 #endif /* not MSDOS */
6610 #endif /* not HAVE_GETCWD */
6611 }
6612
6613 /* Return a newly allocated string containing the file name of FILE
6614    relative to the absolute directory DIR (which should end with a slash). */
6615 static char *
6616 relative_filename (file, dir)
6617      char *file, *dir;
6618 {
6619   char *fp, *dp, *afn, *res;
6620   int i;
6621
6622   /* Find the common root of file and dir (with a trailing slash). */
6623   afn = absolute_filename (file, cwd);
6624   fp = afn;
6625   dp = dir;
6626   while (*fp++ == *dp++)
6627     continue;
6628   fp--, dp--;                   /* back to the first differing char */
6629 #ifdef DOS_NT
6630   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6631     return afn;
6632 #endif
6633   do                            /* look at the equal chars until '/' */
6634     fp--, dp--;
6635   while (*fp != '/');
6636
6637   /* Build a sequence of "../" strings for the resulting relative file name. */
6638   i = 0;
6639   while (*dp == '/')
6640     ++dp;
6641   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6642     {
6643       i += 1;
6644       while (*dp == '/')
6645         ++dp;
6646     }
6647   res = xnew (3*i + strlen (fp + 1) + 1, char);
6648   res[0] = '\0';
6649   while (i-- > 0)
6650     strcat (res, "../");
6651
6652   /* Add the file name relative to the common root of file and dir. */
6653   strcat (res, fp + 1);
6654   free (afn);
6655
6656   return res;
6657 }
6658
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is copied, otherwise it is appended
   to DIR.  The "/dirname/.." and "/." substrings are then removed in
   place.  Should nothing be left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the slash that starts the preceding
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy must not be
                 used here; memmove is defined for overlapping areas.
                 The + 1 moves the terminating NUL as well. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop the "/." component; overlapping move, see above. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6722
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place.  When FILE contains no
   slash, a copy of DIR is returned.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily cut FILE right after its last slash, so that only the
     directory part is made absolute; then put the character back. */
  saved = last_slash[1];
  last_slash[1] = '\0';
  dirname = absolute_filename (file, dir);
  last_slash[1] = saved;

  return dirname;
}
6744
/* Return whether FN is an absolute file name, that is whether it
   begins with a slash or, under DOS_NT, with a drive letter followed
   by ":/".  FN must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6757
/* Put the file name FN in canonical form, in place.  Under DOS_NT a
   lower case drive letter is made upper case and every backslash is
   turned into a slash; elsewhere FN is left untouched.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6776
6777 \f
6778 /* Initialize a linebuffer for use */
6779 static void
6780 linebuffer_init (lbp)
6781      linebuffer *lbp;
6782 {
6783   lbp->size = (DEBUG) ? 3 : 200;
6784   lbp->buffer = xnew (lbp->size, char);
6785   lbp->buffer[0] = '\0';
6786   lbp->len = 0;
6787 }
6788
6789 /* Set the minimum size of a string contained in a linebuffer. */
6790 static void
6791 linebuffer_setlen (lbp, toksize)
6792      linebuffer *lbp;
6793      int toksize;
6794 {
6795   while (lbp->size <= toksize)
6796     {
6797       lbp->size *= 2;
6798       xrnew (lbp->buffer, lbp->size, char);
6799     }
6800   lbp->len = toksize;
6801 }
6802
6803 /* Like malloc but get fatal error if memory is exhausted. */
6804 static PTR
6805 xmalloc (size)
6806      unsigned int size;
6807 {
6808   PTR result = (PTR) malloc (size);
6809   if (result == NULL)
6810     fatal ("virtual memory exhausted", (char *)NULL);
6811   return result;
6812 }
6813
6814 static PTR
6815 xrealloc (ptr, size)
6816      char *ptr;
6817      unsigned int size;
6818 {
6819   PTR result = (PTR) realloc (ptr, size);
6820   if (result == NULL)
6821     fatal ("virtual memory exhausted", (char *)NULL);
6822   return result;
6823 }
6824
6825 /*
6826  * Local Variables:
6827  * indent-tabs-mode: t
6828  * tab-width: 8
6829  * fill-column: 79
6830  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6831  * c-file-style: "gnu"
6832  * End:
6833  */
6834
6835 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6836    (do not change this comment) */
6837
6838 /* etags.c ends here */