lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995, 1998, 1999,
  32   2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
  33   Free Software Foundation, Inc.
  34
  35 This file is not considered part of GNU Emacs.
  36
  37 This program is free software: you can redistribute it and/or modify
  38 it under the terms of the GNU General Public License as published by
  39 the Free Software Foundation, either version 3 of the License, or
  40 (at your option) any later version.
  41
  42 This program is distributed in the hope that it will be useful,
  43 but WITHOUT ANY WARRANTY; without even the implied warranty of
  44 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  45 GNU General Public License for more details.
  46
  47 You should have received a copy of the GNU General Public License
  48 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  49
  50
  51 /* NB To comply with the above BSD license, copyright information is
  52 reproduced in etc/ETAGS.README.  That file should be updated when the
  53 above notices are.
  54
  55 To the best of our knowledge, this code was originally based on the
  56 ctags.c distributed with BSD4.2, which was copyrighted by the
  57 University of California, as described above. */
  58
  59
  60 /*
  61  * Authors:
  62  * 1983 Ctags originally by Ken Arnold.
  63  * 1984 Fortran added by Jim Kleckner.
  64  * 1984 Ed Pelegri-Llopart added C typedefs.
  65  * 1985 Emacs TAGS format by Richard Stallman.
  66  * 1989 Sam Kendall added C++.
  67  * 1992 Joseph B. Wells improved C and C++ parsing.
  68  * 1993 Francesco Potortì reorganized C and C++.
  69  * 1994 Line-by-line regexp tags by Tom Tromey.
  70  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  71  * 2002 #line directives by Francesco Potortì.
  72  *
  73  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  74  */
  75
  76 /*
  77  * If you want to add support for a new language, start by looking at the LUA
  78  * language, which is the simplest.  Alternatively, consider shipping a
  79  * configuration file containing regexp definitions for etags.
  80  */
  81
  82 char pot_etags_version[] = "@(#) pot revision number is 17.38"; /* version id string; external linkage (no `static') */
  83
  84 #define TRUE    1               /* truth values for the int-based `bool' */
  85 #define FALSE   0
  86
  87 #ifdef DEBUG                    /* normalize DEBUG to TRUE/FALSE so `#if DEBUG' works */
  88 #  undef DEBUG
  89 #  define DEBUG TRUE
  90 #else
  91 #  define DEBUG  FALSE
  92 #  define NDEBUG                /* non-debug build: disable assert */
  93 #endif
  94
  95 #ifdef HAVE_CONFIG_H
  96 # include <config.h>
  97   /* On some systems, Emacs defines static as nothing for the sake
  98      of unexec.  We don't want that here since we don't use unexec. */
  99 # undef static
 100 # ifndef PTR                    /* for XEmacs */
 101 #   define PTR void *
 102 # endif
 103 # ifndef __P                    /* for XEmacs */
 104 #   define __P(args) args
 105 # endif
 106 #else  /* no config.h */
 107 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
 108 #   define __P(args) args       /* use prototypes */
 109 #   define PTR void *           /* for generic pointers */
 110 # else /* not standard C */
 111 #   define __P(args) ()         /* no prototypes */
 112 #   define const                /* remove const for old compilers' sake */
 113 #   define PTR long *           /* don't use void* */
 114 # endif
 115 #endif /* !HAVE_CONFIG_H */
 116
 117 #ifndef _GNU_SOURCE
 118 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
 119 #endif
 120
 121 /* WIN32_NATIVE is for XEmacs.
 122    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 123 #ifdef WIN32_NATIVE
 124 # undef MSDOS
 125 # undef  WINDOWSNT
 126 # define WINDOWSNT
 127 #endif /* WIN32_NATIVE */
 128
 129 #ifdef MSDOS
 130 # undef MSDOS
 131 # define MSDOS TRUE
 132 # include <fcntl.h>
 133 # include <sys/param.h>
 134 # include <io.h>
 135 # ifndef HAVE_CONFIG_H
 136 #   define DOS_NT
 137 #   include <sys/config.h>
 138 # endif
 139 #else
 140 # define MSDOS FALSE
 141 #endif /* MSDOS */
 142
 143 #ifdef WINDOWSNT
 144 # include <stdlib.h>
 145 # include <fcntl.h>
 146 # include <string.h>
 147 # include <direct.h>
 148 # include <io.h>
 149 # define MAXPATHLEN _MAX_PATH
 150 # undef HAVE_NTGUI
 151 # undef  DOS_NT
 152 # define DOS_NT
 153 # ifndef HAVE_GETCWD
 154 #   define HAVE_GETCWD
 155 # endif /* undef HAVE_GETCWD */
 156 #else /* not WINDOWSNT */
 157 # ifdef STDC_HEADERS
 158 #  include <stdlib.h>
 159 #  include <string.h>
 160 # else /* no standard C headers */
 161    extern char *getenv __P((const char *));
 162    extern char *strcpy __P((char *, const char *));
 163    extern char *strncpy __P((char *, const char *, unsigned long));
 164    extern char *strcat __P((char *, const char *));
 165    extern char *strncat __P((char *, const char *, unsigned long));
 166    extern int strcmp __P((const char *, const char *));
 167    extern int strncmp __P((const char *, const char *, unsigned long));
 168    extern int system __P((const char *));
 169    extern unsigned long strlen __P((const char *));
 170    extern void *malloc __P((unsigned long));
 171    extern void *realloc __P((void *, unsigned long));
 172    extern void exit __P((int));
 173    extern void free __P((void *));
 174    extern void *memmove __P((void *, const void *, unsigned long));
 175 #  ifdef VMS
 176 #   define EXIT_SUCCESS 1
 177 #   define EXIT_FAILURE 0
 178 #  else /* no VMS */
 179 #   define EXIT_SUCCESS 0
 180 #   define EXIT_FAILURE 1
 181 #  endif
 182 # endif
 183 #endif /* !WINDOWSNT */
 184
 185 #ifdef HAVE_UNISTD_H
 186 # include <unistd.h>
 187 #else
 188 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 189     extern char *getcwd (char *buf, size_t size);
 190 # endif
 191 #endif /* HAVE_UNISTD_H */
 192
 193 #include <stdio.h>
 194 #include <ctype.h>
 195 #include <errno.h>
 196 #ifndef errno
 197   extern int errno;
 198 #endif
 199 #include <sys/types.h>
 200 #include <sys/stat.h>
 201
 202 #include <assert.h>
 203 #ifdef NDEBUG
 204 # undef  assert                 /* some systems have a buggy assert.h */
 205 # define assert(x) ((void) 0)
 206 #endif
 207
 208 #if !defined (S_ISREG) && defined (S_IFREG)
 209 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 210 #endif
 211
 212 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 213 # define NO_LONG_OPTIONS TRUE
 214 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 215   extern char *optarg;
 216   extern int optind, opterr;
 217 #else
 218 # define NO_LONG_OPTIONS FALSE
 219 # include <getopt.h>
 220 #endif /* NO_LONG_OPTIONS */
 221
 222 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 223 # ifdef __CYGWIN__              /* compiling on Cygwin */
 224                              !!! NOTICE !!!
 225  the regex.h distributed with Cygwin is not compatible with etags, alas!
 226 If you want regular expression support, you should delete this notice and
 227               arrange to use the GNU regex.h and regex.c.
 228 # endif
 229 #endif
 230 #include <regex.h>
 231
 232 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 233  Leave it undefined to make the program "etags", which makes emacs-style
 234  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 235 #ifdef CTAGS                    /* normalize CTAGS to TRUE/FALSE so `#if CTAGS' works */
 236 # undef  CTAGS
 237 # define CTAGS TRUE
 238 #else
 239 # define CTAGS FALSE
 240 #endif
 241 /* String predicates.  Both arguments must be non-NULL (asserted in DEBUG builds).  */
 242 #define streq(s,t)      (assert((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 243 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 244 #define strneq(s,t,n)   (assert((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 245 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 246
 247 #define CHARS 256               /* table size: 2^8, one slot per 8-bit char value */
 248 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* low 8 bits of x: always a valid table index */
 249 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 250 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 251 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 252 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 253 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 254
 255 #define ISALNUM(c)      isalnum (CHAR(c)) /* <ctype.h> wrappers: argument is first mapped to 0..255 */
 256 #define ISALPHA(c)      isalpha (CHAR(c))
 257 #define ISDIGIT(c)      isdigit (CHAR(c))
 258 #define ISLOWER(c)      islower (CHAR(c))
 259
 260 #define lowcase(c)      tolower (CHAR(c)) /* case conversion on the masked value */
 261 #define upcase(c)       toupper (CHAR(c))
 262
 263
 264 /*
 265  *      xnew, xrnew -- allocate, reallocate storage for N objects of type TYPE
 266  *      NOTE: (n) * sizeof (Type) is not checked for overflow by these macros.
 267  * SYNOPSIS:    Type *xnew (int n, Type);
 268  *              void xrnew (OldPointer, int n, Type);   result is stored back in OldPointer
 269  */
 270 #if DEBUG                       /* debug build: call trace_malloc/trace_realloc with the call site */
 271 # include "chkmalloc.h"
 272 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 273                                                   (n) * sizeof (Type)))
 274 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 275                                         (char *) (op), (n) * sizeof (Type)))
 276 #else                           /* normal build: go through xmalloc/xrealloc */
 277 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 278 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 279                                         (char *) (op), (n) * sizeof (Type)))
 280 #endif
 281
 282 #define bool int                /* must stay int: longopts uses bool flags as int * flag targets */
 283
 284 typedef void Lang_function __P((FILE *)); /* type of a language parse function (see language.function) */
 285
 286 typedef struct                  /* one row of the compressors[] table */
 287 {
 288   char *suffix;                 /* file name suffix for this compressor, without the dot */
 289   char *command;                /* takes one arg and decompresses to stdout */
 290 } compressor;
 291
 292 typedef struct                  /* description of one supported language */
 293 {
 294   char *name;                   /* language name */
 295   char *help;                   /* detailed help for the language */
 296   Lang_function *function;      /* parse function, called with the input FILE * */
 297   char **suffixes;              /* name suffixes of this language's files */
 298   char **filenames;             /* names of this language's files */
 299   char **interpreters;          /* interpreters for this language */
 300   bool metasource;              /* source used to generate other sources */
 301 } language;
 302
 303 typedef struct fdesc            /* description of one input file; linked through `next' */
 304 {
 305   struct fdesc *next;           /* for the linked list */
 306   char *infname;                /* uncompressed input file name */
 307   char *infabsname;             /* absolute uncompressed input file name */
 308   char *infabsdir;              /* absolute dir of input file */
 309   char *taggedfname;            /* file name to write in tagfile */
 310   language *lang;               /* language of file */
 311   char *prop;                   /* file properties to write in tagfile */
 312   bool usecharno;               /* etags tags shall contain char number */
 313   bool written;                 /* entry written in the tags file */
 314 } fdesc;
 315
 316 typedef struct node_st          /* one tag, stored as a node of a binary tree */
 317 {                               /* sorting structure */
 318   struct node_st *left, *right; /* left and right sons */
 319   fdesc *fdp;                   /* description of the file the tag belongs to */
 320   char *name;                   /* tag name */
 321   char *regex;                  /* search regexp */
 322   bool valid;                   /* write this tag on the tag file */
 323   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 324   bool been_warned;             /* warning already given for duplicated tag */
 325   int lno;                      /* line number tag is on */
 326   long cno;                     /* character number line starts on */
 327 } node;
 328
 329 /*
 330  * A `linebuffer' is a structure which holds a line of text.
 331  * `readline_internal' reads a line from a stream into a linebuffer
 332  * and works regardless of the length of the line.
 333  * SIZE is the size of BUFFER, LEN is the length of the string in
 334  * BUFFER after readline reads it.  Note that SIZE is a long, LEN an int.
 335  */
 336 typedef struct
 337 {
 338   long size;                    /* size of BUFFER */
 339   int len;                      /* length of the string held in BUFFER */
 340   char *buffer;                 /* the text itself */
 341 } linebuffer;
 342
 343 /* One command-line item; used to support mixing of --lang and file names. */
 344 typedef struct
 345 {
 346   enum {
 347     at_language,                /* a language specification */
 348     at_regexp,                  /* a regular expression */
 349     at_filename,                /* a file name */
 350     at_stdin,                   /* read from stdin here */
 351     at_end                      /* stop parsing the list */
 352   } arg_type;                   /* argument type */
 353   language *lang;               /* language associated with the argument */
 354   char *what;                   /* the argument itself */
 355 } argument;
 356
 357 /* Structure defining a regular expression; instances form a list via p_next. */
 358 typedef struct regexp
 359 {
 360   struct regexp *p_next;        /* pointer to next in list */
 361   language *lang;               /* if set, use only for this language */
 362   char *pattern;                /* the regexp pattern */
 363   char *name;                   /* tag name */
 364   struct re_pattern_buffer *pat; /* the compiled pattern */
 365   struct re_registers regs;     /* re registers */
 366   bool error_signaled;          /* already signaled for this regexp */
 367   bool force_explicit_name;     /* do not allow implicit tag name */
 368   bool ignore_case;             /* ignore case when matching */
 369   bool multi_line;              /* do a multi-line match on the whole file */
 370 } regexp;
 371
 372
 373 /* Many compilers barf on this:
 374         Lang_function Ada_funcs;
 375    so let's write it this way */
 376 static void Ada_funcs __P((FILE *));
 377 static void Asm_labels __P((FILE *));
 378 static void C_entries __P((int c_ext, FILE *));
 379 static void default_C_entries __P((FILE *));
 380 static void plain_C_entries __P((FILE *));
 381 static void Cjava_entries __P((FILE *));
 382 static void Cobol_paragraphs __P((FILE *));
 383 static void Cplusplus_entries __P((FILE *));
 384 static void Cstar_entries __P((FILE *));
 385 static void Erlang_functions __P((FILE *));
 386 static void Forth_words __P((FILE *));
 387 static void Fortran_functions __P((FILE *));
 388 static void HTML_labels __P((FILE *));
 389 static void Lisp_functions __P((FILE *));
 390 static void Lua_functions __P((FILE *));
 391 static void Makefile_targets __P((FILE *));
 392 static void Pascal_functions __P((FILE *));
 393 static void Perl_functions __P((FILE *));
 394 static void PHP_functions __P((FILE *));
 395 static void PS_functions __P((FILE *));
 396 static void Prolog_functions __P((FILE *));
 397 static void Python_functions __P((FILE *));
 398 static void Scheme_functions __P((FILE *));
 399 static void TeX_commands __P((FILE *));
 400 static void Texinfo_nodes __P((FILE *));
 401 static void Yacc_entries __P((FILE *));
 402 static void just_read_file __P((FILE *));
 403
 404 static void print_language_names __P((void));
 405 static void print_version __P((void));
 406 static void print_help __P((argument *));
 407 int main __P((int, char **));
 408
 409 static compressor *get_compressor_from_suffix __P((char *, char **));
 410 static language *get_language_from_langname __P((const char *));
 411 static language *get_language_from_interpreter __P((char *));
 412 static language *get_language_from_filename __P((char *, bool));
 413 static void readline __P((linebuffer *, FILE *));
 414 static long readline_internal __P((linebuffer *, FILE *));
 415 static bool nocase_tail __P((char *));
 416 static void get_tag __P((char *, char **));
 417
 418 static void analyse_regex __P((char *));
 419 static void free_regexps __P((void));
 420 static void regex_tag_multiline __P((void));
 421 static void error __P((const char *, const char *));
 422 static void suggest_asking_for_help __P((void));
 423 void fatal __P((char *, char *));
 424 static void pfatal __P((char *));
 425 static void add_node __P((node *, node **));
 426
 427 static void init __P((void));
 428 static void process_file_name __P((char *, language *));
 429 static void process_file __P((FILE *, char *, language *));
 430 static void find_entries __P((FILE *));
 431 static void free_tree __P((node *));
 432 static void free_fdesc __P((fdesc *));
 433 static void pfnote __P((char *, bool, char *, int, int, long));
 434 static void make_tag __P((char *, int, bool, char *, int, int, long));
 435 static void invalidate_nodes __P((fdesc *, node **));
 436 static void put_entries __P((node *));
 437
 438 static char *concat __P((char *, char *, char *));
 439 static char *skip_spaces __P((char *));
 440 static char *skip_non_spaces __P((char *));
 441 static char *savenstr __P((char *, int));
 442 static char *savestr __P((char *));
 443 static char *etags_strchr __P((const char *, int));
 444 static char *etags_strrchr __P((const char *, int));
 445 static int etags_strcasecmp __P((const char *, const char *));
 446 static int etags_strncasecmp __P((const char *, const char *, int));
 447 static char *etags_getcwd __P((void));
 448 static char *relative_filename __P((char *, char *));
 449 static char *absolute_filename __P((char *, char *));
 450 static char *absolute_dirname __P((char *, char *));
 451 static bool filename_is_absolute __P((char *f));
 452 static void canonicalize_filename __P((char *));
 453 static void linebuffer_init __P((linebuffer *));
 454 static void linebuffer_setlen __P((linebuffer *, int));
 455 static PTR xmalloc __P((unsigned int));
 456 static PTR xrealloc __P((char *, unsigned int));
 457
 458 \f
 459 static char searchar = '/';     /* use /.../ searches */
 460
 461 static char *tagfile;           /* output file */
 462 static char *progname;          /* name this program was invoked with */
 463 static char *cwd;               /* current working directory */
 464 static char *tagfiledir;        /* directory of tagfile */
 465 static FILE *tagf;              /* ioptr for tags file */
 466
 467 static fdesc *fdhead;           /* head of file description list */
 468 static fdesc *curfdp;           /* current file description */
 469 static int lineno;              /* line number of current line */
 470 static long charno;             /* current character number */
 471 static long linecharno;         /* charno of start of current line */
 472 static char *dbp;               /* pointer to start of current tag */
 473
 474 static const int invalidcharno = -1; /* presumably a "no char number" sentinel -- TODO confirm at use sites */
 475
 476 static node *nodehead;          /* the head of the binary tree of tags */
 477 static node *last_node;         /* the last node created */
 478
 479 static linebuffer lb;           /* the current line */
 480 static linebuffer filebuf;      /* a buffer containing the whole file */
 481 static linebuffer token_name;   /* a buffer containing a tag name */
 482
 483 /* boolean "functions": tables indexed by CHAR(c), read by iswhite & co. (see init) */
 484 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 485 static char
 486   /* white chars (see iswhite) */
 487   *white = " \f\t\n\r\v",
 488   /* not in a name (see notinname) */
 489   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 490   /* token ending chars (see endtoken) */
 491   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 492   /* token starting chars (see begtoken) */
 493   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 494   /* valid in-token chars (see intoken) */
 495   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 496
 497 static bool append_to_tagfile;  /* -a: append to tags */
 498 /* The next five default to TRUE in C and derived languages.  */
 499 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 500 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 501                                 /* 0 struct/enum/union decls, and C++ */
 502                                 /* member functions. */
 503 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 504                                 /* constants and variables. */
 505                                 /* -D: opposite of -d.  Default under ctags. */
 506 static bool globals;            /* --globals, --no-globals: tag global variables */
 507 static bool members;            /* --members, --no-members: tag C member variables */
 508 static bool declarations;       /* --declarations: tag declarations and externs in C & co. */
 509 static bool no_line_directive;  /* --no-line-directive: ignore #line directives (undocumented) */
 510 static bool no_duplicates;      /* --no-duplicates: no duplicate tags for ctags (undocumented) */
 511 static bool update;             /* -u: update tags */
 512 static bool vgrind_style;       /* -v: create vgrind style index output */
 513 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 514 static bool cxref_style;        /* -x: create cxref style output */
 515 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 516 static bool ignoreindent;       /* -I: ignore indentation in C */
 517 static bool packages_only;      /* --packages-only: in Ada, only tag packages */
 518
 519 /* STDIN is defined in LynxOS system headers; drop that definition first. */
 520 #ifdef STDIN
 521 # undef STDIN
 522 #endif
 523
 524 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin; above any char code */
 525 static bool parsing_stdin;      /* --parse-stdin used */
 526
 527 static regexp *p_head;          /* list of all regexps */
 528 static bool need_filebuf;       /* some regexes are multi-line */
 529
 530 static struct option longopts[] =       /* rows are { name, has_arg, flag, val } */
 531 {                       /* non-NULL flag: getopt_long stores val in *flag and returns 0 */
 532   { "append",             no_argument,       NULL,               'a'   },
 533   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 534   { "c++",                no_argument,       NULL,               'C'   },
 535   { "declarations",       no_argument,       &declarations,      TRUE  },
 536   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 537   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 538   { "help",               no_argument,       NULL,               'h'   },
 539   { "help",               no_argument,       NULL,               'H'   }, /* same name as the row above; NOTE(review): probably never selected -- confirm */
 540   { "ignore-indentation", no_argument,       NULL,               'I'   },
 541   { "language",           required_argument, NULL,               'l'   },
 542   { "members",            no_argument,       &members,           TRUE  },
 543   { "no-members",         no_argument,       &members,           FALSE },
 544   { "output",             required_argument, NULL,               'o'   },
 545   { "regex",              required_argument, NULL,               'r'   },
 546   { "no-regex",           no_argument,       NULL,               'R'   },
 547   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 548   { "parse-stdin",        required_argument, NULL,               STDIN },
 549   { "version",            no_argument,       NULL,               'V'   },
 550
 551 #if CTAGS /* Ctags options */
 552   { "backward-search",    no_argument,       NULL,               'B'   },
 553   { "cxref",              no_argument,       NULL,               'x'   },
 554   { "defines",            no_argument,       NULL,               'd'   },
 555   { "globals",            no_argument,       &globals,           TRUE  },
 556   { "typedefs",           no_argument,       NULL,               't'   },
 557   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 558   { "update",             no_argument,       NULL,               'u'   },
 559   { "vgrind",             no_argument,       NULL,               'v'   },
 560   { "no-warn",            no_argument,       NULL,               'w'   },
 561
 562 #else /* Etags options */
 563   { "no-defines",         no_argument,       NULL,               'D'   },
 564   { "no-globals",         no_argument,       &globals,           FALSE },
 565   { "include",            required_argument, NULL,               'i'   },
 566 #endif
 567   { NULL }                      /* end of table */
 568 };
 569
 570 static compressor compressors[] =       /* rows are { suffix, command } */
 571 {
 572   { "z", "gzip -d -c"},
 573   { "Z", "gzip -d -c"},
 574   { "gz", "gzip -d -c"},
 575   { "GZ", "gzip -d -c"},
 576   { "bz2", "bzip2 -d -c" },
 577   { NULL }                      /* end of table: NULL suffix */
 578 };
 579
 580 /*
 581  * Language stuff.
 582  */
 583
 584 /* Ada code */
 585 static char *Ada_suffixes [] =
 586   { "ads", "adb", "ada", NULL };
 587 static char Ada_help [] =
 588 "In Ada code, functions, procedures, packages, tasks and types are\n\
 589 tags.  Use the `--packages-only' option to create tags for\n\
 590 packages only.\n\
 591 Ada tag names have suffixes indicating the type of entity:\n\
 592         Entity type:    Qualifier:\n\
 593         ------------    ----------\n\
 594         function        /f\n\
 595         procedure       /p\n\
 596         package spec    /s\n\
 597         package body    /b\n\
 598         type            /t\n\
 599         task            /k\n\
 600 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 601 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 602 will just search for any tag `bidule'.";
 603
 604 /* Assembly code */
 605 static char *Asm_suffixes [] =
 606   { "a",        /* Unix assembler */
 607     "asm", /* Microcontroller assembly */
 608     "def", /* BSO/Tasking definition includes  */
 609     "inc", /* Microcontroller include files */
 610     "ins", /* Microcontroller include files */
 611     "s", "sa", /* Unix assembler */
 612     "S",   /* cpp-processed Unix assembler */
 613     "src", /* BSO/Tasking C compiler output */
 614     NULL
 615   };
 616 static char Asm_help [] =
 617 "In assembler code, labels appearing at the beginning of a line,\n\
 618 followed by a colon, are tags.";
 619
 620
 621 /* Note that .c and .h can be considered C++, if the --c++ flag was
 622    given, or if the `class' or `template' keywords are met inside the file.
 623    That is why default_C_entries is called for these. */
 624 static char *default_C_suffixes [] =
 625   { "c", "h", NULL };
 626 #if CTAGS                               /* C help for Ctags */
 627 static char default_C_help [] =
 628 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 629 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 630 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 631 Use --globals to tag global variables.\n\
 632 You can tag function declarations and external variables by\n\
 633 using `--declarations', and struct members by using `--members'.";
 634 #else                                   /* C help for Etags */
 635 static char default_C_help [] =
 636 "In C code, any C function or typedef is a tag, and so are\n\
 637 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 638 definitions and `enum' constants are tags unless you specify\n\
 639 `--no-defines'.  Global variables are tags unless you specify\n\
 640 `--no-globals' and so are struct members unless you specify\n\
 641 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 642 `--no-members' can make the tags table file much smaller.\n\
 643 You can tag function declarations and external variables by\n\
 644 using `--declarations'.";
 645 #endif  /* C help for Ctags and Etags */
 646
 647 static char *Cplusplus_suffixes [] =
 648   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 649     "M",                        /* Objective C++ */
 650     "pdb",                      /* Postscript with C syntax */
 651     NULL };
 652 static char Cplusplus_help [] =
 653 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 654 --help --lang=c --lang=c++ for full help.)\n\
 655 In addition to C tags, member functions are also recognized.  Member\n\
 656 variables are recognized unless you use the `--no-members' option.\n\
 657 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 658 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 659 `operator+'.";
 660
 661 static char *Cjava_suffixes [] =
 662   { "java", NULL };
 663 static char Cjava_help [] =
 664 "In Java code, all the tags constructs of C and C++ code are\n\
 665 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 666
 667
 668 static char *Cobol_suffixes [] =
 669   { "COB", "cob", NULL };
 670 static char Cobol_help [] =
 671 "In Cobol code, tags are paragraph names; that is, any word\n\
 672 starting in column 8 and followed by a period.";
 673
 674 static char *Cstar_suffixes [] =
 675   { "cs", "hs", NULL };
 676
 677 static char *Erlang_suffixes [] =
 678   { "erl", "hrl", NULL };
 679 static char Erlang_help [] =
 680 "In Erlang code, the tags are the functions, records and macros\n\
 681 defined in the file.";
 682 /* Forth code.  The suffix table is file-local, like every other suffix table here. */
 683 static char *Forth_suffixes [] =
 684   { "fth", "tok", NULL };
 685 static char Forth_help [] =
 686 "In Forth code, tags are words defined by `:',\n\
 687 constant, code, create, defer, value, variable, buffer:, field.";
 688
 689 static char *Fortran_suffixes [] =
 690   { "F", "f", "f90", "for", NULL };
 691 static char Fortran_help [] =
 692 "In Fortran code, functions, subroutines and block data are tags.";
 693
 694 static char *HTML_suffixes [] =
 695   { "htm", "html", "shtml", NULL };
 696 static char HTML_help [] =
 697 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 698 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 699 occurrences of `id='.";
 700
 701 static char *Lisp_suffixes [] =
 702   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 703 static char Lisp_help [] =
 704 "In Lisp code, any function defined with `defun', any variable\n\
 705 defined with `defvar' or `defconst', and in general the first\n\
 706 argument of any expression that starts with `(def' in column zero\n\
 707 is a tag.";
 708
 709 static char *Lua_suffixes [] =
 710   { "lua", "LUA", NULL };
 711 static char Lua_help [] =
 712 "In Lua scripts, all functions are tags.";
 713
 714 static char *Makefile_filenames [] =
 715   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 716 static char Makefile_help [] =
 717 "In makefiles, targets are tags; additionally, variables are tags\n\
 718 unless you specify `--no-globals'.";
 719
 720 static char *Objc_suffixes [] =
 721   { "lm",                       /* Objective lex file */
 722     "m",                        /* Objective C file */
 723      NULL };
 724 static char Objc_help [] =
 725 "In Objective C code, tags include Objective C definitions for classes,\n\
 726 class categories, methods and protocols.  Tags for variables and\n\
 727 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 728 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 729
 730 static char *Pascal_suffixes [] =
 731   { "p", "pas", NULL };
 732 static char Pascal_help [] =
 733 "In Pascal code, the tags are the functions and procedures defined\n\
 734 in the file.";
 735 /* " // this is for working around an Emacs highlighting bug... */
 736
 737 static char *Perl_suffixes [] =
 738   { "pl", "pm", NULL };
 739 static char *Perl_interpreters [] =
 740   { "perl", "@PERL@", NULL };
 741 static char Perl_help [] =
 742 "In Perl code, the tags are the packages, subroutines and variables\n\
 743 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 744 `--globals' if you want to tag global variables.  Tags for\n\
 745 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 746 defined in the default package is `main::SUB'.";
 747
 748 static char *PHP_suffixes [] =
 749   { "php", "php3", "php4", NULL };
 750 static char PHP_help [] =
 751 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 752 the `--no-members' option, vars are tags too.";
 753
 754 static char *plain_C_suffixes [] =
 755   { "pc",                       /* Pro*C file */
 756      NULL };
 757
 758 static char *PS_suffixes [] =
 759   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 760 static char PS_help [] =
 761 "In PostScript code, the tags are the functions.";
 762
 763 static char *Prolog_suffixes [] =
 764   { "prolog", NULL };
 765 static char Prolog_help [] =
 766 "In Prolog code, tags are predicates and rules at the beginning of\n\
 767 line.";
 768
 769 static char *Python_suffixes [] =
 770   { "py", NULL };
 771 static char Python_help [] =
 772 "In Python code, `def' or `class' at the beginning of a line\n\
 773 generate a tag.";
 774
 775 /* Can't do the `SCM' or `scm' prefix with a version number. */
 776 static char *Scheme_suffixes [] =
 777   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 778 static char Scheme_help [] =
 779 "In Scheme code, tags include anything defined with `def' or with a\n\
 780 construct whose name starts with `def'.  They also include\n\
 781 variables set with `set!' at top level in the file.";
 782
 783 static char *TeX_suffixes [] =
 784   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 785 static char TeX_help [] =
 786 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 787 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 788 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 789 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 790 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 791 \n\
 792 Other commands can be specified by setting the environment variable\n\
 793 `TEXTAGS' to a colon-separated list like, for example,\n\
 794      TEXTAGS=\"mycommand:myothercommand\".";
 795
 796
 797 static char *Texinfo_suffixes [] =
 798   { "texi", "texinfo", "txi", NULL };
 799 static char Texinfo_help [] =
 800 "for texinfo files, lines starting with @node are tagged.";
 801
 802 static char *Yacc_suffixes [] =
 803   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 804 static char Yacc_help [] =
 805 "In Bison or Yacc input files, each rule defines as a tag the\n\
 806 nonterminal it constructs.  The portions of the file that contain\n\
 807 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 808 for full help).";
 809
 810 static char auto_help [] =
 811 "`auto' is not a real language, it indicates to use\n\
 812 a default language for files base on file name suffix and file contents.";
 813
 814 static char none_help [] =
 815 "`none' is not a real language, it indicates to only do\n\
 816 regexp processing on files.";
 817
 818 static char no_lang_help [] =
 819 "No detailed help available for this language.";
 820
 821
 822 /*
 823  * Table of languages.
 824  *
 825  * It is ok for a given function to be listed under more than one
 826  * name.  I just didn't.
 827  */
 828
 829 static language lang_names [] =
 830 {
 831   { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
 832   { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
 833   { "c",         default_C_help, default_C_entries, default_C_suffixes },
 834   { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
 835   { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
 836   { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
 837   { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
 838   { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
 839   { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
 840   { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
 841   { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
 842   { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
 843   { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
 844   { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
 845   { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
 846   { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
 847   { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
 848   { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
 849   { "postscript",PS_help,        PS_functions,      PS_suffixes        },
 850   { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
 851   { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
 852   { "python",    Python_help,    Python_functions,  Python_suffixes    },
 853   { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
 854   { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
 855   { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
 856   { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
 857   { "auto",      auto_help },                      /* default guessing scheme */
 858   { "none",      none_help,      just_read_file }, /* regexp matching only */
 859   { NULL }                /* end of list */
 860 };
 861
 862 \f
 863 static void
 864 print_language_names ()
 865 {
 866   language *lang;
 867   char **name, **ext;
 868
 869   puts ("\nThese are the currently supported languages, along with the\n\
 870 default file names and dot suffixes:");
 871   for (lang = lang_names; lang->name != NULL; lang++)
 872     {
 873       printf ("  %-*s", 10, lang->name);
 874       if (lang->filenames != NULL)
 875         for (name = lang->filenames; *name != NULL; name++)
 876           printf (" %s", *name);
 877       if (lang->suffixes != NULL)
 878         for (ext = lang->suffixes; *ext != NULL; ext++)
 879           printf (" .%s", *ext);
 880       puts ("");
 881     }
 882   puts ("where `auto' means use default language for files based on file\n\
 883 name suffix, and `none' means only do regexp processing on files.\n\
 884 If no language is specified and no matching suffix is found,\n\
 885 the first line of the file is read for a sharp-bang (#!) sequence\n\
 886 followed by the name of an interpreter.  If no such sequence is found,\n\
 887 Fortran is tried first; if no tags are found, C is tried next.\n\
 888 When parsing any C file, a \"class\" or \"template\" keyword\n\
 889 switches to C++.");
 890   puts ("Compressed files are supported using gzip and bzip2.\n\
 891 \n\
 892 For detailed help on a given language use, for example,\n\
 893 etags --help --lang=ada.");
 894 }
 895
 896 #ifndef EMACS_NAME
 897 # define EMACS_NAME "standalone"
 898 #endif
 899 #ifndef VERSION
 900 # define VERSION "17.38"
 901 #endif
 902 static void
 903 print_version ()
 904 {
 905   /* Makes it easier to update automatically. */
 906   char emacs_copyright[] = "Copyright (C) 2008 Free Software Foundation, Inc.";
 907
 908   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 909   puts (emacs_copyright);
 910   puts ("This program is distributed under the terms in ETAGS.README");
 911
 912   exit (EXIT_SUCCESS);
 913 }
 914
 915 #ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
 916 # define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
 917 #endif
 918
 919 static void
 920 print_help (argbuffer)
 921      argument *argbuffer;
 922 {
 923   bool help_for_lang = FALSE;
 924
 925   for (; argbuffer->arg_type != at_end; argbuffer++)
 926     if (argbuffer->arg_type == at_language)
 927       {
 928         if (help_for_lang)
 929           puts ("");
 930         puts (argbuffer->lang->help);
 931         help_for_lang = TRUE;
 932       }
 933
 934   if (help_for_lang)
 935     exit (EXIT_SUCCESS);
 936
 937   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 938 \n\
 939 These are the options accepted by %s.\n", progname, progname);
 940   if (NO_LONG_OPTIONS)
 941     puts ("WARNING: long option names do not work with this executable,\n\
 942 as it is not linked with GNU getopt.");
 943   else
 944     puts ("You may use unambiguous abbreviations for the long option names.");
 945   puts ("  A - as file name means read names from stdin (one per line).\n\
 946 Absolute names are stored in the output file as they are.\n\
 947 Relative ones are stored relative to the output file's directory.\n");
 948
 949   puts ("-a, --append\n\
 950         Append tag entries to existing tags file.");
 951
 952   puts ("--packages-only\n\
 953         For Ada files, only generate tags for packages.");
 954
 955   if (CTAGS)
 956     puts ("-B, --backward-search\n\
 957         Write the search commands for the tag entries using '?', the\n\
 958         backward-search command instead of '/', the forward-search command.");
 959
 960   /* This option is mostly obsolete, because etags can now automatically
 961      detect C++.  Retained for backward compatibility and for debugging and
 962      experimentation.  In principle, we could want to tag as C++ even
 963      before any "class" or "template" keyword.
 964   puts ("-C, --c++\n\
 965         Treat files whose name suffix defaults to C language as C++ files.");
 966   */
 967
 968   puts ("--declarations\n\
 969         In C and derived languages, create tags for function declarations,");
 970   if (CTAGS)
 971     puts ("\tand create tags for extern variables if --globals is used.");
 972   else
 973     puts
 974       ("\tand create tags for extern variables unless --no-globals is used.");
 975
 976   if (CTAGS)
 977     puts ("-d, --defines\n\
 978         Create tag entries for C #define constants and enum constants, too.");
 979   else
 980     puts ("-D, --no-defines\n\
 981         Don't create tag entries for C #define constants and enum constants.\n\
 982         This makes the tags file smaller.");
 983
 984   if (!CTAGS)
 985     puts ("-i FILE, --include=FILE\n\
 986         Include a note in tag file indicating that, when searching for\n\
 987         a tag, one should also consult the tags file FILE after\n\
 988         checking the current file.");
 989
 990   puts ("-l LANG, --language=LANG\n\
 991         Force the following files to be considered as written in the\n\
 992         named language up to the next --language=LANG option.");
 993
 994   if (CTAGS)
 995     puts ("--globals\n\
 996         Create tag entries for global variables in some languages.");
 997   else
 998     puts ("--no-globals\n\
 999         Do not create tag entries for global variables in some\n\
1000         languages.  This makes the tags file smaller.");
1001
1002   if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1003     puts ("--no-line-directive\n\
1004         Ignore #line preprocessor directives in C and derived languages.");
1005
1006   if (CTAGS)
1007     puts ("--members\n\
1008         Create tag entries for members of structures in some languages.");
1009   else
1010     puts ("--no-members\n\
1011         Do not create tag entries for members of structures\n\
1012         in some languages.");
1013
1014   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
1015         Make a tag for each line matching a regular expression pattern\n\
1016         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
1017         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
1018         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
1019         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
1020   puts ("       If TAGNAME/ is present, the tags created are named.\n\
1021         For example Tcl named tags can be created with:\n\
1022           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
1023         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
1024         `m' means to allow multi-line matches, `s' implies `m' and\n\
1025         causes dot to match any character, including newline.");
1026
1027   puts ("-R, --no-regex\n\
1028         Don't create tags from regexps for the following files.");
1029
1030   puts ("-I, --ignore-indentation\n\
1031         In C and C++ do not assume that a closing brace in the first\n\
1032         column is the final brace of a function or structure definition.");
1033
1034   puts ("-o FILE, --output=FILE\n\
1035         Write the tags to FILE.");
1036
1037   puts ("--parse-stdin=NAME\n\
1038         Read from standard input and record tags as belonging to file NAME.");
1039
1040   if (CTAGS)
1041     {
1042       puts ("-t, --typedefs\n\
1043         Generate tag entries for C and Ada typedefs.");
1044       puts ("-T, --typedefs-and-c++\n\
1045         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
1046         and C++ member functions.");
1047     }
1048
1049   if (CTAGS)
1050     puts ("-u, --update\n\
1051         Update the tag entries for the given files, leaving tag\n\
1052         entries for other files in place.  Currently, this is\n\
1053         implemented by deleting the existing entries for the given\n\
1054         files and then rewriting the new entries at the end of the\n\
1055         tags file.  It is often faster to simply rebuild the entire\n\
1056         tag file than to use this.");
1057
1058   if (CTAGS)
1059     {
1060       puts ("-v, --vgrind\n\
1061         Print on the standard output an index of items intended for\n\
1062         human consumption, similar to the output of vgrind.  The index\n\
1063         is sorted, and gives the page number of each item.");
1064
1065       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1066         puts ("-w, --no-duplicates\n\
1067         Do not create duplicate tag entries, for compatibility with\n\
1068         traditional ctags.");
1069
1070       if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
1071         puts ("-w, --no-warn\n\
1072         Suppress warning messages about duplicate tag entries.");
1073
1074       puts ("-x, --cxref\n\
1075         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1076         The output uses line numbers instead of page numbers, but\n\
1077         beyond that the differences are cosmetic; try both to see\n\
1078         which you like.");
1079     }
1080
1081   puts ("-V, --version\n\
1082         Print the version of the program.\n\
1083 -h, --help\n\
1084         Print this help message.\n\
1085         Followed by one or more `--language' options prints detailed\n\
1086         help about tag generation for the specified languages.");
1087
1088   print_language_names ();
1089
1090   puts ("");
1091   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1092
1093   exit (EXIT_SUCCESS);
1094 }
1095
1096 \f
1097 #ifdef VMS                      /* VMS specific functions */
1098
/* String terminator.  */
#define EOS     '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* A counted string: CURLEN characters stored in BODY, which has room
   for MAX_FILE_SPEC_LEN characters plus a terminating EOS.  */
typedef struct
{
  short curlen;
  char body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1108
1109 /*
1110  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1111  returning in each successive call the next file name matching the input
1112  spec. The function expects that each in_spec passed
1113  to it will be processed to completion; in particular, up to and
1114  including the call following that in which the last matching name
1115  is returned, the function ignores the value of in_spec, and will
1116  only start processing a new spec with the following call.
1117  If an error occurs, on return out_spec contains the value
1118  of in_spec when the error occurred.
1119
1120  With each successive file name returned in out_spec, the
1121  function's return value is one. When there are no more matching
1122  names the function returns zero. If on the first call no file
1123  matches in_spec, or there is any other error, -1 is returned.
1124 */
1125
1126 #include        <rmsdef.h>
1127 #include        <descrip.h>
1128 #define         OUTSIZE MAX_FILE_SPEC_LEN
1129 static short
1130 fn_exp (out, in)
1131      vspec *out;
1132      char *in;
1133 {
1134   static long context = 0;
1135   static struct dsc$descriptor_s o;
1136   static struct dsc$descriptor_s i;
1137   static bool pass1 = TRUE;
1138   long status;
1139   short retval;
1140
1141   if (pass1)
1142     {
1143       pass1 = FALSE;
1144       o.dsc$a_pointer = (char *) out;
1145       o.dsc$w_length = (short)OUTSIZE;
1146       i.dsc$a_pointer = in;
1147       i.dsc$w_length = (short)strlen(in);
1148       i.dsc$b_dtype = DSC$K_DTYPE_T;
1149       i.dsc$b_class = DSC$K_CLASS_S;
1150       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1151       o.dsc$b_class = DSC$K_CLASS_VS;
1152     }
1153   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1154     {
1155       out->body[out->curlen] = EOS;
1156       return 1;
1157     }
1158   else if (status == RMS$_NMF)
1159     retval = 0;
1160   else
1161     {
1162       strcpy(out->body, in);
1163       retval = -1;
1164     }
1165   lib$find_file_end(&context);
1166   pass1 = TRUE;
1167   return retval;
1168 }
1169
1170 /*
1171   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1172   name of each file specified by the provided arg expanding wildcards.
1173 */
1174 static char *
1175 gfnames (arg, p_error)
1176      char *arg;
1177      bool *p_error;
1178 {
1179   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1180
1181   switch (fn_exp (&filename, arg))
1182     {
1183     case 1:
1184       *p_error = FALSE;
1185       return filename.body;
1186     case 0:
1187       *p_error = FALSE;
1188       return NULL;
1189     default:
1190       *p_error = TRUE;
1191       return filename.body;
1192     }
1193 }
1194
1195 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
1196 system (cmd)
1197      char *cmd;
1198 {
1199   error ("%s", "system() function not implemented under VMS");
1200 }
1201 #endif
1202
1203 #define VERSION_DELIM   ';'
1204 char *massage_name (s)
1205      char *s;
1206 {
1207   char *start = s;
1208
1209   for ( ; *s; s++)
1210     if (*s == VERSION_DELIM)
1211       {
1212         *s = EOS;
1213         break;
1214       }
1215     else
1216       *s = lowcase (*s);
1217   return start;
1218 }
1219 #endif /* VMS */
1220
1221 \f
1222 int
1223 main (argc, argv)
1224      int argc;
1225      char *argv[];
1226 {
1227   int i;
1228   unsigned int nincluded_files;
1229   char **included_files;
1230   argument *argbuffer;
1231   int current_arg, file_count;
1232   linebuffer filename_lb;
1233   bool help_asked = FALSE;
1234 #ifdef VMS
1235   bool got_err;
1236 #endif
1237  char *optstring;
1238  int opt;
1239
1240
1241 #ifdef DOS_NT
1242   _fmode = O_BINARY;   /* all of files are treated as binary files */
1243 #endif /* DOS_NT */
1244
1245   progname = argv[0];
1246   nincluded_files = 0;
1247   included_files = xnew (argc, char *);
1248   current_arg = 0;
1249   file_count = 0;
1250
1251   /* Allocate enough no matter what happens.  Overkill, but each one
1252      is small. */
1253   argbuffer = xnew (argc, argument);
1254
1255   /*
1256    * Always find typedefs and structure tags.
1257    * Also default to find macro constants, enum constants, struct
1258    * members and global variables.  Do it for both etags and ctags.
1259    */
1260   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1261   globals = members = TRUE;
1262
1263   /* When the optstring begins with a '-' getopt_long does not rearrange the
1264      non-options arguments to be at the end, but leaves them alone. */
1265   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1266                       "ac:Cf:Il:o:r:RSVhH",
1267                       (CTAGS) ? "BxdtTuvw" : "Di:");
1268
1269   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1270     switch (opt)
1271       {
1272       case 0:
1273         /* If getopt returns 0, then it has already processed a
1274            long-named option.  We should do nothing.  */
1275         break;
1276
1277       case 1:
1278         /* This means that a file name has been seen.  Record it. */
1279         argbuffer[current_arg].arg_type = at_filename;
1280         argbuffer[current_arg].what     = optarg;
1281         ++current_arg;
1282         ++file_count;
1283         break;
1284
1285       case STDIN:
1286         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1287         argbuffer[current_arg].arg_type = at_stdin;
1288         argbuffer[current_arg].what     = optarg;
1289         ++current_arg;
1290         ++file_count;
1291         if (parsing_stdin)
1292           fatal ("cannot parse standard input more than once", (char *)NULL);
1293         parsing_stdin = TRUE;
1294         break;
1295
1296         /* Common options. */
1297       case 'a': append_to_tagfile = TRUE;       break;
1298       case 'C': cplusplus = TRUE;               break;
1299       case 'f':         /* for compatibility with old makefiles */
1300       case 'o':
1301         if (tagfile)
1302           {
1303             error ("-o option may only be given once.", (char *)NULL);
1304             suggest_asking_for_help ();
1305             /* NOTREACHED */
1306           }
1307         tagfile = optarg;
1308         break;
1309       case 'I':
1310       case 'S':         /* for backward compatibility */
1311         ignoreindent = TRUE;
1312         break;
1313       case 'l':
1314         {
1315           language *lang = get_language_from_langname (optarg);
1316           if (lang != NULL)
1317             {
1318               argbuffer[current_arg].lang = lang;
1319               argbuffer[current_arg].arg_type = at_language;
1320               ++current_arg;
1321             }
1322         }
1323         break;
1324       case 'c':
1325         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1326         optarg = concat (optarg, "i", ""); /* memory leak here */
1327         /* FALLTHRU */
1328       case 'r':
1329         argbuffer[current_arg].arg_type = at_regexp;
1330         argbuffer[current_arg].what = optarg;
1331         ++current_arg;
1332         break;
1333       case 'R':
1334         argbuffer[current_arg].arg_type = at_regexp;
1335         argbuffer[current_arg].what = NULL;
1336         ++current_arg;
1337         break;
1338       case 'V':
1339         print_version ();
1340         break;
1341       case 'h':
1342       case 'H':
1343         help_asked = TRUE;
1344         break;
1345
1346         /* Etags options */
1347       case 'D': constantypedefs = FALSE;                        break;
1348       case 'i': included_files[nincluded_files++] = optarg;     break;
1349
1350         /* Ctags options. */
1351       case 'B': searchar = '?';                                 break;
1352       case 'd': constantypedefs = TRUE;                         break;
1353       case 't': typedefs = TRUE;                                break;
1354       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1355       case 'u': update = TRUE;                                  break;
1356       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1357       case 'x': cxref_style = TRUE;                             break;
1358       case 'w': no_warnings = TRUE;                             break;
1359       default:
1360         suggest_asking_for_help ();
1361         /* NOTREACHED */
1362       }
1363
1364   /* No more options.  Store the rest of arguments. */
1365   for (; optind < argc; optind++)
1366     {
1367       argbuffer[current_arg].arg_type = at_filename;
1368       argbuffer[current_arg].what = argv[optind];
1369       ++current_arg;
1370       ++file_count;
1371     }
1372
1373   argbuffer[current_arg].arg_type = at_end;
1374
1375   if (help_asked)
1376     print_help (argbuffer);
1377     /* NOTREACHED */
1378
1379   if (nincluded_files == 0 && file_count == 0)
1380     {
1381       error ("no input files specified.", (char *)NULL);
1382       suggest_asking_for_help ();
1383       /* NOTREACHED */
1384     }
1385
1386   if (tagfile == NULL)
1387     tagfile = CTAGS ? "tags" : "TAGS";
1388   cwd = etags_getcwd ();        /* the current working directory */
1389   if (cwd[strlen (cwd) - 1] != '/')
1390     {
1391       char *oldcwd = cwd;
1392       cwd = concat (oldcwd, "/", "");
1393       free (oldcwd);
1394     }
1395   /* Relative file names are made relative to the current directory. */
1396   if (streq (tagfile, "-")
1397       || strneq (tagfile, "/dev/", 5))
1398     tagfiledir = cwd;
1399   else
1400     tagfiledir = absolute_dirname (tagfile, cwd);
1401
1402   init ();                      /* set up boolean "functions" */
1403
1404   linebuffer_init (&lb);
1405   linebuffer_init (&filename_lb);
1406   linebuffer_init (&filebuf);
1407   linebuffer_init (&token_name);
1408
1409   if (!CTAGS)
1410     {
1411       if (streq (tagfile, "-"))
1412         {
1413           tagf = stdout;
1414 #ifdef DOS_NT
1415           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1416              doesn't take effect until after `stdout' is already open). */
1417           if (!isatty (fileno (stdout)))
1418             setmode (fileno (stdout), O_BINARY);
1419 #endif /* DOS_NT */
1420         }
1421       else
1422         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1423       if (tagf == NULL)
1424         pfatal (tagfile);
1425     }
1426
1427   /*
1428    * Loop through files finding functions.
1429    */
1430   for (i = 0; i < current_arg; i++)
1431     {
1432       static language *lang;    /* non-NULL if language is forced */
1433       char *this_file;
1434
1435       switch (argbuffer[i].arg_type)
1436         {
1437         case at_language:
1438           lang = argbuffer[i].lang;
1439           break;
1440         case at_regexp:
1441           analyse_regex (argbuffer[i].what);
1442           break;
1443         case at_filename:
1444 #ifdef VMS
1445           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1446             {
1447               if (got_err)
1448                 {
1449                   error ("can't find file %s\n", this_file);
1450                   argc--, argv++;
1451                 }
1452               else
1453                 {
1454                   this_file = massage_name (this_file);
1455                 }
1456 #else
1457               this_file = argbuffer[i].what;
1458 #endif
1459               /* Input file named "-" means read file names from stdin
1460                  (one per line) and use them. */
1461               if (streq (this_file, "-"))
1462                 {
1463                   if (parsing_stdin)
1464                     fatal ("cannot parse standard input AND read file names from it",
1465                            (char *)NULL);
1466                   while (readline_internal (&filename_lb, stdin) > 0)
1467                     process_file_name (filename_lb.buffer, lang);
1468                 }
1469               else
1470                 process_file_name (this_file, lang);
1471 #ifdef VMS
1472             }
1473 #endif
1474           break;
1475         case at_stdin:
1476           this_file = argbuffer[i].what;
1477           process_file (stdin, this_file, lang);
1478           break;
1479         }
1480     }
1481
1482   free_regexps ();
1483   free (lb.buffer);
1484   free (filebuf.buffer);
1485   free (token_name.buffer);
1486
1487   if (!CTAGS || cxref_style)
1488     {
1489       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1490       put_entries (nodehead);
1491       free_tree (nodehead);
1492       nodehead = NULL;
1493       if (!CTAGS)
1494         {
1495           fdesc *fdp;
1496
1497           /* Output file entries that have no tags. */
1498           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1499             if (!fdp->written)
1500               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1501
1502           while (nincluded_files-- > 0)
1503             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1504
1505           if (fclose (tagf) == EOF)
1506             pfatal (tagfile);
1507         }
1508
1509       exit (EXIT_SUCCESS);
1510     }
1511
1512   /* From here on, we are in (CTAGS && !cxref_style) */
1513   if (update)
1514     {
1515       char cmd[BUFSIZ];
1516       for (i = 0; i < current_arg; ++i)
1517         {
1518           switch (argbuffer[i].arg_type)
1519             {
1520             case at_filename:
1521             case at_stdin:
1522               break;
1523             default:
1524               continue;         /* the for loop */
1525             }
1526           sprintf (cmd,
1527                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1528                    tagfile, argbuffer[i].what, tagfile);
1529           if (system (cmd) != EXIT_SUCCESS)
1530             fatal ("failed to execute shell command", (char *)NULL);
1531         }
1532       append_to_tagfile = TRUE;
1533     }
1534
1535   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1536   if (tagf == NULL)
1537     pfatal (tagfile);
1538   put_entries (nodehead);       /* write all the tags (CTAGS) */
1539   free_tree (nodehead);
1540   nodehead = NULL;
1541   if (fclose (tagf) == EOF)
1542     pfatal (tagfile);
1543
1544   if (CTAGS)
1545     if (append_to_tagfile || update)
1546       {
1547         char cmd[2*BUFSIZ+20];
1548         /* Maybe these should be used:
1549            setenv ("LC_COLLATE", "C", 1);
1550            setenv ("LC_ALL", "C", 1); */
1551         sprintf (cmd, "sort -u -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1552         exit (system (cmd));
1553       }
1554   return EXIT_SUCCESS;
1555 }
1556
1557
1558 /*
1559  * Return a compressor given the file name.  If EXTPTR is non-zero,
1560  * return a pointer into FILE where the compressor-specific
1561  * extension begins.  If no compressor is found, NULL is returned
1562  * and EXTPTR is not significant.
1563  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1564  */
1565 static compressor *
1566 get_compressor_from_suffix (file, extptr)
1567      char *file;
1568      char **extptr;
1569 {
1570   compressor *compr;
1571   char *slash, *suffix;
1572
1573   /* This relies on FN to be after canonicalize_filename,
1574      so we don't need to consider backslashes on DOS_NT.  */
1575   slash = etags_strrchr (file, '/');
1576   suffix = etags_strrchr (file, '.');
1577   if (suffix == NULL || suffix < slash)
1578     return NULL;
1579   if (extptr != NULL)
1580     *extptr = suffix;
1581   suffix += 1;
1582   /* Let those poor souls who live with DOS 8+3 file name limits get
1583      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1584      Only the first do loop is run if not MSDOS */
1585   do
1586     {
1587       for (compr = compressors; compr->suffix != NULL; compr++)
1588         if (streq (compr->suffix, suffix))
1589           return compr;
1590       if (!MSDOS)
1591         break;                  /* do it only once: not really a loop */
1592       if (extptr != NULL)
1593         *extptr = ++suffix;
1594     } while (*suffix != '\0');
1595   return NULL;
1596 }
1597
1598
1599
1600 /*
1601  * Return a language given the name.
1602  */
1603 static language *
1604 get_language_from_langname (name)
1605      const char *name;
1606 {
1607   language *lang;
1608
1609   if (name == NULL)
1610     error ("empty language name", (char *)NULL);
1611   else
1612     {
1613       for (lang = lang_names; lang->name != NULL; lang++)
1614         if (streq (name, lang->name))
1615           return lang;
1616       error ("unknown language \"%s\"", name);
1617     }
1618
1619   return NULL;
1620 }
1621
1622
1623 /*
1624  * Return a language given the interpreter name.
1625  */
1626 static language *
1627 get_language_from_interpreter (interpreter)
1628      char *interpreter;
1629 {
1630   language *lang;
1631   char **iname;
1632
1633   if (interpreter == NULL)
1634     return NULL;
1635   for (lang = lang_names; lang->name != NULL; lang++)
1636     if (lang->interpreters != NULL)
1637       for (iname = lang->interpreters; *iname != NULL; iname++)
1638         if (streq (*iname, interpreter))
1639             return lang;
1640
1641   return NULL;
1642 }
1643
1644
1645
1646 /*
1647  * Return a language given the file name.
1648  */
1649 static language *
1650 get_language_from_filename (file, case_sensitive)
1651      char *file;
1652      bool case_sensitive;
1653 {
1654   language *lang;
1655   char **name, **ext, *suffix;
1656
1657   /* Try whole file name first. */
1658   for (lang = lang_names; lang->name != NULL; lang++)
1659     if (lang->filenames != NULL)
1660       for (name = lang->filenames; *name != NULL; name++)
1661         if ((case_sensitive)
1662             ? streq (*name, file)
1663             : strcaseeq (*name, file))
1664           return lang;
1665
1666   /* If not found, try suffix after last dot. */
1667   suffix = etags_strrchr (file, '.');
1668   if (suffix == NULL)
1669     return NULL;
1670   suffix += 1;
1671   for (lang = lang_names; lang->name != NULL; lang++)
1672     if (lang->suffixes != NULL)
1673       for (ext = lang->suffixes; *ext != NULL; ext++)
1674         if ((case_sensitive)
1675             ? streq (*ext, suffix)
1676             : strcaseeq (*ext, suffix))
1677           return lang;
1678   return NULL;
1679 }
1680
1681 \f
1682 /*
1683  * This routine is called on each file argument.
1684  */
1685 static void
1686 process_file_name (file, lang)
1687      char *file;
1688      language *lang;
1689 {
1690   struct stat stat_buf;
1691   FILE *inf;
1692   fdesc *fdp;
1693   compressor *compr;
1694   char *compressed_name, *uncompressed_name;
1695   char *ext, *real_name;
1696   int retval;
1697
1698   canonicalize_filename (file);
1699   if (streq (file, tagfile) && !streq (tagfile, "-"))
1700     {
1701       error ("skipping inclusion of %s in self.", file);
1702       return;
1703     }
1704   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1705     {
1706       compressed_name = NULL;
1707       real_name = uncompressed_name = savestr (file);
1708     }
1709   else
1710     {
1711       real_name = compressed_name = savestr (file);
1712       uncompressed_name = savenstr (file, ext - file);
1713     }
1714
1715   /* If the canonicalized uncompressed name
1716      has already been dealt with, skip it silently. */
1717   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1718     {
1719       assert (fdp->infname != NULL);
1720       if (streq (uncompressed_name, fdp->infname))
1721         goto cleanup;
1722     }
1723
1724   if (stat (real_name, &stat_buf) != 0)
1725     {
1726       /* Reset real_name and try with a different name. */
1727       real_name = NULL;
1728       if (compressed_name != NULL) /* try with the given suffix */
1729         {
1730           if (stat (uncompressed_name, &stat_buf) == 0)
1731             real_name = uncompressed_name;
1732         }
1733       else                      /* try all possible suffixes */
1734         {
1735           for (compr = compressors; compr->suffix != NULL; compr++)
1736             {
1737               compressed_name = concat (file, ".", compr->suffix);
1738               if (stat (compressed_name, &stat_buf) != 0)
1739                 {
1740                   if (MSDOS)
1741                     {
1742                       char *suf = compressed_name + strlen (file);
1743                       size_t suflen = strlen (compr->suffix) + 1;
1744                       for ( ; suf[1]; suf++, suflen--)
1745                         {
1746                           memmove (suf, suf + 1, suflen);
1747                           if (stat (compressed_name, &stat_buf) == 0)
1748                             {
1749                               real_name = compressed_name;
1750                               break;
1751                             }
1752                         }
1753                       if (real_name != NULL)
1754                         break;
1755                     } /* MSDOS */
1756                   free (compressed_name);
1757                   compressed_name = NULL;
1758                 }
1759               else
1760                 {
1761                   real_name = compressed_name;
1762                   break;
1763                 }
1764             }
1765         }
1766       if (real_name == NULL)
1767         {
1768           perror (file);
1769           goto cleanup;
1770         }
1771     } /* try with a different name */
1772
1773   if (!S_ISREG (stat_buf.st_mode))
1774     {
1775       error ("skipping %s: it is not a regular file.", real_name);
1776       goto cleanup;
1777     }
1778   if (real_name == compressed_name)
1779     {
1780       char *cmd = concat (compr->command, " ", real_name);
1781       inf = (FILE *) popen (cmd, "r");
1782       free (cmd);
1783     }
1784   else
1785     inf = fopen (real_name, "r");
1786   if (inf == NULL)
1787     {
1788       perror (real_name);
1789       goto cleanup;
1790     }
1791
1792   process_file (inf, uncompressed_name, lang);
1793
1794   if (real_name == compressed_name)
1795     retval = pclose (inf);
1796   else
1797     retval = fclose (inf);
1798   if (retval < 0)
1799     pfatal (file);
1800
1801  cleanup:
1802   free (compressed_name);
1803   free (uncompressed_name);
1804   last_node = NULL;
1805   curfdp = NULL;
1806   return;
1807 }
1808
1809 static void
1810 process_file (fh, fn, lang)
1811      FILE *fh;
1812      char *fn;
1813      language *lang;
1814 {
1815   static const fdesc emptyfdesc;
1816   fdesc *fdp;
1817
1818   /* Create a new input file description entry. */
1819   fdp = xnew (1, fdesc);
1820   *fdp = emptyfdesc;
1821   fdp->next = fdhead;
1822   fdp->infname = savestr (fn);
1823   fdp->lang = lang;
1824   fdp->infabsname = absolute_filename (fn, cwd);
1825   fdp->infabsdir = absolute_dirname (fn, cwd);
1826   if (filename_is_absolute (fn))
1827     {
1828       /* An absolute file name.  Canonicalize it. */
1829       fdp->taggedfname = absolute_filename (fn, NULL);
1830     }
1831   else
1832     {
1833       /* A file name relative to cwd.  Make it relative
1834          to the directory of the tags file. */
1835       fdp->taggedfname = relative_filename (fn, tagfiledir);
1836     }
1837   fdp->usecharno = TRUE;        /* use char position when making tags */
1838   fdp->prop = NULL;
1839   fdp->written = FALSE;         /* not written on tags file yet */
1840
1841   fdhead = fdp;
1842   curfdp = fdhead;              /* the current file description */
1843
1844   find_entries (fh);
1845
1846   /* If not Ctags, and if this is not metasource and if it contained no #line
1847      directives, we can write the tags and free all nodes pointing to
1848      curfdp. */
1849   if (!CTAGS
1850       && curfdp->usecharno      /* no #line directives in this file */
1851       && !curfdp->lang->metasource)
1852     {
1853       node *np, *prev;
1854
1855       /* Look for the head of the sublist relative to this file.  See add_node
1856          for the structure of the node tree. */
1857       prev = NULL;
1858       for (np = nodehead; np != NULL; prev = np, np = np->left)
1859         if (np->fdp == curfdp)
1860           break;
1861
1862       /* If we generated tags for this file, write and delete them. */
1863       if (np != NULL)
1864         {
1865           /* This is the head of the last sublist, if any.  The following
1866              instructions depend on this being true. */
1867           assert (np->left == NULL);
1868
1869           assert (fdhead == curfdp);
1870           assert (last_node->fdp == curfdp);
1871           put_entries (np);     /* write tags for file curfdp->taggedfname */
1872           free_tree (np);       /* remove the written nodes */
1873           if (prev == NULL)
1874             nodehead = NULL;    /* no nodes left */
1875           else
1876             prev->left = NULL;  /* delete the pointer to the sublist */
1877         }
1878     }
1879 }
1880
1881 /*
1882  * This routine sets up the boolean pseudo-functions which work
1883  * by setting boolean flags dependent upon the corresponding character.
1884  * Every char which is NOT in that string is not a white char.  Therefore,
1885  * all of the array "_wht" is set to FALSE, and then the elements
1886  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1887  * of a char is TRUE if it is the string "white", else FALSE.
1888  */
1889 static void
1890 init ()
1891 {
1892   register char *sp;
1893   register int i;
1894
1895   for (i = 0; i < CHARS; i++)
1896     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1897   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1898   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1899   notinname('\0') = notinname('\n');
1900   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1901   begtoken('\0') = begtoken('\n');
1902   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1903   intoken('\0') = intoken('\n');
1904   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1905   endtoken('\0') = endtoken('\n');
1906 }
1907
1908 /*
1909  * This routine opens the specified file and calls the function
1910  * which finds the function and type definitions.
1911  */
1912 static void
1913 find_entries (inf)
1914      FILE *inf;
1915 {
1916   char *cp;
1917   language *lang = curfdp->lang;
1918   Lang_function *parser = NULL;
1919
1920   /* If user specified a language, use it. */
1921   if (lang != NULL && lang->function != NULL)
1922     {
1923       parser = lang->function;
1924     }
1925
1926   /* Else try to guess the language given the file name. */
1927   if (parser == NULL)
1928     {
1929       lang = get_language_from_filename (curfdp->infname, TRUE);
1930       if (lang != NULL && lang->function != NULL)
1931         {
1932           curfdp->lang = lang;
1933           parser = lang->function;
1934         }
1935     }
1936
1937   /* Else look for sharp-bang as the first two characters. */
1938   if (parser == NULL
1939       && readline_internal (&lb, inf) > 0
1940       && lb.len >= 2
1941       && lb.buffer[0] == '#'
1942       && lb.buffer[1] == '!')
1943     {
1944       char *lp;
1945
1946       /* Set lp to point at the first char after the last slash in the
1947          line or, if no slashes, at the first nonblank.  Then set cp to
1948          the first successive blank and terminate the string. */
1949       lp = etags_strrchr (lb.buffer+2, '/');
1950       if (lp != NULL)
1951         lp += 1;
1952       else
1953         lp = skip_spaces (lb.buffer + 2);
1954       cp = skip_non_spaces (lp);
1955       *cp = '\0';
1956
1957       if (strlen (lp) > 0)
1958         {
1959           lang = get_language_from_interpreter (lp);
1960           if (lang != NULL && lang->function != NULL)
1961             {
1962               curfdp->lang = lang;
1963               parser = lang->function;
1964             }
1965         }
1966     }
1967
1968   /* We rewind here, even if inf may be a pipe.  We fail if the
1969      length of the first line is longer than the pipe block size,
1970      which is unlikely. */
1971   rewind (inf);
1972
1973   /* Else try to guess the language given the case insensitive file name. */
1974   if (parser == NULL)
1975     {
1976       lang = get_language_from_filename (curfdp->infname, FALSE);
1977       if (lang != NULL && lang->function != NULL)
1978         {
1979           curfdp->lang = lang;
1980           parser = lang->function;
1981         }
1982     }
1983
1984   /* Else try Fortran or C. */
1985   if (parser == NULL)
1986     {
1987       node *old_last_node = last_node;
1988
1989       curfdp->lang = get_language_from_langname ("fortran");
1990       find_entries (inf);
1991
1992       if (old_last_node == last_node)
1993         /* No Fortran entries found.  Try C. */
1994         {
1995           /* We do not tag if rewind fails.
1996              Only the file name will be recorded in the tags file. */
1997           rewind (inf);
1998           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1999           find_entries (inf);
2000         }
2001       return;
2002     }
2003
2004   if (!no_line_directive
2005       && curfdp->lang != NULL && curfdp->lang->metasource)
2006     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
2007        file, or anyway we parsed a file that is automatically generated from
2008        this one.  If this is the case, the bingo.c file contained #line
2009        directives that generated tags pointing to this file.  Let's delete
2010        them all before parsing this file, which is the real source. */
2011     {
2012       fdesc **fdpp = &fdhead;
2013       while (*fdpp != NULL)
2014         if (*fdpp != curfdp
2015             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
2016           /* We found one of those!  We must delete both the file description
2017              and all tags referring to it. */
2018           {
2019             fdesc *badfdp = *fdpp;
2020
2021             /* Delete the tags referring to badfdp->taggedfname
2022                that were obtained from badfdp->infname. */
2023             invalidate_nodes (badfdp, &nodehead);
2024
2025             *fdpp = badfdp->next; /* remove the bad description from the list */
2026             free_fdesc (badfdp);
2027           }
2028         else
2029           fdpp = &(*fdpp)->next; /* advance the list pointer */
2030     }
2031
2032   assert (parser != NULL);
2033
2034   /* Generic initialisations before reading from file. */
2035   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
2036
2037   /* Generic initialisations before parsing file with readline. */
2038   lineno = 0;                  /* reset global line number */
2039   charno = 0;                  /* reset global char number */
2040   linecharno = 0;              /* reset global char number of line start */
2041
2042   parser (inf);
2043
2044   regex_tag_multiline ();
2045 }
2046
2047 \f
2048 /*
2049  * Check whether an implicitly named tag should be created,
2050  * then call `pfnote'.
2051  * NAME is a string that is internally copied by this function.
2052  *
2053  * TAGS format specification
2054  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
2055  * The following is explained in some more detail in etc/ETAGS.EBNF.
2056  *
2057  * make_tag creates tags with "implicit tag names" (unnamed tags)
2058  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
2059  *  1. NAME does not contain any of the characters in NONAM;
2060  *  2. LINESTART contains name as either a rightmost, or rightmost but
2061  *     one character, substring;
2062  *  3. the character, if any, immediately before NAME in LINESTART must
2063  *     be a character in NONAM;
2064  *  4. the character, if any, immediately after NAME in LINESTART must
2065  *     also be a character in NONAM.
2066  *
2067  * The implementation uses the notinname() macro, which recognises the
2068  * characters stored in the string `nonam'.
2069  * etags.el needs to use the same characters that are in NONAM.
2070  */
2071 static void
2072 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2073      char *name;                /* tag name, or NULL if unnamed */
2074      int namelen;               /* tag length */
2075      bool is_func;              /* tag is a function */
2076      char *linestart;           /* start of the line where tag is */
2077      int linelen;               /* length of the line where tag is */
2078      int lno;                   /* line number */
2079      long cno;                  /* character number */
2080 {
2081   bool named = (name != NULL && namelen > 0);
2082
2083   if (!CTAGS && named)          /* maybe set named to false */
2084     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2085        such that etags.el can guess a name from it. */
2086     {
2087       int i;
2088       register char *cp = name;
2089
2090       for (i = 0; i < namelen; i++)
2091         if (notinname (*cp++))
2092           break;
2093       if (i == namelen)                         /* rule #1 */
2094         {
2095           cp = linestart + linelen - namelen;
2096           if (notinname (linestart[linelen-1]))
2097             cp -= 1;                            /* rule #4 */
2098           if (cp >= linestart                   /* rule #2 */
2099               && (cp == linestart
2100                   || notinname (cp[-1]))        /* rule #3 */
2101               && strneq (name, cp, namelen))    /* rule #2 */
2102             named = FALSE;      /* use implicit tag name */
2103         }
2104     }
2105
2106   if (named)
2107     name = savenstr (name, namelen);
2108   else
2109     name = NULL;
2110   pfnote (name, is_func, linestart, linelen, lno, cno);
2111 }
2112
2113 /* Record a tag. */
2114 static void
2115 pfnote (name, is_func, linestart, linelen, lno, cno)
2116      char *name;                /* tag name, or NULL if unnamed */
2117      bool is_func;              /* tag is a function */
2118      char *linestart;           /* start of the line where tag is */
2119      int linelen;               /* length of the line where tag is */
2120      int lno;                   /* line number */
2121      long cno;                  /* character number */
2122 {
2123   register node *np;
2124
2125   assert (name == NULL || name[0] != '\0');
2126   if (CTAGS && name == NULL)
2127     return;
2128
2129   np = xnew (1, node);
2130
2131   /* If ctags mode, change name "main" to M<thisfilename>. */
2132   if (CTAGS && !cxref_style && streq (name, "main"))
2133     {
2134       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2135       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2136       fp = etags_strrchr (np->name, '.');
2137       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2138         fp[0] = '\0';
2139     }
2140   else
2141     np->name = name;
2142   np->valid = TRUE;
2143   np->been_warned = FALSE;
2144   np->fdp = curfdp;
2145   np->is_func = is_func;
2146   np->lno = lno;
2147   if (np->fdp->usecharno)
2148     /* Our char numbers are 0-base, because of C language tradition?
2149        ctags compatibility?  old versions compatibility?   I don't know.
2150        Anyway, since emacs's are 1-base we expect etags.el to take care
2151        of the difference.  If we wanted to have 1-based numbers, we would
2152        uncomment the +1 below. */
2153     np->cno = cno /* + 1 */ ;
2154   else
2155     np->cno = invalidcharno;
2156   np->left = np->right = NULL;
2157   if (CTAGS && !cxref_style)
2158     {
2159       if (strlen (linestart) < 50)
2160         np->regex = concat (linestart, "$", "");
2161       else
2162         np->regex = savenstr (linestart, 50);
2163     }
2164   else
2165     np->regex = savenstr (linestart, linelen);
2166
2167   add_node (np, &nodehead);
2168 }
2169
2170 /*
2171  * free_tree ()
2172  *      recurse on left children, iterate on right children.
2173  */
2174 static void
2175 free_tree (np)
2176      register node *np;
2177 {
2178   while (np)
2179     {
2180       register node *node_right = np->right;
2181       free_tree (np->left);
2182       free (np->name);
2183       free (np->regex);
2184       free (np);
2185       np = node_right;
2186     }
2187 }
2188
2189 /*
2190  * free_fdesc ()
2191  *      delete a file description
2192  */
2193 static void
2194 free_fdesc (fdp)
2195      register fdesc *fdp;
2196 {
2197   free (fdp->infname);
2198   free (fdp->infabsname);
2199   free (fdp->infabsdir);
2200   free (fdp->taggedfname);
2201   free (fdp->prop);
2202   free (fdp);
2203 }
2204
2205 /*
2206  * add_node ()
2207  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2208  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2209  *      balancing.
2210  *
2211  *      add_node is the only function allowed to add nodes, so it can
2212  *      maintain state.
2213  */
2214 static void
2215 add_node (np, cur_node_p)
2216      node *np, **cur_node_p;
2217 {
2218   register int dif;
2219   register node *cur_node = *cur_node_p;
2220
2221   if (cur_node == NULL)
2222     {
2223       *cur_node_p = np;
2224       last_node = np;
2225       return;
2226     }
2227
2228   if (!CTAGS)
2229     /* Etags Mode */
2230     {
2231       /* For each file name, tags are in a linked sublist on the right
2232          pointer.  The first tags of different files are a linked list
2233          on the left pointer.  last_node points to the end of the last
2234          used sublist. */
2235       if (last_node != NULL && last_node->fdp == np->fdp)
2236         {
2237           /* Let's use the same sublist as the last added node. */
2238           assert (last_node->right == NULL);
2239           last_node->right = np;
2240           last_node = np;
2241         }
2242       else if (cur_node->fdp == np->fdp)
2243         {
2244           /* Scanning the list we found the head of a sublist which is
2245              good for us.  Let's scan this sublist. */
2246           add_node (np, &cur_node->right);
2247         }
2248       else
2249         /* The head of this sublist is not good for us.  Let's try the
2250            next one. */
2251         add_node (np, &cur_node->left);
2252     } /* if ETAGS mode */
2253
2254   else
2255     {
2256       /* Ctags Mode */
2257       dif = strcmp (np->name, cur_node->name);
2258
2259       /*
2260        * If this tag name matches an existing one, then
2261        * do not add the node, but maybe print a warning.
2262        */
2263       if (no_duplicates && !dif)
2264         {
2265           if (np->fdp == cur_node->fdp)
2266             {
2267               if (!no_warnings)
2268                 {
2269                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2270                            np->fdp->infname, lineno, np->name);
2271                   fprintf (stderr, "Second entry ignored\n");
2272                 }
2273             }
2274           else if (!cur_node->been_warned && !no_warnings)
2275             {
2276               fprintf
2277                 (stderr,
2278                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2279                  np->fdp->infname, cur_node->fdp->infname, np->name);
2280               cur_node->been_warned = TRUE;
2281             }
2282           return;
2283         }
2284
2285       /* Actually add the node */
2286       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2287     } /* if CTAGS mode */
2288 }
2289
2290 /*
2291  * invalidate_nodes ()
2292  *      Scan the node tree and invalidate all nodes pointing to the
2293  *      given file description (CTAGS case) or free them (ETAGS case).
2294  */
2295 static void
2296 invalidate_nodes (badfdp, npp)
2297      fdesc *badfdp;
2298      node **npp;
2299 {
2300   node *np = *npp;
2301
2302   if (np == NULL)
2303     return;
2304
2305   if (CTAGS)
2306     {
2307       if (np->left != NULL)
2308         invalidate_nodes (badfdp, &np->left);
2309       if (np->fdp == badfdp)
2310         np->valid = FALSE;
2311       if (np->right != NULL)
2312         invalidate_nodes (badfdp, &np->right);
2313     }
2314   else
2315     {
2316       assert (np->fdp != NULL);
2317       if (np->fdp == badfdp)
2318         {
2319           *npp = np->left;      /* detach the sublist from the list */
2320           np->left = NULL;      /* isolate it */
2321           free_tree (np);       /* free it */
2322           invalidate_nodes (badfdp, npp);
2323         }
2324       else
2325         invalidate_nodes (badfdp, &np->left);
2326     }
2327 }
2328
2329 \f
2330 static int total_size_of_entries __P((node *));
2331 static int number_len __P((long));
2332
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One more digit for each division by ten that leaves something. */
  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2343
2344 /*
2345  * Return total number of characters that put_entries will output for
2346  * the nodes in the linked list at the right of the specified node.
2347  * This count is irrelevant with etags.el since emacs 19.34 at least,
2348  * but is still supplied for backward compatibility.
2349  */
2350 static int
2351 total_size_of_entries (np)
2352      register node *np;
2353 {
2354   register int total = 0;
2355
2356   for (; np != NULL; np = np->right)
2357     if (np->valid)
2358       {
2359         total += strlen (np->regex) + 1;                /* pat\177 */
2360         if (np->name != NULL)
2361           total += strlen (np->name) + 1;               /* name\001 */
2362         total += number_len ((long) np->lno) + 1;       /* lno, */
2363         if (np->cno != invalidcharno)                   /* cno */
2364           total += number_len (np->cno);
2365         total += 1;                                     /* newline */
2366       }
2367
2368   return total;
2369 }
2370
2371 static void
2372 put_entries (np)
2373      register node *np;
2374 {
2375   register char *sp;
2376   static fdesc *fdp = NULL;
2377
2378   if (np == NULL)
2379     return;
2380
2381   /* Output subentries that precede this one */
2382   if (CTAGS)
2383     put_entries (np->left);
2384
2385   /* Output this entry */
2386   if (np->valid)
2387     {
2388       if (!CTAGS)
2389         {
2390           /* Etags mode */
2391           if (fdp != np->fdp)
2392             {
2393               fdp = np->fdp;
2394               fprintf (tagf, "\f\n%s,%d\n",
2395                        fdp->taggedfname, total_size_of_entries (np));
2396               fdp->written = TRUE;
2397             }
2398           fputs (np->regex, tagf);
2399           fputc ('\177', tagf);
2400           if (np->name != NULL)
2401             {
2402               fputs (np->name, tagf);
2403               fputc ('\001', tagf);
2404             }
2405           fprintf (tagf, "%d,", np->lno);
2406           if (np->cno != invalidcharno)
2407             fprintf (tagf, "%ld", np->cno);
2408           fputs ("\n", tagf);
2409         }
2410       else
2411         {
2412           /* Ctags mode */
2413           if (np->name == NULL)
2414             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2415
2416           if (cxref_style)
2417             {
2418               if (vgrind_style)
2419                 fprintf (stdout, "%s %s %d\n",
2420                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2421               else
2422                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2423                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2424             }
2425           else
2426             {
2427               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2428
2429               if (np->is_func)
2430                 {               /* function or #define macro with args */
2431                   putc (searchar, tagf);
2432                   putc ('^', tagf);
2433
2434                   for (sp = np->regex; *sp; sp++)
2435                     {
2436                       if (*sp == '\\' || *sp == searchar)
2437                         putc ('\\', tagf);
2438                       putc (*sp, tagf);
2439                     }
2440                   putc (searchar, tagf);
2441                 }
2442               else
2443                 {               /* anything else; text pattern inadequate */
2444                   fprintf (tagf, "%d", np->lno);
2445                 }
2446               putc ('\n', tagf);
2447             }
2448         }
2449     } /* if this node contains a valid tag */
2450
2451   /* Output subentries that follow this one */
2452   put_entries (np->right);
2453   if (!CTAGS)
2454     put_entries (np->left);
2455 }
2456
2457 \f
/* Flags describing the C dialects and C-like languages.  Note that
   the values of C_STAR and C_JAVA both contain the C_PLPL bit. */
#define C_EXT   0x00fff         /* all the C extension bits */
#define C_PLAIN 0x00000         /* plain C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* Java */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2466
/*
 * The C symbol tables.
 *
 * Kinds of keyword known to the C symbol table.  The keywords quoted
 * below are those that the gperf input further down maps to each
 * kind.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2481
2482 static unsigned int hash __P((const char *, unsigned int));
2483 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2484 static enum sym_type C_symtype __P((char *, int, int));
2485
2486 /* Feed stuff between (but not including) %[ and %] lines to:
2487      gperf -m 5
2488 %[
2489 %compare-strncmp
2490 %enum
2491 %struct-type
2492 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2493 %%
2494 if,             0,                      st_C_ignore
2495 for,            0,                      st_C_ignore
2496 while,          0,                      st_C_ignore
2497 switch,         0,                      st_C_ignore
2498 return,         0,                      st_C_ignore
2499 __attribute__,  0,                      st_C_attribute
2500 GTY,            0,                      st_C_attribute
2501 @interface,     0,                      st_C_objprot
2502 @protocol,      0,                      st_C_objprot
2503 @implementation,0,                      st_C_objimpl
2504 @end,           0,                      st_C_objend
2505 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2506 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2507 friend,         C_PLPL,                 st_C_ignore
2508 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2509 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2510 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2511 class,          0,                      st_C_class
2512 namespace,      C_PLPL,                 st_C_struct
2513 domain,         C_STAR,                 st_C_struct
2514 union,          0,                      st_C_struct
2515 struct,         0,                      st_C_struct
2516 extern,         0,                      st_C_extern
2517 enum,           0,                      st_C_enum
2518 typedef,        0,                      st_C_typedef
2519 define,         0,                      st_C_define
2520 undef,          0,                      st_C_define
2521 operator,       C_PLPL,                 st_C_operator
2522 template,       0,                      st_C_template
2523 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2524 DEFUN,          0,                      st_C_gnumacro
2525 SYSCALL,        0,                      st_C_gnumacro
2526 ENTRY,          0,                      st_C_gnumacro
2527 PSEUDO,         0,                      st_C_gnumacro
2528 # These are defined inside C functions, so currently they are not met.
2529 # EXFUN used in glibc, DEFVAR_* in emacs.
2530 #EXFUN,         0,                      st_C_gnumacro
2531 #DEFVAR_,       0,                      st_C_gnumacro
2532 %]
2533 and replace lines between %< and %> with its output, then:
2534  - remove the #if characterset check
2535  - make in_word_set static and not inline. */
2536 /*%<*/
2537 /* C code produced by gperf version 3.0.1 */
2538 /* Command-line: gperf -m 5  */
2539 /* Computed positions: -k'2-3' */
2540
     /* One keyword: its spelling, the mask of C_* languages it is
        restricted to (C_symtype treats 0 as "every language"), and its
        type. */
2541 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2542 /* maximum key range = 33, duplicates = 0 */
2543
     /* gperf-generated hash function for the keyword table.
        Returns LEN plus the asso_values entry for the second character of
        STR and, when LEN is not 2, also the entry for the third character.
        in_word_set calls it only with LEN between MIN_WORD_LENGTH (2) and
        MAX_WORD_LENGTH (15), so STR[1] (and STR[2] when LEN > 2) are
        inside the token.  Do not edit by hand: regenerate with gperf as
        described in the comment above the %< marker. */
2544 #ifdef __GNUC__
2545 __inline
2546 #else
2547 #ifdef __cplusplus
2548 inline
2549 #endif
2550 #endif
2551 static unsigned int
2552 hash (str, len)
2553      register const char *str;
2554      register unsigned int len;
2555 {
     /* Indexed by character code; 35 is larger than MAX_HASH_VALUE (34),
        so any character not used by a keyword pushes the hash out of
        range of the word list. */
2556   static unsigned char asso_values[] =
2557     {
2558       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2559       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2560       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2561       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2562       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2563       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2564       35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
2565       26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
2566       35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
2567       35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
2568       23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
2569        0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
2570        4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
2571       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2572       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2573       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2574       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2575       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2576       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2577       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2578       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2579       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2580       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2581       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2582       35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
2583       35, 35, 35, 35, 35, 35
2584     };
2585   register int hval = len;
2586
     /* Any length other than 2 takes the default branch and therefore
        hashes both the third and the second character. */
2587   switch (hval)
2588     {
2589       default:
2590         hval += asso_values[(unsigned char)str[2]];
2591       /*FALLTHROUGH*/
2592       case 2:
2593         hval += asso_values[(unsigned char)str[1]];
2594         break;
2595     }
2596   return hval;
2597 }
2598
     /* gperf-generated keyword lookup.
        Returns a pointer to the wordlist entry whose name consists of
        exactly the first LEN characters of STR, or 0 when there is no such
        keyword.  STR does not have to be NUL-terminated after LEN
        characters.  Do not edit by hand: regenerate with gperf as described
        in the comment above the %< marker. */
2599 static struct C_stab_entry *
2600 in_word_set (str, len)
2601      register const char *str;
2602      register unsigned int len;
2603 {
2604   enum
2605     {
2606       TOTAL_KEYWORDS = 33,
2607       MIN_WORD_LENGTH = 2,
2608       MAX_WORD_LENGTH = 15,
2609       MIN_HASH_VALUE = 2,
2610       MAX_HASH_VALUE = 34
2611     };
2612
     /* Indexed by hash value.  The two leading {""} entries fill the
        slots below MIN_HASH_VALUE. */
2613   static struct C_stab_entry wordlist[] =
2614     {
2615       {""}, {""},
2616       {"if",            0,                      st_C_ignore},
2617       {"GTY",           0,                      st_C_attribute},
2618       {"@end",          0,                      st_C_objend},
2619       {"union",         0,                      st_C_struct},
2620       {"define",                0,                      st_C_define},
2621       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2622       {"template",      0,                      st_C_template},
2623       {"operator",      C_PLPL,                 st_C_operator},
2624       {"@interface",    0,                      st_C_objprot},
2625       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2626       {"friend",                C_PLPL,                 st_C_ignore},
2627       {"typedef",       0,                      st_C_typedef},
2628       {"return",                0,                      st_C_ignore},
2629       {"@implementation",0,                     st_C_objimpl},
2630       {"@protocol",     0,                      st_C_objprot},
2631       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2632       {"extern",                0,                      st_C_extern},
2633       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2634       {"struct",                0,                      st_C_struct},
2635       {"domain",                C_STAR,                 st_C_struct},
2636       {"switch",                0,                      st_C_ignore},
2637       {"enum",          0,                      st_C_enum},
2638       {"for",           0,                      st_C_ignore},
2639       {"namespace",     C_PLPL,                 st_C_struct},
2640       {"class",         0,                      st_C_class},
2641       {"while",         0,                      st_C_ignore},
2642       {"undef",         0,                      st_C_define},
2643       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2644       {"__attribute__", 0,                      st_C_attribute},
2645       {"SYSCALL",       0,                      st_C_gnumacro},
2646       {"ENTRY",         0,                      st_C_gnumacro},
2647       {"PSEUDO",                0,                      st_C_gnumacro},
2648       {"DEFUN",         0,                      st_C_gnumacro}
2649     };
2650
2651   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2652     {
2653       register int key = hash (str, len);
2654
2655       if (key <= MAX_HASH_VALUE && key >= 0)
2656         {
2657           register const char *s = wordlist[key].name;
2658
     /* Match the first LEN characters, then make sure the keyword
        itself ends there (it is not merely prefixed by the token). */
2659           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2660             return &wordlist[key];
2661         }
2662     }
2663   return 0;
2664 }
2665 /*%>*/
2666
2667 static enum sym_type
2668 C_symtype (str, len, c_ext)
2669      char *str;
2670      int len;
2671      int c_ext;
2672 {
2673   register struct C_stab_entry *se = in_word_set (str, len);
2674
2675   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2676     return st_none;
2677   return se->type;
2678 }
2679
2680 \f
2681 /*
2682  * Ignoring __attribute__ ((list))
      *
      * consider_token sets inattribute when it meets an st_C_attribute
      * keyword (__attribute__ or GTY).
2683  */
2684 static bool inattribute;        /* looking at an __attribute__ construct */
2685
2686 /*
2687  * C functions and variables are recognized using a simple
2688  * finite automaton.  fvdef is its state variable.
2689  */
2690 static enum
2691 {
2692   fvnone,                       /* nothing seen */
2693   fdefunkey,                    /* GNU macro keyword seen (DEFUN, SYSCALL, ENTRY, PSEUDO) */
2694   fdefunname,                   /* name following a GNU macro keyword seen */
2695   foperator,                    /* func: operator keyword seen (cplpl) */
2696   fvnameseen,                   /* function or variable name seen */
2697   fstartlist,                   /* func: just after open parenthesis */
2698   finlist,                      /* func: in parameter list */
2699   flistseen,                    /* func: after parameter list */
2700   fignore,                      /* func: before open brace */
2701   vignore                       /* var-like: ignore until ';' */
2702 } fvdef;
2703
2704 static bool fvextern;           /* func or var: extern keyword seen */
2705
2706 /*
2707  * typedefs are recognized using a simple finite automaton.
2708  * typdef is its state variable.
2709  */
2710 static enum
2711 {
2712   tnone,                        /* nothing seen */
2713   tkeyseen,                     /* typedef keyword seen (entered only if `typedefs' is set) */
2714   ttypeseen,                    /* defined type seen */
2715   tinbody,                      /* inside typedef body */
2716   tend,                         /* just before typedef tag */
2717   tignore                       /* junk after typedef tag */
2718 } typdef;
2719
2720 /*
2721  * struct-like structures (enum, struct and union) are recognized
2722  * using another simple finite automaton.  `structdef' is its state
2723  * variable.
      *
      * The same automaton is started by every keyword classified as
      * st_C_struct, st_C_class or st_C_enum, so it also covers class,
      * namespace, interface and domain.
2724  */
2725 static enum
2726 {
2727   snone,                        /* nothing seen yet,
2728                                    or in struct body if bracelev > 0 */
2729   skeyseen,                     /* struct-like keyword seen */
2730   stagseen,                     /* struct-like tag seen */
2731   scolonseen                    /* colon seen after struct-like tag */
2732 } structdef;
2733
2734 /*
2735  * When objdef is different from onone, objtag is the name of the class.
      *
      * consider_token replaces it with a fresh savenstr copy each time a
      * class name is seen; the previous string is deliberately not freed
      * (see the comment in the oignore case there).
2736  */
2737 static char *objtag = "<uninited>";
2738
2739 /*
2740  * Yet another little state machine to deal with preprocessor lines.
2741  */
2742 static enum
2743 {
2744   dnone,                        /* nothing seen */
2745   dsharpseen,                   /* '#' seen as first char on line */
2746   ddefineseen,                  /* '#' and 'define' (or 'undef') seen */
2747   dignorerest                   /* ignore rest of line */
2748 } definedef;
2749
2750 /*
2751  * State machine for Objective C protocols and implementations.
2752  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2753  */
2754 static enum
2755 {
2756   onone,                        /* nothing seen */
2757   oprotocol,                    /* @interface or @protocol seen */
2758   oimplementation,              /* @implementation seen */
2759   otagseen,                     /* class name seen */
2760   oparenseen,                   /* parenthesis before category seen */
2761   ocatseen,                     /* category name seen */
2762   oinbody,                      /* in @implementation body */
2763   omethodsign,                  /* in @implementation body, after +/- */
2764   omethodtag,                   /* after method name */
2765   omethodcolon,                 /* after method colon */
2766   omethodparm,                  /* after method parameter */
2767   oignore                       /* wait for @end */
2768 } objdef;
2769
2770
2771 /*
2772  * Use this structure to keep info about the token read, and how it
2773  * should be tagged.  Used by the make_C_tag function to build a tag.
2774  */
2775 static struct tok
2776 {
2777   char *line;                   /* string containing the token */
2778   int offset;                   /* where the token starts in LINE */
2779   int length;                   /* token length */
2780   /*
2781     The previous members can be used to pass strings around for generic
2782     purposes.  The following ones specifically refer to creating tags.  In this
2783     case the token contained here is the pattern that will be used to create a
2784     tag.
2785   */
2786   bool valid;                   /* if FALSE, do not create a tag; the token
2787                                    should be invalidated whenever a state
2788                                    machine is reset prematurely */
2789   bool named;                   /* create a named tag */
2790   int lineno;                   /* source line number of tag */
2791   long linepos;                 /* source char number of tag */
2792 } token;                        /* latest token read */
2793
2794 /*
2795  * Variables and functions for dealing with nested structures.
2796  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2797  */
2798 static void pushclass_above __P((int, char *, int));
2799 static void popclass_above __P((int));
2800 static void write_classname __P((linebuffer *, char *qualifier));
2801
2802 static struct {
2803   char **cname;                 /* nested class names (NULL for a level pushed without a name) */
2804   int *bracelev;                /* nested class brace level */
2805   int nl;                       /* class nesting level (elements used) */
2806   int size;                     /* allocated length of both arrays */
2807 } cstack;                       /* stack for nested declaration tags */
2808 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2809 #define nestlev         (cstack.nl)
2810 /* After struct keyword or in struct body, not inside a nested function. */
     /* Note: `bracelev' below is not a member of cstack; the macro picks up
        whatever variable of that name is in scope where it is expanded. */
2811 #define instruct        (structdef == snone && nestlev > 0                      \
2812                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2813
2814 static void
2815 pushclass_above (bracelev, str, len)
2816      int bracelev;
2817      char *str;
2818      int len;
2819 {
2820   int nl;
2821
2822   popclass_above (bracelev);
2823   nl = cstack.nl;
2824   if (nl >= cstack.size)
2825     {
2826       int size = cstack.size *= 2;
2827       xrnew (cstack.cname, size, char *);
2828       xrnew (cstack.bracelev, size, int);
2829     }
2830   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2831   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2832   cstack.bracelev[nl] = bracelev;
2833   cstack.nl = nl + 1;
2834 }
2835
2836 static void
2837 popclass_above (bracelev)
2838      int bracelev;
2839 {
2840   int nl;
2841
2842   for (nl = cstack.nl - 1;
2843        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2844        nl--)
2845     {
2846       free (cstack.cname[nl]);
2847       cstack.nl = nl;
2848     }
2849 }
2850
2851 static void
2852 write_classname (cn, qualifier)
2853      linebuffer *cn;
2854      char *qualifier;
2855 {
2856   int i, len;
2857   int qlen = strlen (qualifier);
2858
2859   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2860     {
2861       len = 0;
2862       cn->len = 0;
2863       cn->buffer[0] = '\0';
2864     }
2865   else
2866     {
2867       len = strlen (cstack.cname[0]);
2868       linebuffer_setlen (cn, len);
2869       strcpy (cn->buffer, cstack.cname[0]);
2870     }
2871   for (i = 1; i < cstack.nl; i++)
2872     {
2873       char *s;
2874       int slen;
2875
2876       s = cstack.cname[i];
2877       if (s == NULL)
2878         continue;
2879       slen = strlen (s);
2880       len += slen + qlen;
2881       linebuffer_setlen (cn, len);
2882       strncat (cn->buffer, qualifier, qlen);
2883       strncat (cn->buffer, s, slen);
2884     }
2885 }
2886
2887 \f
2888 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2889 static void make_C_tag __P((bool));
2890
2891 /*
2892  * consider_token ()
2893  *      checks to see if the current token is at the start of a
2894  *      function or variable, or corresponds to a typedef, or
2895  *      is a struct/union/enum tag, or #define, or an enum constant.
2896  *
      *      Returns TRUE when the token is such a candidate, FALSE when it
      *      is a keyword or other token that only advances (or does not
      *      affect) the state machines.
      *
      *      *IS_FUNC_OR_VAR gets TRUE if the token is a function or variable
      *      name, an Objective C class or category name, or a #define macro
      *      with args; for a #define without args it gets FALSE.  On every
      *      other path it is left untouched.  C_EXTP points to which
      *      language we are looking at; C_AUTO is replaced by C_PLPL there
      *      when C++ is detected.
2899  *
2900  * Globals
2901  *      fvdef                   IN OUT
2902  *      structdef               IN OUT
2903  *      definedef               IN OUT
2904  *      typdef                  IN OUT
2905  *      objdef                  IN OUT
      *      fvextern                OUT
      *      inattribute             OUT
      *      objtag                  OUT
      *      token_name              OUT (Objective C method names)
2906  */
2907
2908 static bool
2909 consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
2910      register char *str;        /* IN: token pointer */
2911      register int len;          /* IN: token length */
2912      register int c;            /* IN: first char after the token */
2913      int *c_extp;               /* IN, OUT: C extensions mask */
2914      int bracelev;              /* IN: brace level */
2915      int parlev;                /* IN: parenthesis level */
2916      bool *is_func_or_var;      /* OUT: function or variable found */
2917 {
2918   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2919      structtype is the type of the preceding struct-like keyword, and
2920      structbracelev is the brace level where it has been seen. */
2921   static enum sym_type structtype;
2922   static int structbracelev;
2923   static enum sym_type toktype;
2924
2925
2926   toktype = C_symtype (str, len, *c_extp);
2927
2928   /*
2929    * Skip __attribute__
2930    */
2931   if (toktype == st_C_attribute)
2932     {
2933       inattribute = TRUE;
2934       return FALSE;
2935      }
2936
2937    /*
2938     * Advance the definedef state machine.
2939     */
2940    switch (definedef)
2941      {
2942      case dnone:
2943        /* We're not on a preprocessor line. */
2944        if (toktype == st_C_gnumacro)
2945          {
2946            fvdef = fdefunkey;
2947            return FALSE;
2948          }
2949        break;
2950      case dsharpseen:
2951        if (toktype == st_C_define)
2952          {
2953            definedef = ddefineseen;
2954          }
2955        else
2956          {
2957            definedef = dignorerest;
2958          }
2959        return FALSE;
2960      case ddefineseen:
2961        /*
2962         * Make a tag for any macro, unless it is a constant
2963         * and constantypedefs is FALSE.
2964         */
2965        definedef = dignorerest;
2966        *is_func_or_var = (c == '(');
2967        if (!*is_func_or_var && !constantypedefs)
2968          return FALSE;
2969        else
2970          return TRUE;
2971      case dignorerest:
2972        return FALSE;
2973      default:
2974        error ("internal error: definedef value.", (char *)NULL);
2975      }
2976
2977    /*
2978     * Now typedefs
2979     */
2980    switch (typdef)
2981      {
2982      case tnone:
2983        if (toktype == st_C_typedef)
2984          {
2985            if (typedefs)
2986              typdef = tkeyseen;
2987            fvextern = FALSE;
2988            fvdef = fvnone;
2989            return FALSE;
2990          }
2991        break;
2992      case tkeyseen:
     /* Any other keyword type leaves typdef at tkeyseen. */
2993        switch (toktype)
2994          {
2995          case st_none:
2996          case st_C_class:
2997          case st_C_struct:
2998          case st_C_enum:
2999            typdef = ttypeseen;
3000          }
3001        break;
3002      case ttypeseen:
3003        if (structdef == snone && fvdef == fvnone)
3004          {
3005            fvdef = fvnameseen;
3006            return TRUE;
3007          }
3008        break;
3009      case tend:
3010        switch (toktype)
3011          {
3012          case st_C_class:
3013          case st_C_struct:
3014          case st_C_enum:
3015            return FALSE;
3016          }
3017        return TRUE;
3018      }
3019
3020    switch (toktype)
3021      {
3022      case st_C_javastruct:
3023        if (structdef == stagseen)
3024          structdef = scolonseen;
3025        return FALSE;
3026      case st_C_template:
3027      case st_C_class:
3028        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
3029            && bracelev == 0
3030            && definedef == dnone && structdef == snone
3031            && typdef == tnone && fvdef == fvnone)
3032          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
     /* `template' only takes part in the C++ detection above; it does
        not start a struct-like definition. */
3033        if (toktype == st_C_template)
3034          break;
3035        /* FALLTHRU */
3036      case st_C_struct:
3037      case st_C_enum:
3038        if (parlev == 0
3039            && fvdef != vignore
3040            && (typdef == tkeyseen
3041                || (typedefs_or_cplusplus && structdef == snone)))
3042          {
3043            structdef = skeyseen;
3044            structtype = toktype;
3045            structbracelev = bracelev;
3046            if (fvdef == fvnameseen)
3047              fvdef = fvnone;
3048          }
3049        return FALSE;
3050      }
3051
     /* The first token after a struct-like keyword is taken as its tag. */
3052    if (structdef == skeyseen)
3053      {
3054        structdef = stagseen;
3055        return TRUE;
3056      }
3057
3058    if (typdef != tnone)
3059      definedef = dnone;
3060
3061    /* Detect Objective C constructs. */
3062    switch (objdef)
3063      {
3064      case onone:
3065        switch (toktype)
3066          {
3067          case st_C_objprot:
3068            objdef = oprotocol;
3069            return FALSE;
3070          case st_C_objimpl:
3071            objdef = oimplementation;
3072            return FALSE;
3073          }
3074        break;
3075      case oimplementation:
3076        /* Save the class tag for functions or variables defined inside. */
3077        objtag = savenstr (str, len);
3078        objdef = oinbody;
3079        return FALSE;
3080      case oprotocol:
3081        /* Save the class tag for categories. */
3082        objtag = savenstr (str, len);
3083        objdef = otagseen;
3084        *is_func_or_var = TRUE;
3085        return TRUE;
3086      case oparenseen:
3087        objdef = ocatseen;
3088        *is_func_or_var = TRUE;
3089        return TRUE;
3090      case oinbody:
3091        break;
3092      case omethodsign:
3093        if (parlev == 0)
3094          {
3095            fvdef = fvnone;
3096            objdef = omethodtag;
     /* Start the method name with this token. */
3097            linebuffer_setlen (&token_name, len);
3098            strncpy (token_name.buffer, str, len);
3099            token_name.buffer[len] = '\0';
3100            return TRUE;
3101          }
3102        return FALSE;
3103      case omethodcolon:
3104        if (parlev == 0)
3105          objdef = omethodparm;
3106        return FALSE;
3107      case omethodparm:
3108        if (parlev == 0)
3109          {
3110            fvdef = fvnone;
3111            objdef = omethodtag;
     /* Append this token to the method name built so far. */
3112            linebuffer_setlen (&token_name, token_name.len + len);
3113            strncat (token_name.buffer, str, len);
3114            return TRUE;
3115          }
3116        return FALSE;
3117      case oignore:
3118        if (toktype == st_C_objend)
3119          {
3120            /* Memory leakage here: the string pointed by objtag is
3121               never released, because many tests would be needed to
3122               avoid breaking on incorrect input code.  The amount of
3123               memory leaked here is the sum of the lengths of the
3124               class tags.
3125            free (objtag); */
3126            objdef = onone;
3127          }
3128        return FALSE;
3129      }
3130
3131    /* A function, variable or enum constant? */
3132    switch (toktype)
3133      {
3134      case st_C_extern:
3135        fvextern = TRUE;
3136        switch  (fvdef)
3137          {
3138          case finlist:
3139          case flistseen:
3140          case fignore:
3141          case vignore:
3142            break;
3143          default:
3144            fvdef = fvnone;
3145          }
3146        return FALSE;
3147      case st_C_ignore:
3148        fvextern = FALSE;
3149        fvdef = vignore;
3150        return FALSE;
3151      case st_C_operator:
3152        fvdef = foperator;
3153        *is_func_or_var = TRUE;
3154        return TRUE;
3155      case st_none:
3156        if (constantypedefs
3157            && structdef == snone
3158            && structtype == st_C_enum && bracelev > structbracelev)
3159          return TRUE;           /* enum constant */
3160        switch (fvdef)
3161          {
3162          case fdefunkey:
3163            if (bracelev > 0)
3164              break;
3165            fvdef = fdefunname;  /* GNU macro */
3166            *is_func_or_var = TRUE;
3167            return TRUE;
3168          case fvnone:
3169            switch (typdef)
3170              {
3171              case ttypeseen:
3172                return FALSE;
3173              case tnone:
3174                if ((strneq (str, "asm", 3) && endtoken (str[3]))
3175                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
3176                  {
3177                    fvdef = vignore;
3178                    return FALSE;
3179                  }
3180                break;
3181              }
3182           /* FALLTHRU */
3183           case fvnameseen:
3184           if (len >= 10 && strneq (str+len-10, "::operator", 10))
3185             {
3186               if (*c_extp & C_AUTO) /* automatic detection of C++ */
3187                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
3188               fvdef = foperator;
3189               *is_func_or_var = TRUE;
3190               return TRUE;
3191             }
3192           if (bracelev > 0 && !instruct)
3193             break;
3194           fvdef = fvnameseen;   /* function or variable */
3195           *is_func_or_var = TRUE;
3196           return TRUE;
3197         }
3198       break;
3199     }
3200
3201   return FALSE;
3202 }
3203
3204 \f
3205 /*
3206  * C_entries often keeps pointers to tokens or lines which are older than
3207  * the line currently read.  By keeping two line buffers, and switching
3208  * them at end of line, it is possible to use those pointers.
      *
      * The macros below are not self-contained: they expand to code that
      * uses names such as curndx, newndx, c_ext, lp, quotednl, savetoken,
      * inf and charno, so they must be used where those names are in
      * scope (C_entries declares all of them except charno, which is
      * declared outside this section).
3209  */
3210 static struct
3211 {
3212   long linepos;
3213   linebuffer lb;
3214 } lbs[2];
3215
3216 #define current_lb_is_new (newndx == curndx)
3217 #define switch_line_buffers() (curndx = 1 - curndx)
3218
3219 #define curlb (lbs[curndx].lb)
3220 #define newlb (lbs[newndx].lb)
3221 #define curlinepos (lbs[curndx].linepos)
3222 #define newlinepos (lbs[newndx].linepos)
3223
3224 #define plainc ((c_ext & C_EXT) == C_PLAIN)
3225 #define cplpl (c_ext & C_PLPL)
3226 #define cjava ((c_ext & C_JAVA) == C_JAVA)
3227
     /* Read the next input line into the current line buffer and restart
        scanning at its beginning, leaving definedef untouched. */
3228 #define CNL_SAVE_DEFINEDEF()                                            \
3229 do {                                                                    \
3230   curlinepos = charno;                                                  \
3231   readline (&curlb, inf);                                               \
3232   lp = curlb.buffer;                                                    \
3233   quotednl = FALSE;                                                     \
3234   newndx = curndx;                                                      \
3235 } while (0)
3236
     /* Like CNL_SAVE_DEFINEDEF, but also restore the token saved in
        savetoken (if it is valid) and reset definedef to dnone. */
3237 #define CNL()                                                           \
3238 do {                                                                    \
3239   CNL_SAVE_DEFINEDEF();                                                 \
3240   if (savetoken.valid)                                                  \
3241     {                                                                   \
3242       token = savetoken;                                                \
3243       savetoken.valid = FALSE;                                          \
3244     }                                                                   \
3245   definedef = dnone;                                                    \
3246 } while (0)
3247
3248
3249 static void
3250 make_C_tag (isfun)
3251      bool isfun;
3252 {
3253   /* This function is never called when token.valid is FALSE, but
3254      we must protect against invalid input or internal errors. */
3255   if (token.valid)
3256     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3257               token.offset+token.length+1, token.lineno, token.linepos);
3258   else if (DEBUG)
3259     {                             /* this branch is optimised away if !DEBUG */
3260       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3261                 token_name.len + 17, isfun, token.line,
3262                 token.offset+token.length+1, token.lineno, token.linepos);
3263       error ("INVALID TOKEN", NULL);
3264     }
3265
3266   token.valid = FALSE;
3267 }
3268
3269
3270 /*
3271  * C_entries ()
3272  *      This routine finds functions, variables, typedefs,
3273  *      #define's, enum constants and struct/union/enum definitions in
3274  *      C syntax and adds them to the list.
3275  */
3276 static void
3277 C_entries (c_ext, inf)
3278      int c_ext;                 /* extension of C */
3279      FILE *inf;                 /* input file */
3280 {
3281   register char c;              /* latest char read; '\0' for end of line */
3282   register char *lp;            /* pointer one beyond the character `c' */
3283   int curndx, newndx;           /* indices for current and new lb */
3284   register int tokoff;          /* offset in line of start of current token */
3285   register int toklen;          /* length of current token */
3286   char *qualifier;              /* string used to qualify names */
3287   int qlen;                     /* length of qualifier */
3288   int bracelev;                 /* current brace level */
3289   int bracketlev;               /* current bracket level */
3290   int parlev;                   /* current parenthesis level */
3291   int attrparlev;               /* __attribute__ parenthesis level */
3292   int templatelev;              /* current template level */
3293   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3294   bool incomm, inquote, inchar, quotednl, midtoken;
3295   bool yacc_rules;              /* in the rules part of a yacc file */
3296   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3297
3298
3299   linebuffer_init (&lbs[0].lb);
3300   linebuffer_init (&lbs[1].lb);
3301   if (cstack.size == 0)
3302     {
3303       cstack.size = (DEBUG) ? 1 : 4;
3304       cstack.nl = 0;
3305       cstack.cname = xnew (cstack.size, char *);
3306       cstack.bracelev = xnew (cstack.size, int);
3307     }
3308
3309   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3310   curndx = newndx = 0;
3311   lp = curlb.buffer;
3312   *lp = 0;
3313
3314   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3315   structdef = snone; definedef = dnone; objdef = onone;
3316   yacc_rules = FALSE;
3317   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3318   token.valid = savetoken.valid = FALSE;
3319   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3320   if (cjava)
3321     { qualifier = "."; qlen = 1; }
3322   else
3323     { qualifier = "::"; qlen = 2; }
3324
3325
3326   while (!feof (inf))
3327     {
3328       c = *lp++;
3329       if (c == '\\')
3330         {
3331           /* If we are at the end of the line, the next character is a
3332              '\0'; do not skip it, because it is what tells us
3333              to read the next line.  */
3334           if (*lp == '\0')
3335             {
3336               quotednl = TRUE;
3337               continue;
3338             }
3339           lp++;
3340           c = ' ';
3341         }
3342       else if (incomm)
3343         {
3344           switch (c)
3345             {
3346             case '*':
3347               if (*lp == '/')
3348                 {
3349                   c = *lp++;
3350                   incomm = FALSE;
3351                 }
3352               break;
3353             case '\0':
3354               /* Newlines inside comments do not end macro definitions in
3355                  traditional cpp. */
3356               CNL_SAVE_DEFINEDEF ();
3357               break;
3358             }
3359           continue;
3360         }
3361       else if (inquote)
3362         {
3363           switch (c)
3364             {
3365             case '"':
3366               inquote = FALSE;
3367               break;
3368             case '\0':
3369               /* Newlines inside strings do not end macro definitions
3370                  in traditional cpp, even though compilers don't
3371                  usually accept them. */
3372               CNL_SAVE_DEFINEDEF ();
3373               break;
3374             }
3375           continue;
3376         }
3377       else if (inchar)
3378         {
3379           switch (c)
3380             {
3381             case '\0':
3382               /* Hmmm, something went wrong. */
3383               CNL ();
3384               /* FALLTHRU */
3385             case '\'':
3386               inchar = FALSE;
3387               break;
3388             }
3389           continue;
3390         }
3391       else if (bracketlev > 0)
3392         {
3393           switch (c)
3394             {
3395             case ']':
3396               if (--bracketlev > 0)
3397                 continue;
3398               break;
3399             case '\0':
3400               CNL_SAVE_DEFINEDEF ();
3401               break;
3402             }
3403           continue;
3404         }
3405       else switch (c)
3406         {
3407         case '"':
3408           inquote = TRUE;
3409           if (inattribute)
3410             break;
3411           switch (fvdef)
3412             {
3413             case fdefunkey:
3414             case fstartlist:
3415             case finlist:
3416             case fignore:
3417             case vignore:
3418               break;
3419             default:
3420               fvextern = FALSE;
3421               fvdef = fvnone;
3422             }
3423           continue;
3424         case '\'':
3425           inchar = TRUE;
3426           if (inattribute)
3427             break;
3428           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3429             {
3430               fvextern = FALSE;
3431               fvdef = fvnone;
3432             }
3433           continue;
3434         case '/':
3435           if (*lp == '*')
3436             {
3437               incomm = TRUE;
3438               lp++;
3439               c = ' ';
3440             }
3441           else if (/* cplpl && */ *lp == '/')
3442             {
3443               c = '\0';
3444             }
3445           break;
3446         case '%':
3447           if ((c_ext & YACC) && *lp == '%')
3448             {
3449               /* Entering or exiting rules section in yacc file. */
3450               lp++;
3451               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3452               typdef = tnone; structdef = snone;
3453               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3454               bracelev = 0;
3455               yacc_rules = !yacc_rules;
3456               continue;
3457             }
3458           else
3459             break;
3460         case '#':
3461           if (definedef == dnone)
3462             {
3463               char *cp;
3464               bool cpptoken = TRUE;
3465
3466               /* Look back on this line.  If all blanks, or nonblanks
3467                  followed by an end of comment, this is a preprocessor
3468                  token. */
3469               for (cp = newlb.buffer; cp < lp-1; cp++)
3470                 if (!iswhite (*cp))
3471                   {
3472                     if (*cp == '*' && *(cp+1) == '/')
3473                       {
3474                         cp++;
3475                         cpptoken = TRUE;
3476                       }
3477                     else
3478                       cpptoken = FALSE;
3479                   }
3480               if (cpptoken)
3481                 definedef = dsharpseen;
3482             } /* if (definedef == dnone) */
3483           continue;
3484         case '[':
3485           bracketlev++;
3486             continue;
3487         } /* switch (c) */
3488
3489
3490       /* Consider token only if some involved conditions are satisfied. */
3491       if (typdef != tignore
3492           && definedef != dignorerest
3493           && fvdef != finlist
3494           && templatelev == 0
3495           && (definedef != dnone
3496               || structdef != scolonseen)
3497           && !inattribute)
3498         {
3499           if (midtoken)
3500             {
3501               if (endtoken (c))
3502                 {
3503                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3504                     /* This handles :: in the middle,
3505                        but not at the beginning of an identifier.
3506                        Also, space-separated :: is not recognised. */
3507                     {
3508                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3509                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3510                       lp += 2;
3511                       toklen += 2;
3512                       c = lp[-1];
3513                       goto still_in_token;
3514                     }
3515                   else
3516                     {
3517                       bool funorvar = FALSE;
3518
3519                       if (yacc_rules
3520                           || consider_token (newlb.buffer + tokoff, toklen, c,
3521                                              &c_ext, bracelev, parlev,
3522                                              &funorvar))
3523                         {
3524                           if (fvdef == foperator)
3525                             {
3526                               char *oldlp = lp;
3527                               lp = skip_spaces (lp-1);
3528                               if (*lp != '\0')
3529                                 lp += 1;
3530                               while (*lp != '\0'
3531                                      && !iswhite (*lp) && *lp != '(')
3532                                 lp += 1;
3533                               c = *lp++;
3534                               toklen += lp - oldlp;
3535                             }
3536                           token.named = FALSE;
3537                           if (!plainc
3538                               && nestlev > 0 && definedef == dnone)
3539                             /* in struct body */
3540                             {
3541                               write_classname (&token_name, qualifier);
3542                               linebuffer_setlen (&token_name,
3543                                                  token_name.len+qlen+toklen);
3544                               strcat (token_name.buffer, qualifier);
3545                               strncat (token_name.buffer,
3546                                        newlb.buffer + tokoff, toklen);
3547                               token.named = TRUE;
3548                             }
3549                           else if (objdef == ocatseen)
3550                             /* Objective C category */
3551                             {
3552                               int len = strlen (objtag) + 2 + toklen;
3553                               linebuffer_setlen (&token_name, len);
3554                               strcpy (token_name.buffer, objtag);
3555                               strcat (token_name.buffer, "(");
3556                               strncat (token_name.buffer,
3557                                        newlb.buffer + tokoff, toklen);
3558                               strcat (token_name.buffer, ")");
3559                               token.named = TRUE;
3560                             }
3561                           else if (objdef == omethodtag
3562                                    || objdef == omethodparm)
3563                             /* Objective C method */
3564                             {
3565                               token.named = TRUE;
3566                             }
3567                           else if (fvdef == fdefunname)
3568                             /* GNU DEFUN and similar macros */
3569                             {
3570                               bool defun = (newlb.buffer[tokoff] == 'F');
3571                               int off = tokoff;
3572                               int len = toklen;
3573
3574                               /* Rewrite the tag so that emacs lisp DEFUNs
3575                                  can be found by their elisp name */
3576                               if (defun)
3577                                 {
3578                                   off += 1;
3579                                   len -= 1;
3580                                 }
3581                               linebuffer_setlen (&token_name, len);
3582                               strncpy (token_name.buffer,
3583                                        newlb.buffer + off, len);
3584                               token_name.buffer[len] = '\0';
3585                               if (defun)
3586                                 while (--len >= 0)
3587                                   if (token_name.buffer[len] == '_')
3588                                     token_name.buffer[len] = '-';
3589                               token.named = defun;
3590                             }
3591                           else
3592                             {
3593                               linebuffer_setlen (&token_name, toklen);
3594                               strncpy (token_name.buffer,
3595                                        newlb.buffer + tokoff, toklen);
3596                               token_name.buffer[toklen] = '\0';
3597                               /* Name macros and members. */
3598                               token.named = (structdef == stagseen
3599                                              || typdef == ttypeseen
3600                                              || typdef == tend
3601                                              || (funorvar
3602                                                  && definedef == dignorerest)
3603                                              || (funorvar
3604                                                  && definedef == dnone
3605                                                  && structdef == snone
3606                                                  && bracelev > 0));
3607                             }
3608                           token.lineno = lineno;
3609                           token.offset = tokoff;
3610                           token.length = toklen;
3611                           token.line = newlb.buffer;
3612                           token.linepos = newlinepos;
3613                           token.valid = TRUE;
3614
3615                           if (definedef == dnone
3616                               && (fvdef == fvnameseen
3617                                   || fvdef == foperator
3618                                   || structdef == stagseen
3619                                   || typdef == tend
3620                                   || typdef == ttypeseen
3621                                   || objdef != onone))
3622                             {
3623                               if (current_lb_is_new)
3624                                 switch_line_buffers ();
3625                             }
3626                           else if (definedef != dnone
3627                                    || fvdef == fdefunname
3628                                    || instruct)
3629                             make_C_tag (funorvar);
3630                         }
3631                       else /* not yacc and consider_token failed */
3632                         {
3633                           if (inattribute && fvdef == fignore)
3634                             {
3635                               /* We have just met __attribute__ after a
3636                                  function parameter list: do not tag the
3637                                  function again. */
3638                               fvdef = fvnone;
3639                             }
3640                         }
3641                       midtoken = FALSE;
3642                     }
3643                 } /* if (endtoken (c)) */
3644               else if (intoken (c))
3645                 still_in_token:
3646                 {
3647                   toklen++;
3648                   continue;
3649                 }
3650             } /* if (midtoken) */
3651           else if (begtoken (c))
3652             {
3653               switch (definedef)
3654                 {
3655                 case dnone:
3656                   switch (fvdef)
3657                     {
3658                     case fstartlist:
3659                       /* This prevents tagging fb in
3660                          void (__attribute__((noreturn)) *fb) (void);
3661                          Fixing this is not easy and not very important. */
3662                       fvdef = finlist;
3663                       continue;
3664                     case flistseen:
3665                       if (plainc || declarations)
3666                         {
3667                           make_C_tag (TRUE); /* a function */
3668                           fvdef = fignore;
3669                         }
3670                       break;
3671                     }
3672                   if (structdef == stagseen && !cjava)
3673                     {
3674                       popclass_above (bracelev);
3675                       structdef = snone;
3676                     }
3677                   break;
3678                 case dsharpseen:
3679                   savetoken = token;
3680                   break;
3681                 }
3682               if (!yacc_rules || lp == newlb.buffer + 1)
3683                 {
3684                   tokoff = lp - 1 - newlb.buffer;
3685                   toklen = 1;
3686                   midtoken = TRUE;
3687                 }
3688               continue;
3689             } /* if (begtoken) */
3690         } /* if must look at token */
3691
3692
3693       /* Detect end of line, colon, comma, semicolon and various braces
3694          after having handled a token.*/
3695       switch (c)
3696         {
3697         case ':':
3698           if (inattribute)
3699             break;
3700           if (yacc_rules && token.offset == 0 && token.valid)
3701             {
3702               make_C_tag (FALSE); /* a yacc function */
3703               break;
3704             }
3705           if (definedef != dnone)
3706             break;
3707           switch (objdef)
3708             {
3709             case  otagseen:
3710               objdef = oignore;
3711               make_C_tag (TRUE); /* an Objective C class */
3712               break;
3713             case omethodtag:
3714             case omethodparm:
3715               objdef = omethodcolon;
3716               linebuffer_setlen (&token_name, token_name.len + 1);
3717               strcat (token_name.buffer, ":");
3718               break;
3719             }
3720           if (structdef == stagseen)
3721             {
3722               structdef = scolonseen;
3723               break;
3724             }
3725           /* Should be useless, but may work as a safety net. */
3726           if (cplpl && fvdef == flistseen)
3727             {
3728               make_C_tag (TRUE); /* a function */
3729               fvdef = fignore;
3730               break;
3731             }
3732           break;
3733         case ';':
3734           if (definedef != dnone || inattribute)
3735             break;
3736           switch (typdef)
3737             {
3738             case tend:
3739             case ttypeseen:
3740               make_C_tag (FALSE); /* a typedef */
3741               typdef = tnone;
3742               fvdef = fvnone;
3743               break;
3744             case tnone:
3745             case tinbody:
3746             case tignore:
3747               switch (fvdef)
3748                 {
3749                 case fignore:
3750                   if (typdef == tignore || cplpl)
3751                     fvdef = fvnone;
3752                   break;
3753                 case fvnameseen:
3754                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3755                       || (members && instruct))
3756                     make_C_tag (FALSE); /* a variable */
3757                   fvextern = FALSE;
3758                   fvdef = fvnone;
3759                   token.valid = FALSE;
3760                   break;
3761                 case flistseen:
3762                   if ((declarations
3763                        && (cplpl || !instruct)
3764                        && (typdef == tnone || (typdef != tignore && instruct)))
3765                       || (members
3766                           && plainc && instruct))
3767                     make_C_tag (TRUE);  /* a function */
3768                   /* FALLTHRU */
3769                 default:
3770                   fvextern = FALSE;
3771                   fvdef = fvnone;
3772                   if (declarations
3773                        && cplpl && structdef == stagseen)
3774                     make_C_tag (FALSE); /* forward declaration */
3775                   else
3776                     token.valid = FALSE;
3777                 } /* switch (fvdef) */
3778               /* FALLTHRU */
3779             default:
3780               if (!instruct)
3781                 typdef = tnone;
3782             }
3783           if (structdef == stagseen)
3784             structdef = snone;
3785           break;
3786         case ',':
3787           if (definedef != dnone || inattribute)
3788             break;
3789           switch (objdef)
3790             {
3791             case omethodtag:
3792             case omethodparm:
3793               make_C_tag (TRUE); /* an Objective C method */
3794               objdef = oinbody;
3795               break;
3796             }
3797           switch (fvdef)
3798             {
3799             case fdefunkey:
3800             case foperator:
3801             case fstartlist:
3802             case finlist:
3803             case fignore:
3804             case vignore:
3805               break;
3806             case fdefunname:
3807               fvdef = fignore;
3808               break;
3809             case fvnameseen:
3810               if (parlev == 0
3811                   && ((globals
3812                        && bracelev == 0
3813                        && templatelev == 0
3814                        && (!fvextern || declarations))
3815                       || (members && instruct)))
3816                   make_C_tag (FALSE); /* a variable */
3817               break;
3818             case flistseen:
3819               if ((declarations && typdef == tnone && !instruct)
3820                   || (members && typdef != tignore && instruct))
3821                 {
3822                   make_C_tag (TRUE); /* a function */
3823                   fvdef = fvnameseen;
3824                 }
3825               else if (!declarations)
3826                 fvdef = fvnone;
3827               token.valid = FALSE;
3828               break;
3829             default:
3830               fvdef = fvnone;
3831             }
3832           if (structdef == stagseen)
3833             structdef = snone;
3834           break;
3835         case ']':
3836           if (definedef != dnone || inattribute)
3837             break;
3838           if (structdef == stagseen)
3839             structdef = snone;
3840           switch (typdef)
3841             {
3842             case ttypeseen:
3843             case tend:
3844               typdef = tignore;
3845               make_C_tag (FALSE);       /* a typedef */
3846               break;
3847             case tnone:
3848             case tinbody:
3849               switch (fvdef)
3850                 {
3851                 case foperator:
3852                 case finlist:
3853                 case fignore:
3854                 case vignore:
3855                   break;
3856                 case fvnameseen:
3857                   if ((members && bracelev == 1)
3858                       || (globals && bracelev == 0
3859                           && (!fvextern || declarations)))
3860                     make_C_tag (FALSE); /* a variable */
3861                   /* FALLTHRU */
3862                 default:
3863                   fvdef = fvnone;
3864                 }
3865               break;
3866             }
3867           break;
3868         case '(':
3869           if (inattribute)
3870             {
3871               attrparlev++;
3872               break;
3873             }
3874           if (definedef != dnone)
3875             break;
3876           if (objdef == otagseen && parlev == 0)
3877             objdef = oparenseen;
3878           switch (fvdef)
3879             {
3880             case fvnameseen:
3881               if (typdef == ttypeseen
3882                   && *lp != '*'
3883                   && !instruct)
3884                 {
3885                   /* This handles constructs like:
3886                      typedef void OperatorFun (int fun); */
3887                   make_C_tag (FALSE);
3888                   typdef = tignore;
3889                   fvdef = fignore;
3890                   break;
3891                 }
3892               /* FALLTHRU */
3893             case foperator:
3894               fvdef = fstartlist;
3895               break;
3896             case flistseen:
3897               fvdef = finlist;
3898               break;
3899             }
3900           parlev++;
3901           break;
3902         case ')':
3903           if (inattribute)
3904             {
3905               if (--attrparlev == 0)
3906                 inattribute = FALSE;
3907               break;
3908             }
3909           if (definedef != dnone)
3910             break;
3911           if (objdef == ocatseen && parlev == 1)
3912             {
3913               make_C_tag (TRUE); /* an Objective C category */
3914               objdef = oignore;
3915             }
3916           if (--parlev == 0)
3917             {
3918               switch (fvdef)
3919                 {
3920                 case fstartlist:
3921                 case finlist:
3922                   fvdef = flistseen;
3923                   break;
3924                 }
3925               if (!instruct
3926                   && (typdef == tend
3927                       || typdef == ttypeseen))
3928                 {
3929                   typdef = tignore;
3930                   make_C_tag (FALSE); /* a typedef */
3931                 }
3932             }
3933           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3934             parlev = 0;
3935           break;
3936         case '{':
3937           if (definedef != dnone)
3938             break;
3939           if (typdef == ttypeseen)
3940             {
3941               /* Whenever typdef is set to tinbody (currently only
3942                  here), typdefbracelev should be set to bracelev. */
3943               typdef = tinbody;
3944               typdefbracelev = bracelev;
3945             }
3946           switch (fvdef)
3947             {
3948             case flistseen:
3949               make_C_tag (TRUE);    /* a function */
3950               /* FALLTHRU */
3951             case fignore:
3952               fvdef = fvnone;
3953               break;
3954             case fvnone:
3955               switch (objdef)
3956                 {
3957                 case otagseen:
3958                   make_C_tag (TRUE); /* an Objective C class */
3959                   objdef = oignore;
3960                   break;
3961                 case omethodtag:
3962                 case omethodparm:
3963                   make_C_tag (TRUE); /* an Objective C method */
3964                   objdef = oinbody;
3965                   break;
3966                 default:
3967                   /* Neutralize `extern "C" {' grot. */
3968                   if (bracelev == 0 && structdef == snone && nestlev == 0
3969                       && typdef == tnone)
3970                     bracelev = -1;
3971                 }
3972               break;
3973             }
3974           switch (structdef)
3975             {
3976             case skeyseen:         /* unnamed struct */
3977               pushclass_above (bracelev, NULL, 0);
3978               structdef = snone;
3979               break;
3980             case stagseen:         /* named struct or enum */
3981             case scolonseen:       /* a class */
3982               pushclass_above (bracelev,token.line+token.offset, token.length);
3983               structdef = snone;
3984               make_C_tag (FALSE);  /* a struct or enum */
3985               break;
3986             }
3987           bracelev += 1;
3988           break;
3989         case '*':
3990           if (definedef != dnone)
3991             break;
3992           if (fvdef == fstartlist)
3993             {
3994               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3995               token.valid = FALSE;
3996             }
3997           break;
3998         case '}':
3999           if (definedef != dnone)
4000             break;
4001           bracelev -= 1;
4002           if (!ignoreindent && lp == newlb.buffer + 1)
4003             {
4004               if (bracelev != 0)
4005                 token.valid = FALSE; /* unexpected value, token unreliable */
4006               bracelev = 0;     /* reset brace level if first column */
4007               parlev = 0;       /* also reset paren level, just in case... */
4008             }
4009           else if (bracelev < 0)
4010             {
4011               token.valid = FALSE; /* something gone amiss, token unreliable */
4012               bracelev = 0;
4013             }
4014           if (bracelev == 0 && fvdef == vignore)
4015             fvdef = fvnone;             /* end of function */
4016           popclass_above (bracelev);
4017           structdef = snone;
4018           /* Only if typdef == tinbody is typdefbracelev significant. */
4019           if (typdef == tinbody && bracelev <= typdefbracelev)
4020             {
4021               assert (bracelev == typdefbracelev);
4022               typdef = tend;
4023             }
4024           break;
4025         case '=':
4026           if (definedef != dnone)
4027             break;
4028           switch (fvdef)
4029             {
4030             case foperator:
4031             case finlist:
4032             case fignore:
4033             case vignore:
4034               break;
4035             case fvnameseen:
4036               if ((members && bracelev == 1)
4037                   || (globals && bracelev == 0 && (!fvextern || declarations)))
4038                 make_C_tag (FALSE); /* a variable */
4039               /* FALLTHRU */
4040             default:
4041               fvdef = vignore;
4042             }
4043           break;
4044         case '<':
4045           if (cplpl
4046               && (structdef == stagseen || fvdef == fvnameseen))
4047             {
4048               templatelev++;
4049               break;
4050             }
4051           goto resetfvdef;
4052         case '>':
4053           if (templatelev > 0)
4054             {
4055               templatelev--;
4056               break;
4057             }
4058           goto resetfvdef;
4059         case '+':
4060         case '-':
4061           if (objdef == oinbody && bracelev == 0)
4062             {
4063               objdef = omethodsign;
4064               break;
4065             }
4066           /* FALLTHRU */
4067         resetfvdef:
4068         case '#': case '~': case '&': case '%': case '/':
4069         case '|': case '^': case '!': case '.': case '?':
4070           if (definedef != dnone)
4071             break;
4072           /* These surely cannot follow a function tag in C. */
4073           switch (fvdef)
4074             {
4075             case foperator:
4076             case finlist:
4077             case fignore:
4078             case vignore:
4079               break;
4080             default:
4081               fvdef = fvnone;
4082             }
4083           break;
4084         case '\0':
4085           if (objdef == otagseen)
4086             {
4087               make_C_tag (TRUE); /* an Objective C class */
4088               objdef = oignore;
4089             }
4090           /* If a macro spans multiple lines don't reset its state. */
4091           if (quotednl)
4092             CNL_SAVE_DEFINEDEF ();
4093           else
4094             CNL ();
4095           break;
4096         } /* switch (c) */
4097
4098     } /* while not eof */
4099
4100   free (lbs[0].lb.buffer);
4101   free (lbs[1].lb.buffer);
4102 }
4103
4104 /*
4105  * Process INF as C++ if the global flag `cplusplus' is set; otherwise pass
4106  * C_AUTO, which C_entries treats as automatic detection of C++.
4107  */
4108 static void
4109 default_C_entries (inf)
4110      FILE *inf;                 /* stream handed unchanged to C_entries */
4111 {
4112   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4113 }
4114
4115 /* Always do plain C: call C_entries with no language flags set (0). */
4116 static void
4117 plain_C_entries (inf)
4118      FILE *inf;                 /* stream handed unchanged to C_entries */
4119 {
4120   C_entries (0, inf);
4121 }
4122
4123 /* Always do C++: call C_entries with C_PLPL, without the C_AUTO detection. */
4124 static void
4125 Cplusplus_entries (inf)
4126      FILE *inf;                 /* stream handed unchanged to C_entries */
4127 {
4128   C_entries (C_PLPL, inf);
4129 }
4130
4131 /* Always do Java: call C_entries with the C_JAVA flag. */
4132 static void
4133 Cjava_entries (inf)
4134      FILE *inf;                 /* stream handed unchanged to C_entries */
4135 {
4136   C_entries (C_JAVA, inf);
4137 }
4138
4139 /* Always do C*: call C_entries with the C_STAR flag. */
4140 static void
4141 Cstar_entries (inf)
4142      FILE *inf;                 /* stream handed unchanged to C_entries */
4143 {
4144   C_entries (C_STAR, inf);
4145 }
4146
4147 /* Always do Yacc: call C_entries with the YACC flag, which enables its `%%' handling. */
4148 static void
4149 Yacc_entries (inf)
4150      FILE *inf;                 /* stream handed unchanged to C_entries */
4151 {
4152   C_entries (YACC, inf);
4153 }
4154
4155 \f
4156 /* Useful macros.  LOOP_ON_INPUT_LINES is a complete `for' header; write the loop body after it. */
4157 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4158   for (;                        /* empty initialization */              \
4159        !feof (file_pointer)     /* loop test */                         \
4160        &&                       /* run before every iteration: */       \
4161           (readline (&line_buffer, file_pointer), /* read next line */   \
4162            char_pointer = line_buffer.buffer, /* point at its text */   \
4163            TRUE);               /* keep the test true */                \
4164       )                         /* empty increment part */
4165 /* True iff cp is at keyword kw and kw ends there (notinname); cp then moves past kw and spaces. */
4166 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
4167   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
4168    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
4169    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
4170    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip kw and spaces */
4171
4172 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use notinname and does not skip spaces. */
4173 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4174   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
4175    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
4176    && ((cp) += sizeof(kw)-1))                   /* advance cp past kw */
4177
4178 /*
4179  * Read INF line by line into the global lb, but do no processing here.  This
4180  * is used to do regexp matching on files that have no language defined.
4181  */
4182 static void
4183 just_read_file (inf)
4184      FILE *inf;                 /* stream to read until feof */
4185 {
4186   register char *dummy;         /* the macro needs a pointer; its value is never used */
4187
4188   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4189     continue;                   /* empty body: reading each line is all that is wanted */
4190 }
4191
4192 \f
4193 /* Fortran parsing.  F_takeprec and F_getit are helpers called by Fortran_functions. */
4194
4195 static void F_takeprec __P((void));     /* skips `*N' or `*(*)' at dbp */
4196 static void F_getit __P((FILE *));      /* tags the name found at dbp */
4197 /* Skip an optional `*N' or `*(*)' at dbp; Fortran_functions calls this after a type keyword. */
4198 static void
4199 F_takeprec ()
4200 {
4201   dbp = skip_spaces (dbp);
4202   if (*dbp != '*')
4203     return;                     /* no `*': dbp stays where skip_spaces left it */
4204   dbp++;                        /* step over the `*' */
4205   dbp = skip_spaces (dbp);
4206   if (strneq (dbp, "(*)", 3))
4207     {
4208       dbp += 3;                 /* step over `(*)' */
4209       return;
4210     }
4211   if (!ISDIGIT (*dbp))
4212     {
4213       --dbp;                    /* force failure: back up one char (presumably so the caller matches nothing) */
4214       return;
4215     }
4216   do                            /* step over the run of digits */
4217     dbp++;
4218   while (ISDIGIT (*dbp));
4219 }
4220 /* Tag the name at dbp, or on the next line if this one ended and that one has `&' at index 5. */
4221 static void
4222 F_getit (inf)
4223      FILE *inf;                 /* stream the following line is read from, if needed */
4224 {
4225   register char *cp;            /* scans to the end of the name */
4226
4227   dbp = skip_spaces (dbp);
4228   if (*dbp == '\0')             /* nothing left on this line */
4229     {
4230       readline (&lb, inf);
4231       dbp = lb.buffer;
4232       if (strlen (dbp) < 6 || dbp[5] != '&')    /* length check: never read bytes past the NUL */
4233         return;                 /* no `&' there; the line just read is not examined further here */
4234       dbp += 6;                 /* resume right after the `&' */
4235       dbp = skip_spaces (dbp);
4236     }
4237   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4238     return;                     /* not the start of a name: make no tag */
4239   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4240     continue;
4241   make_tag (dbp, cp-dbp, TRUE,  /* the name is the cp-dbp chars starting at dbp */
4242             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4243 }
4244
4245
4246 static void
4247 Fortran_functions (inf)
4248      FILE *inf;
4249 {
4250   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4251     {
4252       if (*dbp == '%')
4253         dbp++;                  /* Ratfor escape to fortran */
4254       dbp = skip_spaces (dbp);
4255       if (*dbp == '\0')
4256         continue;
4257       switch (lowcase (*dbp))
4258         {
4259         case 'i':
4260           if (nocase_tail ("integer"))
4261             F_takeprec ();
4262           break;
4263         case 'r':
4264           if (nocase_tail ("real"))
4265             F_takeprec ();
4266           break;
4267         case 'l':
4268           if (nocase_tail ("logical"))
4269             F_takeprec ();
4270           break;
4271         case 'c':
4272           if (nocase_tail ("complex") || nocase_tail ("character"))
4273             F_takeprec ();
4274           break;
4275         case 'd':
4276           if (nocase_tail ("double"))
4277             {
4278               dbp = skip_spaces (dbp);
4279               if (*dbp == '\0')
4280                 continue;
4281               if (nocase_tail ("precision"))
4282                 break;
4283               continue;
4284             }
4285           break;
4286         }
4287       dbp = skip_spaces (dbp);
4288       if (*dbp == '\0')
4289         continue;
4290       switch (lowcase (*dbp))
4291         {
4292         case 'f':
4293           if (nocase_tail ("function"))
4294             F_getit (inf);
4295           continue;
4296         case 's':
4297           if (nocase_tail ("subroutine"))
4298             F_getit (inf);
4299           continue;
4300         case 'e':
4301           if (nocase_tail ("entry"))
4302             F_getit (inf);
4303           continue;
4304         case 'b':
4305           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4306             {
4307               dbp = skip_spaces (dbp);
4308               if (*dbp == '\0') /* assume un-named */
4309                 make_tag ("blockdata", 9, TRUE,
4310                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4311               else
4312                 F_getit (inf);  /* look for name */
4313             }
4314           continue;
4315         }
4316     }
4317 }
4318
4319 \f
4320 /*
4321  * Ada parsing
4322  * Original code by
4323  * Philippe Waroquiers (1998)
4324  */
4325
4326 static void Ada_getit __P((FILE *, char *));
4327
4328 /* Once we are positioned after an "interesting" keyword, let's get
4329    the real tag value necessary. */
4330 static void
4331 Ada_getit (inf, name_qualifier)
4332      FILE *inf;
4333      char *name_qualifier;
4334 {
4335   register char *cp;
4336   char *name;
4337   char c;
4338
4339   while (!feof (inf))
4340     {
4341       dbp = skip_spaces (dbp);
4342       if (*dbp == '\0'
4343           || (dbp[0] == '-' && dbp[1] == '-'))
4344         {
4345           readline (&lb, inf);
4346           dbp = lb.buffer;
4347         }
4348       switch (lowcase(*dbp))
4349         {
4350         case 'b':
4351           if (nocase_tail ("body"))
4352             {
4353               /* Skipping body of   procedure body   or   package body or ....
4354                  resetting qualifier to body instead of spec. */
4355               name_qualifier = "/b";
4356               continue;
4357             }
4358           break;
4359         case 't':
4360           /* Skipping type of   task type   or   protected type ... */
4361           if (nocase_tail ("type"))
4362             continue;
4363           break;
4364         }
4365       if (*dbp == '"')
4366         {
4367           dbp += 1;
4368           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4369             continue;
4370         }
4371       else
4372         {
4373           dbp = skip_spaces (dbp);
4374           for (cp = dbp;
4375                (*cp != '\0'
4376                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4377                cp++)
4378             continue;
4379           if (cp == dbp)
4380             return;
4381         }
4382       c = *cp;
4383       *cp = '\0';
4384       name = concat (dbp, name_qualifier, "");
4385       *cp = c;
4386       make_tag (name, strlen (name), TRUE,
4387                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4388       free (name);
4389       if (c == '"')
4390         dbp = cp + 1;
4391       return;
4392     }
4393 }
4394
4395 static void
4396 Ada_funcs (inf)
4397      FILE *inf;
4398 {
4399   bool inquote = FALSE;
4400   bool skip_till_semicolumn = FALSE;
4401
4402   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4403     {
4404       while (*dbp != '\0')
4405         {
4406           /* Skip a string i.e. "abcd". */
4407           if (inquote || (*dbp == '"'))
4408             {
4409               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4410               if (dbp != NULL)
4411                 {
4412                   inquote = FALSE;
4413                   dbp += 1;
4414                   continue;     /* advance char */
4415                 }
4416               else
4417                 {
4418                   inquote = TRUE;
4419                   break;        /* advance line */
4420                 }
4421             }
4422
4423           /* Skip comments. */
4424           if (dbp[0] == '-' && dbp[1] == '-')
4425             break;              /* advance line */
4426
4427           /* Skip character enclosed in single quote i.e. 'a'
4428              and skip single quote starting an attribute i.e. 'Image. */
4429           if (*dbp == '\'')
4430             {
4431               dbp++ ;
4432               if (*dbp != '\0')
4433                 dbp++;
4434               continue;
4435             }
4436
4437           if (skip_till_semicolumn)
4438             {
4439               if (*dbp == ';')
4440                 skip_till_semicolumn = FALSE;
4441               dbp++;
4442               continue;         /* advance char */
4443             }
4444
4445           /* Search for beginning of a token.  */
4446           if (!begtoken (*dbp))
4447             {
4448               dbp++;
4449               continue;         /* advance char */
4450             }
4451
4452           /* We are at the beginning of a token. */
4453           switch (lowcase(*dbp))
4454             {
4455             case 'f':
4456               if (!packages_only && nocase_tail ("function"))
4457                 Ada_getit (inf, "/f");
4458               else
4459                 break;          /* from switch */
4460               continue;         /* advance char */
4461             case 'p':
4462               if (!packages_only && nocase_tail ("procedure"))
4463                 Ada_getit (inf, "/p");
4464               else if (nocase_tail ("package"))
4465                 Ada_getit (inf, "/s");
4466               else if (nocase_tail ("protected")) /* protected type */
4467                 Ada_getit (inf, "/t");
4468               else
4469                 break;          /* from switch */
4470               continue;         /* advance char */
4471
4472             case 'u':
4473               if (typedefs && !packages_only && nocase_tail ("use"))
4474                 {
4475                   /* when tagging types, avoid tagging  use type Pack.Typename;
4476                      for this, we will skip everything till a ; */
4477                   skip_till_semicolumn = TRUE;
4478                   continue;     /* advance char */
4479                 }
4480
4481             case 't':
4482               if (!packages_only && nocase_tail ("task"))
4483                 Ada_getit (inf, "/k");
4484               else if (typedefs && !packages_only && nocase_tail ("type"))
4485                 {
4486                   Ada_getit (inf, "/t");
4487                   while (*dbp != '\0')
4488                     dbp += 1;
4489                 }
4490               else
4491                 break;          /* from switch */
4492               continue;         /* advance char */
4493             }
4494
4495           /* Look for the end of the token. */
4496           while (!endtoken (*dbp))
4497             dbp++;
4498
4499         } /* advance char */
4500     } /* advance line */
4501 }
4502
4503 \f
4504 /*
4505  * Unix and microcontroller assembly tag handling
4506  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4507  * Idea by Bob Weiner, Motorola Inc. (1994)
4508  */
4509 static void
4510 Asm_labels (inf)
4511      FILE *inf;
4512 {
4513   register char *cp;
4514
4515   LOOP_ON_INPUT_LINES (inf, lb, cp)
4516     {
4517       /* If first char is alphabetic or one of [_.$], test for colon
4518          following identifier. */
4519       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4520         {
4521           /* Read past label. */
4522           cp++;
4523           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4524             cp++;
4525           if (*cp == ':' || iswhite (*cp))
4526             /* Found end of label, so copy it and add it to the table. */
4527             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4528                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4529         }
4530     }
4531 }
4532
4533 \f
4534 /*
4535  * Perl support
4536  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4537  * Perl variable names: /^(my|local).../
4538  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4539  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4540  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4541  */
4542 static void
4543 Perl_functions (inf)
4544      FILE *inf;
4545 {
4546   char *package = savestr ("main"); /* current package name */
4547   register char *cp;
4548
4549   LOOP_ON_INPUT_LINES (inf, lb, cp)
4550     {
4551       cp = skip_spaces (cp);
4552
4553       if (LOOKING_AT (cp, "package"))
4554         {
4555           free (package);
4556           get_tag (cp, &package);
4557         }
4558       else if (LOOKING_AT (cp, "sub"))
4559         {
4560           char *pos;
4561           char *sp = cp;
4562
4563           while (!notinname (*cp))
4564             cp++;
4565           if (cp == sp)
4566             continue;           /* nothing found */
4567           if ((pos = etags_strchr (sp, ':')) != NULL
4568               && pos < cp && pos[1] == ':')
4569             /* The name is already qualified. */
4570             make_tag (sp, cp - sp, TRUE,
4571                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4572           else
4573             /* Qualify it. */
4574             {
4575               char savechar, *name;
4576
4577               savechar = *cp;
4578               *cp = '\0';
4579               name = concat (package, "::", sp);
4580               *cp = savechar;
4581               make_tag (name, strlen(name), TRUE,
4582                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4583               free (name);
4584             }
4585         }
4586        else if (globals)        /* only if we are tagging global vars */
4587         {
4588           /* Skip a qualifier, if any. */
4589           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4590           /* After "my" or "local", but before any following paren or space. */
4591           char *varstart = cp;
4592
4593           if (qual              /* should this be removed?  If yes, how? */
4594               && (*cp == '$' || *cp == '@' || *cp == '%'))
4595             {
4596               varstart += 1;
4597               do
4598                 cp++;
4599               while (ISALNUM (*cp) || *cp == '_');
4600             }
4601           else if (qual)
4602             {
4603               /* Should be examining a variable list at this point;
4604                  could insist on seeing an open parenthesis. */
4605               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4606                 cp++;
4607             }
4608           else
4609             continue;
4610
4611           make_tag (varstart, cp - varstart, FALSE,
4612                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4613         }
4614     }
4615   free (package);
4616 }
4617
4618
4619 /*
4620  * Python support
4621  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4622  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4623  * More ideas by seb bacon <seb@jamkit.com> (2002)
4624  */
4625 static void
4626 Python_functions (inf)
4627      FILE *inf;
4628 {
4629   register char *cp;
4630
4631   LOOP_ON_INPUT_LINES (inf, lb, cp)
4632     {
4633       cp = skip_spaces (cp);
4634       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4635         {
4636           char *name = cp;
4637           while (!notinname (*cp) && *cp != ':')
4638             cp++;
4639           make_tag (name, cp - name, TRUE,
4640                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4641         }
4642     }
4643 }
4644
4645 \f
4646 /*
4647  * PHP support
4648  * Look for:
4649  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4650  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4651  *  - /^[ \t]*define\(\"[^\"]+/
4652  * Only with --members:
4653  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4654  * Idea by Diez B. Roggisch (2001)
4655  */
4656 static void
4657 PHP_functions (inf)
4658      FILE *inf;
4659 {
4660   register char *cp, *name;
4661   bool search_identifier = FALSE;
4662
4663   LOOP_ON_INPUT_LINES (inf, lb, cp)
4664     {
4665       cp = skip_spaces (cp);
4666       name = cp;
4667       if (search_identifier
4668           && *cp != '\0')
4669         {
4670           while (!notinname (*cp))
4671             cp++;
4672           make_tag (name, cp - name, TRUE,
4673                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4674           search_identifier = FALSE;
4675         }
4676       else if (LOOKING_AT (cp, "function"))
4677         {
4678           if(*cp == '&')
4679             cp = skip_spaces (cp+1);
4680           if(*cp != '\0')
4681             {
4682               name = cp;
4683               while (!notinname (*cp))
4684                 cp++;
4685               make_tag (name, cp - name, TRUE,
4686                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4687             }
4688           else
4689             search_identifier = TRUE;
4690         }
4691       else if (LOOKING_AT (cp, "class"))
4692         {
4693           if (*cp != '\0')
4694             {
4695               name = cp;
4696               while (*cp != '\0' && !iswhite (*cp))
4697                 cp++;
4698               make_tag (name, cp - name, FALSE,
4699                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4700             }
4701           else
4702             search_identifier = TRUE;
4703         }
4704       else if (strneq (cp, "define", 6)
4705                && (cp = skip_spaces (cp+6))
4706                && *cp++ == '('
4707                && (*cp == '"' || *cp == '\''))
4708         {
4709           char quote = *cp++;
4710           name = cp;
4711           while (*cp != quote && *cp != '\0')
4712             cp++;
4713           make_tag (name, cp - name, FALSE,
4714                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4715         }
4716       else if (members
4717                && LOOKING_AT (cp, "var")
4718                && *cp == '$')
4719         {
4720           name = cp;
4721           while (!notinname(*cp))
4722             cp++;
4723           make_tag (name, cp - name, FALSE,
4724                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4725         }
4726     }
4727 }
4728
4729 \f
4730 /*
4731  * Cobol tag functions
4732  * We could look for anything that could be a paragraph name.
4733  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4734  * Idea by Corny de Souza (1993)
4735  */
4736 static void
4737 Cobol_paragraphs (inf)
4738      FILE *inf;
4739 {
4740   register char *bp, *ep;
4741
4742   LOOP_ON_INPUT_LINES (inf, lb, bp)
4743     {
4744       if (lb.len < 9)
4745         continue;
4746       bp += 8;
4747
4748       /* If eoln, compiler option or comment ignore whole line. */
4749       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4750         continue;
4751
4752       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4753         continue;
4754       if (*ep++ == '.')
4755         make_tag (bp, ep - bp, TRUE,
4756                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4757     }
4758 }
4759
4760 \f
4761 /*
4762  * Makefile support
4763  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4764  */
4765 static void
4766 Makefile_targets (inf)
4767      FILE *inf;
4768 {
4769   register char *bp;
4770
4771   LOOP_ON_INPUT_LINES (inf, lb, bp)
4772     {
4773       if (*bp == '\t' || *bp == '#')
4774         continue;
4775       while (*bp != '\0' && *bp != '=' && *bp != ':')
4776         bp++;
4777       if (*bp == ':' || (globals && *bp == '='))
4778         {
4779           /* We should detect if there is more than one tag, but we do not.
4780              We just skip initial and final spaces. */
4781           char * namestart = skip_spaces (lb.buffer);
4782           while (--bp > namestart)
4783             if (!notinname (*bp))
4784               break;
4785           make_tag (namestart, bp - namestart + 1, TRUE,
4786                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4787         }
4788     }
4789 }
4790
4791 \f
4792 /*
4793  * Pascal parsing
4794  * Original code by Mosur K. Mohan (1989)
4795  *
4796  *  Locates tags for procedures & functions.  Doesn't do any type- or
4797  *  var-definitions.  It does look for the keyword "extern" or
4798  *  "forward" immediately following the procedure statement; if found,
4799  *  the tag is skipped.
4800  */
4801 static void
4802 Pascal_functions (inf)
4803      FILE *inf;
4804 {
4805   linebuffer tline;             /* mostly copied from C_entries */
4806   long save_lcno;
4807   int save_lineno, namelen, taglen;
4808   char c, *name;
4809
4810   bool                          /* each of these flags is TRUE if: */
4811     incomment,                  /* point is inside a comment */
4812     inquote,                    /* point is inside '..' string */
4813     get_tagname,                /* point is after PROCEDURE/FUNCTION
4814                                    keyword, so next item = potential tag */
4815     found_tag,                  /* point is after a potential tag */
4816     inparms,                    /* point is within parameter-list */
4817     verify_tag;                 /* point has passed the parm-list, so the
4818                                    next token will determine whether this
4819                                    is a FORWARD/EXTERN to be ignored, or
4820                                    whether it is a real tag */
4821
4822   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4823   name = NULL;                  /* keep compiler quiet */
4824   dbp = lb.buffer;
4825   *dbp = '\0';
4826   linebuffer_init (&tline);
4827
4828   incomment = inquote = FALSE;
4829   found_tag = FALSE;            /* have a proc name; check if extern */
4830   get_tagname = FALSE;          /* found "procedure" keyword         */
4831   inparms = FALSE;              /* found '(' after "proc"            */
4832   verify_tag = FALSE;           /* check if "extern" is ahead        */
4833
4834
4835   while (!feof (inf))           /* long main loop to get next char */
4836     {
4837       c = *dbp++;
4838       if (c == '\0')            /* if end of line */
4839         {
4840           readline (&lb, inf);
4841           dbp = lb.buffer;
4842           if (*dbp == '\0')
4843             continue;
4844           if (!((found_tag && verify_tag)
4845                 || get_tagname))
4846             c = *dbp++;         /* only if don't need *dbp pointing
4847                                    to the beginning of the name of
4848                                    the procedure or function */
4849         }
4850       if (incomment)
4851         {
4852           if (c == '}')         /* within { } comments */
4853             incomment = FALSE;
4854           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4855             {
4856               dbp++;
4857               incomment = FALSE;
4858             }
4859           continue;
4860         }
4861       else if (inquote)
4862         {
4863           if (c == '\'')
4864             inquote = FALSE;
4865           continue;
4866         }
4867       else
4868         switch (c)
4869           {
4870           case '\'':
4871             inquote = TRUE;     /* found first quote */
4872             continue;
4873           case '{':             /* found open { comment */
4874             incomment = TRUE;
4875             continue;
4876           case '(':
4877             if (*dbp == '*')    /* found open (* comment */
4878               {
4879                 incomment = TRUE;
4880                 dbp++;
4881               }
4882             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4883               inparms = TRUE;
4884             continue;
4885           case ')':             /* end of parms list */
4886             if (inparms)
4887               inparms = FALSE;
4888             continue;
4889           case ';':
4890             if (found_tag && !inparms) /* end of proc or fn stmt */
4891               {
4892                 verify_tag = TRUE;
4893                 break;
4894               }
4895             continue;
4896           }
4897       if (found_tag && verify_tag && (*dbp != ' '))
4898         {
4899           /* Check if this is an "extern" declaration. */
4900           if (*dbp == '\0')
4901             continue;
4902           if (lowcase (*dbp == 'e'))
4903             {
4904               if (nocase_tail ("extern")) /* superfluous, really! */
4905                 {
4906                   found_tag = FALSE;
4907                   verify_tag = FALSE;
4908                 }
4909             }
4910           else if (lowcase (*dbp) == 'f')
4911             {
4912               if (nocase_tail ("forward")) /* check for forward reference */
4913                 {
4914                   found_tag = FALSE;
4915                   verify_tag = FALSE;
4916                 }
4917             }
4918           if (found_tag && verify_tag) /* not external proc, so make tag */
4919             {
4920               found_tag = FALSE;
4921               verify_tag = FALSE;
4922               make_tag (name, namelen, TRUE,
4923                         tline.buffer, taglen, save_lineno, save_lcno);
4924               continue;
4925             }
4926         }
4927       if (get_tagname)          /* grab name of proc or fn */
4928         {
4929           char *cp;
4930
4931           if (*dbp == '\0')
4932             continue;
4933
4934           /* Find block name. */
4935           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4936             continue;
4937
4938           /* Save all values for later tagging. */
4939           linebuffer_setlen (&tline, lb.len);
4940           strcpy (tline.buffer, lb.buffer);
4941           save_lineno = lineno;
4942           save_lcno = linecharno;
4943           name = tline.buffer + (dbp - lb.buffer);
4944           namelen = cp - dbp;
4945           taglen = cp - lb.buffer + 1;
4946
4947           dbp = cp;             /* set dbp to e-o-token */
4948           get_tagname = FALSE;
4949           found_tag = TRUE;
4950           continue;
4951
4952           /* And proceed to check for "extern". */
4953         }
4954       else if (!incomment && !inquote && !found_tag)
4955         {
4956           /* Check for proc/fn keywords. */
4957           switch (lowcase (c))
4958             {
4959             case 'p':
4960               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4961                 get_tagname = TRUE;
4962               continue;
4963             case 'f':
4964               if (nocase_tail ("unction"))
4965                 get_tagname = TRUE;
4966               continue;
4967             }
4968         }
4969     } /* while not eof */
4970
4971   free (tline.buffer);
4972 }
4973
4974 \f
4975 /*
4976  * Lisp tag functions
4977  *  look for (def or (DEF, quote or QUOTE
4978  */
4979
4980 static void L_getit __P((void));
4981
4982 static void
4983 L_getit ()
4984 {
4985   if (*dbp == '\'')             /* Skip prefix quote */
4986     dbp++;
4987   else if (*dbp == '(')
4988   {
4989     dbp++;
4990     /* Try to skip "(quote " */
4991     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4992       /* Ok, then skip "(" before name in (defstruct (foo)) */
4993       dbp = skip_spaces (dbp);
4994   }
4995   get_tag (dbp, NULL);
4996 }
4997
4998 static void
4999 Lisp_functions (inf)
5000      FILE *inf;
5001 {
5002   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5003     {
5004       if (dbp[0] != '(')
5005         continue;
5006
5007       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
5008         {
5009           dbp = skip_non_spaces (dbp);
5010           dbp = skip_spaces (dbp);
5011           L_getit ();
5012         }
5013       else
5014         {
5015           /* Check for (foo::defmumble name-defined ... */
5016           do
5017             dbp++;
5018           while (!notinname (*dbp) && *dbp != ':');
5019           if (*dbp == ':')
5020             {
5021               do
5022                 dbp++;
5023               while (*dbp == ':');
5024
5025               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
5026                 {
5027                   dbp = skip_non_spaces (dbp);
5028                   dbp = skip_spaces (dbp);
5029                   L_getit ();
5030                 }
5031             }
5032         }
5033     }
5034 }
5035
5036 \f
5037 /*
5038  * Lua script language parsing
5039  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
5040  *
5041  *  "function" and "local function" are tags if they start at column 1.
5042  */
5043 static void
5044 Lua_functions (inf)
5045      FILE *inf;
5046 {
5047   register char *bp;
5048
5049   LOOP_ON_INPUT_LINES (inf, lb, bp)
5050     {
5051       if (bp[0] != 'f' && bp[0] != 'l')
5052         continue;
5053
5054       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
5055
5056       if (LOOKING_AT (bp, "function"))
5057         get_tag (bp, NULL);
5058     }
5059 }
5060
5061 \f
5062 /*
5063  * Postscript tags
5064  * Just look for lines where the first character is '/'
5065  * Also look at "defineps" for PSWrap
5066  * Ideas by:
5067  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
5068  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
5069  */
5070 static void
5071 PS_functions (inf)
5072      FILE *inf;
5073 {
5074   register char *bp, *ep;
5075
5076   LOOP_ON_INPUT_LINES (inf, lb, bp)
5077     {
5078       if (bp[0] == '/')
5079         {
5080           for (ep = bp+1;
5081                *ep != '\0' && *ep != ' ' && *ep != '{';
5082                ep++)
5083             continue;
5084           make_tag (bp, ep - bp, TRUE,
5085                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5086         }
5087       else if (LOOKING_AT (bp, "defineps"))
5088         get_tag (bp, NULL);
5089     }
5090 }
5091
5092 \f
5093 /*
5094  * Forth tags
5095  * Ignore anything after \ followed by space or in ( )
5096  * Look for words defined by :
5097  * Look for constant, code, create, defer, value, and variable
5098  * OBP extensions:  Look for buffer:, field,
5099  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5100  */
5101 static void
5102 Forth_words (inf)
5103      FILE *inf;
5104 {
5105   register char *bp;
5106
5107   LOOP_ON_INPUT_LINES (inf, lb, bp)
5108     while ((bp = skip_spaces (bp))[0] != '\0')
5109       if (bp[0] == '\\' && iswhite(bp[1]))
5110         break;                  /* read next line */
5111       else if (bp[0] == '(' && iswhite(bp[1]))
5112         do                      /* skip to ) or eol */
5113           bp++;
5114         while (*bp != ')' && *bp != '\0');
5115       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5116                || LOOKING_AT_NOCASE (bp, "constant")
5117                || LOOKING_AT_NOCASE (bp, "code")
5118                || LOOKING_AT_NOCASE (bp, "create")
5119                || LOOKING_AT_NOCASE (bp, "defer")
5120                || LOOKING_AT_NOCASE (bp, "value")
5121                || LOOKING_AT_NOCASE (bp, "variable")
5122                || LOOKING_AT_NOCASE (bp, "buffer:")
5123                || LOOKING_AT_NOCASE (bp, "field"))
5124         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5125       else
5126         bp = skip_non_spaces (bp);
5127 }
5128
5129 \f
5130 /*
5131  * Scheme tag functions
5132  * look for (def... xyzzy
5133  *          (def... (xyzzy
5134  *          (def ... ((...(xyzzy ....
5135  *          (set! xyzzy
5136  * Original code by Ken Haase (1985?)
5137  */
5138 static void
5139 Scheme_functions (inf)
5140      FILE *inf;
5141 {
5142   register char *bp;
5143
5144   LOOP_ON_INPUT_LINES (inf, lb, bp)
5145     {
5146       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5147         {
5148           bp = skip_non_spaces (bp+4);
5149           /* Skip over open parens and white space */
5150           while (notinname (*bp))
5151             bp++;
5152           get_tag (bp, NULL);
5153         }
5154       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5155         get_tag (bp, NULL);
5156     }
5157 }
5158
5159 \f
5160 /* Find tags in TeX and LaTeX input files.  */
5161
5162 /* TEX_toktab is a table of TeX control sequences that define tags.
5163  * Each entry records one such control sequence.
5164  *
5165  * Original code from who knows whom.
5166  * Ideas by:
5167  *   Stefan Monnier (2002)
5168  */
5169
5170 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5171
5172 /* Default set of control sequences to put into TEX_toktab.
5173    The value of environment var TEXTAGS is prepended to this.  */
5174 static char *TEX_defenv = "\
5175 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5176 :part:appendix:entry:index:def\
5177 :newcommand:renewcommand:newenvironment:renewenvironment";
5178
5179 static void TEX_mode __P((FILE *));
5180 static void TEX_decode_env __P((char *, char *));
5181
/* Escape character and group delimiters currently in effect.  TEX_mode
   sets all three each time it is called: '\\', '{', '}' when backslash
   is found to be the escape character, '!', '<', '>' otherwise. */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
5185
5186 /*
5187  * TeX/LaTeX scanning loop.
5188  */
5189 static void
5190 TeX_commands (inf)
5191      FILE *inf;
5192 {
5193   char *cp;
5194   linebuffer *key;
5195
5196   /* Select either \ or ! as escape character.  */
5197   TEX_mode (inf);
5198
5199   /* Initialize token table once from environment. */
5200   if (TEX_toktab == NULL)
5201     TEX_decode_env ("TEXTAGS", TEX_defenv);
5202
5203   LOOP_ON_INPUT_LINES (inf, lb, cp)
5204     {
5205       /* Look at each TEX keyword in line. */
5206       for (;;)
5207         {
5208           /* Look for a TEX escape. */
5209           while (*cp++ != TEX_esc)
5210             if (cp[-1] == '\0' || cp[-1] == '%')
5211               goto tex_next_line;
5212
5213           for (key = TEX_toktab; key->buffer != NULL; key++)
5214             if (strneq (cp, key->buffer, key->len))
5215               {
5216                 register char *p;
5217                 int namelen, linelen;
5218                 bool opgrp = FALSE;
5219
5220                 cp = skip_spaces (cp + key->len);
5221                 if (*cp == TEX_opgrp)
5222                   {
5223                     opgrp = TRUE;
5224                     cp++;
5225                   }
5226                 for (p = cp;
5227                      (!iswhite (*p) && *p != '#' &&
5228                       *p != TEX_opgrp && *p != TEX_clgrp);
5229                      p++)
5230                   continue;
5231                 namelen = p - cp;
5232                 linelen = lb.len;
5233                 if (!opgrp || *p == TEX_clgrp)
5234                   {
5235                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5236                       p++;
5237                     linelen = p - lb.buffer + 1;
5238                   }
5239                 make_tag (cp, namelen, TRUE,
5240                           lb.buffer, linelen, lineno, linecharno);
5241                 goto tex_next_line; /* We only tag a line once */
5242               }
5243         }
5244     tex_next_line:
5245       ;
5246     }
5247 }
5248
5249 #define TEX_LESC '\\'
5250 #define TEX_SESC '!'
5251
5252 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5253    chars accordingly. */
5254 static void
5255 TEX_mode (inf)
5256      FILE *inf;
5257 {
5258   int c;
5259
5260   while ((c = getc (inf)) != EOF)
5261     {
5262       /* Skip to next line if we hit the TeX comment char. */
5263       if (c == '%')
5264         while (c != '\n' && c != EOF)
5265           c = getc (inf);
5266       else if (c == TEX_LESC || c == TEX_SESC )
5267         break;
5268     }
5269
5270   if (c == TEX_LESC)
5271     {
5272       TEX_esc = TEX_LESC;
5273       TEX_opgrp = '{';
5274       TEX_clgrp = '}';
5275     }
5276   else
5277     {
5278       TEX_esc = TEX_SESC;
5279       TEX_opgrp = '<';
5280       TEX_clgrp = '>';
5281     }
5282   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5283      No attempt is made to correct the situation. */
5284   rewind (inf);
5285 }
5286
5287 /* Read environment and prepend it to the default string.
5288    Build token table. */
5289 static void
5290 TEX_decode_env (evarname, defenv)
5291      char *evarname;
5292      char *defenv;
5293 {
5294   register char *env, *p;
5295   int i, len;
5296
5297   /* Append default string to environment. */
5298   env = getenv (evarname);
5299   if (!env)
5300     env = defenv;
5301   else
5302     {
5303       char *oldenv = env;
5304       env = concat (oldenv, defenv, "");
5305     }
5306
5307   /* Allocate a token table */
5308   for (len = 1, p = env; p;)
5309     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5310       len++;
5311   TEX_toktab = xnew (len, linebuffer);
5312
5313   /* Unpack environment string into token table. Be careful about */
5314   /* zero-length strings (leading ':', "::" and trailing ':') */
5315   for (i = 0; *env != '\0';)
5316     {
5317       p = etags_strchr (env, ':');
5318       if (!p)                   /* End of environment string. */
5319         p = env + strlen (env);
5320       if (p - env > 0)
5321         {                       /* Only non-zero strings. */
5322           TEX_toktab[i].buffer = savenstr (env, p - env);
5323           TEX_toktab[i].len = p - env;
5324           i++;
5325         }
5326       if (*p)
5327         env = p + 1;
5328       else
5329         {
5330           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5331           TEX_toktab[i].len = 0;
5332           break;
5333         }
5334     }
5335 }
5336
5337 \f
5338 /* Texinfo support.  Dave Love, Mar. 2000.  */
5339 static void
5340 Texinfo_nodes (inf)
5341      FILE * inf;
5342 {
5343   char *cp, *start;
5344   LOOP_ON_INPUT_LINES (inf, lb, cp)
5345     if (LOOKING_AT (cp, "@node"))
5346       {
5347         start = cp;
5348         while (*cp != '\0' && *cp != ',')
5349           cp++;
5350         make_tag (start, cp - start, TRUE,
5351                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5352       }
5353 }
5354
5355 \f
5356 /*
5357  * HTML support.
5358  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5359  * Contents of <a name=xxx> are tags with name xxx.
5360  *
5361  * Francesco Potortì, 2002.
5362  */
5363 static void
5364 HTML_labels (inf)
5365      FILE * inf;
5366 {
5367   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5368   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5369   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5370   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5371   char *end;
5372
5373
5374   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5375
5376   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5377     for (;;)                    /* loop on the same line */
5378       {
5379         if (skiptag)            /* skip HTML tag */
5380           {
5381             while (*dbp != '\0' && *dbp != '>')
5382               dbp++;
5383             if (*dbp == '>')
5384               {
5385                 dbp += 1;
5386                 skiptag = FALSE;
5387                 continue;       /* look on the same line */
5388               }
5389             break;              /* go to next line */
5390           }
5391
5392         else if (intag) /* look for "name=" or "id=" */
5393           {
5394             while (*dbp != '\0' && *dbp != '>'
5395                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5396               dbp++;
5397             if (*dbp == '\0')
5398               break;            /* go to next line */
5399             if (*dbp == '>')
5400               {
5401                 dbp += 1;
5402                 intag = FALSE;
5403                 continue;       /* look on the same line */
5404               }
5405             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5406                 || LOOKING_AT_NOCASE (dbp, "id="))
5407               {
5408                 bool quoted = (dbp[0] == '"');
5409
5410                 if (quoted)
5411                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5412                     continue;
5413                 else
5414                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5415                     continue;
5416                 linebuffer_setlen (&token_name, end - dbp);
5417                 strncpy (token_name.buffer, dbp, end - dbp);
5418                 token_name.buffer[end - dbp] = '\0';
5419
5420                 dbp = end;
5421                 intag = FALSE;  /* we found what we looked for */
5422                 skiptag = TRUE; /* skip to the end of the tag */
5423                 getnext = TRUE; /* then grab the text */
5424                 continue;       /* look on the same line */
5425               }
5426             dbp += 1;
5427           }
5428
5429         else if (getnext)       /* grab next tokens and tag them */
5430           {
5431             dbp = skip_spaces (dbp);
5432             if (*dbp == '\0')
5433               break;            /* go to next line */
5434             if (*dbp == '<')
5435               {
5436                 intag = TRUE;
5437                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5438                 continue;       /* look on the same line */
5439               }
5440
5441             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5442               continue;
5443             make_tag (token_name.buffer, token_name.len, TRUE,
5444                       dbp, end - dbp, lineno, linecharno);
5445             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5446             getnext = FALSE;
5447             break;              /* go to next line */
5448           }
5449
5450         else                    /* look for an interesting HTML tag */
5451           {
5452             while (*dbp != '\0' && *dbp != '<')
5453               dbp++;
5454             if (*dbp == '\0')
5455               break;            /* go to next line */
5456             intag = TRUE;
5457             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5458               {
5459                 inanchor = TRUE;
5460                 continue;       /* look on the same line */
5461               }
5462             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5463                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5464                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5465                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5466               {
5467                 intag = FALSE;
5468                 getnext = TRUE;
5469                 continue;       /* look on the same line */
5470               }
5471             dbp += 1;
5472           }
5473       }
5474 }
5475
5476 \f
5477 /*
5478  * Prolog support
5479  *
5480  * Assumes that the predicate or rule starts at column 0.
5481  * Only the first clause of a predicate or rule is added.
5482  * Original code by Sunichirou Sugou (1989)
5483  * Rewritten by Anders Lindgren (1996)
5484  */
5485 static int prolog_pr __P((char *, char *));
5486 static void prolog_skip_comment __P((linebuffer *, FILE *));
5487 static int prolog_atom __P((char *, int));
5488
5489 static void
5490 Prolog_functions (inf)
5491      FILE *inf;
5492 {
5493   char *cp, *last;
5494   int len;
5495   int allocated;
5496
5497   allocated = 0;
5498   len = 0;
5499   last = NULL;
5500
5501   LOOP_ON_INPUT_LINES (inf, lb, cp)
5502     {
5503       if (cp[0] == '\0')        /* Empty line */
5504         continue;
5505       else if (iswhite (cp[0])) /* Not a predicate */
5506         continue;
5507       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5508         prolog_skip_comment (&lb, inf);
5509       else if ((len = prolog_pr (cp, last)) > 0)
5510         {
5511           /* Predicate or rule.  Store the function name so that we
5512              only generate a tag for the first clause.  */
5513           if (last == NULL)
5514             last = xnew(len + 1, char);
5515           else if (len + 1 > allocated)
5516             xrnew (last, len + 1, char);
5517           allocated = len + 1;
5518           strncpy (last, cp, len);
5519           last[len] = '\0';
5520         }
5521     }
5522   free (last);
5523 }
5524
5525
5526 static void
5527 prolog_skip_comment (plb, inf)
5528      linebuffer *plb;
5529      FILE *inf;
5530 {
5531   char *cp;
5532
5533   do
5534     {
5535       for (cp = plb->buffer; *cp != '\0'; cp++)
5536         if (cp[0] == '*' && cp[1] == '/')
5537           return;
5538       readline (plb, inf);
5539     }
5540   while (!feof(inf));
5541 }
5542
5543 /*
5544  * A predicate or rule definition is added if it matches:
5545  *     <beginning of line><Prolog Atom><whitespace>(
5546  * or  <beginning of line><Prolog Atom><whitespace>:-
5547  *
5548  * It is added to the tags database if it doesn't match the
5549  * name of the previous clause header.
5550  *
5551  * Return the size of the name of the predicate or rule, or 0 if no
5552  * header was found.
5553  */
5554 static int
5555 prolog_pr (s, last)
5556      char *s;
5557      char *last;                /* Name of last clause. */
5558 {
5559   int pos;
5560   int len;
5561
5562   pos = prolog_atom (s, 0);
5563   if (pos < 1)
5564     return 0;
5565
5566   len = pos;
5567   pos = skip_spaces (s + pos) - s;
5568
5569   if ((s[pos] == '.'
5570        || (s[pos] == '(' && (pos += 1))
5571        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5572       && (last == NULL          /* save only the first clause */
5573           || len != (int)strlen (last)
5574           || !strneq (s, last, len)))
5575         {
5576           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5577           return len;
5578         }
5579   else
5580     return 0;
5581 }
5582
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumeric characters and underscores, starting
 *   with a lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER (s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      do
        pos++;
      while (ISALNUM (s[pos]) || s[pos] == '_');
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote.  */
  for (pos++;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* The backslash and the character it quotes.  */
        if (s[pos + 1] == '\0')
          return -1;
        pos += 2;
        break;

      case '\'':
        if (s[pos + 1] != '\'')
          return pos + 1 - start;       /* closing quote is included */
        pos += 2;                       /* a doubled quote */
        break;

      default:
        pos++;
        break;
      }
}
5641
5642 \f
5643 /*
5644  * Support for Erlang
5645  *
5646  * Generates tags for functions, defines, and records.
5647  * Assumes that Erlang functions start at column 0.
5648  * Original code by Anders Lindgren (1996)
5649  */
5650 static int erlang_func __P((char *, char *));
5651 static void erlang_attribute __P((char *));
5652 static int erlang_atom __P((char *));
5653
5654 static void
5655 Erlang_functions (inf)
5656      FILE *inf;
5657 {
5658   char *cp, *last;
5659   int len;
5660   int allocated;
5661
5662   allocated = 0;
5663   len = 0;
5664   last = NULL;
5665
5666   LOOP_ON_INPUT_LINES (inf, lb, cp)
5667     {
5668       if (cp[0] == '\0')        /* Empty line */
5669         continue;
5670       else if (iswhite (cp[0])) /* Not function nor attribute */
5671         continue;
5672       else if (cp[0] == '%')    /* comment */
5673         continue;
5674       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5675         continue;
5676       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5677         {
5678           erlang_attribute (cp);
5679           if (last != NULL)
5680             {
5681               free (last);
5682               last = NULL;
5683             }
5684         }
5685       else if ((len = erlang_func (cp, last)) > 0)
5686         {
5687           /*
5688            * Function.  Store the function name so that we only
5689            * generates a tag for the first clause.
5690            */
5691           if (last == NULL)
5692             last = xnew (len + 1, char);
5693           else if (len + 1 > allocated)
5694             xrnew (last, len + 1, char);
5695           allocated = len + 1;
5696           strncpy (last, cp, len);
5697           last[len] = '\0';
5698         }
5699     }
5700   free (last);
5701 }
5702
5703
5704 /*
5705  * A function definition is added if it matches:
5706  *     <beginning of line><Erlang Atom><whitespace>(
5707  *
5708  * It is added to the tags database if it doesn't match the
5709  * name of the previous clause header.
5710  *
5711  * Return the size of the name of the function, or 0 if no function
5712  * was found.
5713  */
5714 static int
5715 erlang_func (s, last)
5716      char *s;
5717      char *last;                /* Name of last clause. */
5718 {
5719   int pos;
5720   int len;
5721
5722   pos = erlang_atom (s);
5723   if (pos < 1)
5724     return 0;
5725
5726   len = pos;
5727   pos = skip_spaces (s + pos) - s;
5728
5729   /* Save only the first clause. */
5730   if (s[pos++] == '('
5731       && (last == NULL
5732           || len != (int)strlen (last)
5733           || !strneq (s, last, len)))
5734         {
5735           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5736           return len;
5737         }
5738
5739   return 0;
5740 }
5741
5742
5743 /*
5744  * Handle attributes.  Currently, tags are generated for defines
5745  * and records.
5746  *
5747  * They are on the form:
5748  * -define(foo, bar).
5749  * -define(Foo(M, N), M+N).
5750  * -record(graph, {vtab = notable, cyclic = true}).
5751  */
5752 static void
5753 erlang_attribute (s)
5754      char *s;
5755 {
5756   char *cp = s;
5757
5758   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5759       && *cp++ == '(')
5760     {
5761       int len = erlang_atom (skip_spaces (cp));
5762       if (len > 0)
5763         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5764     }
5765   return;
5766 }
5767
5768
/*
 * Consume an Erlang atom (or variable) at the start of S.
 *
 * An unquoted atom starts with a letter or underscore and continues
 * with alphanumeric characters and underscores.  A quoted atom is
 * enclosed in single quotes; a backslash quotes the next character.
 *
 * Return the number of bytes consumed (quotes included), or 0 if S
 * does not start with an atom or a quoted atom is not closed on this
 * line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos = 0;

  if (s[0] == '\'')
    {
      pos = 1;
      while (s[pos] != '\'')
        {
          if (s[pos] == '\0')   /* multiline quoted atoms are ignored */
            return 0;
          if (s[pos] == '\\')
            {
              pos++;            /* look at the quoted character */
              if (s[pos] == '\0')
                return 0;
            }
          pos++;
        }
      return pos + 1;           /* count the closing quote */
    }

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      pos = 1;
      while (ISALNUM (s[pos]) || s[pos] == '_')
        pos++;
    }

  return pos;
}
5797
5798 \f
5799 static char *scan_separators __P((char *));
5800 static void add_regex __P((char *, language *));
5801 static char *substitute __P((char *, char *, struct re_registers *));
5802
/*
 * Take a string like "/blah/" and turn it into "blah": NAME[0] is the
 * separator, and the text up to the next unquoted separator is copied
 * down to the start of NAME and null terminated.  A backslash quotes
 * the separator; \a, \b, \d, \e, \f, \n, \r, \t and \v are replaced by
 * the corresponding control character; a backslash before any other
 * character is kept.  Works in place.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* next input char to examine */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Handle the quoted character right away.  */
          src++;
          if (*src == '\0')
            break;              /* dangling backslash at end of string */
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)      */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)         */
            case 'e': *dst++ = 033; break;    /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator loses its quote; anything else
                 keeps it.  */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator: end of field */
      else
        *dst++ = *src;
    }

  /* Terminate copied string. */
  *dst = '\0';

  /* Signal an unterminated regexp when we did not stop on SEP.  */
  return (*src == sep) ? src : NULL;
}
5862
5863 /* Look at the argument of --regex or --no-regex and do the right
5864    thing.  Same for each line of a regexp file. */
5865 static void
5866 analyse_regex (regex_arg)
5867      char *regex_arg;
5868 {
5869   if (regex_arg == NULL)
5870     {
5871       free_regexps ();          /* --no-regex: remove existing regexps */
5872       return;
5873     }
5874
5875   /* A real --regexp option or a line in a regexp file. */
5876   switch (regex_arg[0])
5877     {
5878       /* Comments in regexp file or null arg to --regex. */
5879     case '\0':
5880     case ' ':
5881     case '\t':
5882       break;
5883
5884       /* Read a regex file.  This is recursive and may result in a
5885          loop, which will stop when the file descriptors are exhausted. */
5886     case '@':
5887       {
5888         FILE *regexfp;
5889         linebuffer regexbuf;
5890         char *regexfile = regex_arg + 1;
5891
5892         /* regexfile is a file containing regexps, one per line. */
5893         regexfp = fopen (regexfile, "r");
5894         if (regexfp == NULL)
5895           {
5896             pfatal (regexfile);
5897             return;
5898           }
5899         linebuffer_init (&regexbuf);
5900         while (readline_internal (&regexbuf, regexfp) > 0)
5901           analyse_regex (regexbuf.buffer);
5902         free (regexbuf.buffer);
5903         fclose (regexfp);
5904       }
5905       break;
5906
5907       /* Regexp to be used for a specific language only. */
5908     case '{':
5909       {
5910         language *lang;
5911         char *lang_name = regex_arg + 1;
5912         char *cp;
5913
5914         for (cp = lang_name; *cp != '}'; cp++)
5915           if (*cp == '\0')
5916             {
5917               error ("unterminated language name in regex: %s", regex_arg);
5918               return;
5919             }
5920         *cp++ = '\0';
5921         lang = get_language_from_langname (lang_name);
5922         if (lang == NULL)
5923           return;
5924         add_regex (cp, lang);
5925       }
5926       break;
5927
5928       /* Regexp to be used for any language. */
5929     default:
5930       add_regex (regex_arg, NULL);
5931       break;
5932     }
5933 }
5934
5935 /* Separate the regexp pattern, compile it,
5936    and care for optional name and modifiers. */
5937 static void
5938 add_regex (regexp_pattern, lang)
5939      char *regexp_pattern;
5940      language *lang;
5941 {
5942   static struct re_pattern_buffer zeropattern;
5943   char sep, *pat, *name, *modifiers;
5944   const char *err;
5945   struct re_pattern_buffer *patbuf;
5946   regexp *rp;
5947   bool
5948     force_explicit_name = TRUE, /* do not use implicit tag names */
5949     ignore_case = FALSE,        /* case is significant */
5950     multi_line = FALSE,         /* matches are done one line at a time */
5951     single_line = FALSE;        /* dot does not match newline */
5952
5953
5954   if (strlen(regexp_pattern) < 3)
5955     {
5956       error ("null regexp", (char *)NULL);
5957       return;
5958     }
5959   sep = regexp_pattern[0];
5960   name = scan_separators (regexp_pattern);
5961   if (name == NULL)
5962     {
5963       error ("%s: unterminated regexp", regexp_pattern);
5964       return;
5965     }
5966   if (name[1] == sep)
5967     {
5968       error ("null name for regexp \"%s\"", regexp_pattern);
5969       return;
5970     }
5971   modifiers = scan_separators (name);
5972   if (modifiers == NULL)        /* no terminating separator --> no name */
5973     {
5974       modifiers = name;
5975       name = "";
5976     }
5977   else
5978     modifiers += 1;             /* skip separator */
5979
5980   /* Parse regex modifiers. */
5981   for (; modifiers[0] != '\0'; modifiers++)
5982     switch (modifiers[0])
5983       {
5984       case 'N':
5985         if (modifiers == name)
5986           error ("forcing explicit tag name but no name, ignoring", NULL);
5987         force_explicit_name = TRUE;
5988         break;
5989       case 'i':
5990         ignore_case = TRUE;
5991         break;
5992       case 's':
5993         single_line = TRUE;
5994         /* FALLTHRU */
5995       case 'm':
5996         multi_line = TRUE;
5997         need_filebuf = TRUE;
5998         break;
5999       default:
6000         {
6001           char wrongmod [2];
6002           wrongmod[0] = modifiers[0];
6003           wrongmod[1] = '\0';
6004           error ("invalid regexp modifier `%s', ignoring", wrongmod);
6005         }
6006         break;
6007       }
6008
6009   patbuf = xnew (1, struct re_pattern_buffer);
6010   *patbuf = zeropattern;
6011   if (ignore_case)
6012     {
6013       static char lc_trans[CHARS];
6014       int i;
6015       for (i = 0; i < CHARS; i++)
6016         lc_trans[i] = lowcase (i);
6017       patbuf->translate = lc_trans;     /* translation table to fold case  */
6018     }
6019
6020   if (multi_line)
6021     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
6022   else
6023     pat = regexp_pattern;
6024
6025   if (single_line)
6026     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
6027   else
6028     re_set_syntax (RE_SYNTAX_EMACS);
6029
6030   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
6031   if (multi_line)
6032     free (pat);
6033   if (err != NULL)
6034     {
6035       error ("%s while compiling pattern", err);
6036       return;
6037     }
6038
6039   rp = p_head;
6040   p_head = xnew (1, regexp);
6041   p_head->pattern = savestr (regexp_pattern);
6042   p_head->p_next = rp;
6043   p_head->lang = lang;
6044   p_head->pat = patbuf;
6045   p_head->name = savestr (name);
6046   p_head->error_signaled = FALSE;
6047   p_head->force_explicit_name = force_explicit_name;
6048   p_head->ignore_case = ignore_case;
6049   p_head->multi_line = multi_line;
6050 }
6051
6052 /*
6053  * Do the substitutions indicated by the regular expression and
6054  * arguments.
6055  */
6056 static char *
6057 substitute (in, out, regs)
6058      char *in, *out;
6059      struct re_registers *regs;
6060 {
6061   char *result, *t;
6062   int size, dig, diglen;
6063
6064   result = NULL;
6065   size = strlen (out);
6066
6067   /* Pass 1: figure out how much to allocate by finding all \N strings. */
6068   if (out[size - 1] == '\\')
6069     fatal ("pattern error in \"%s\"", out);
6070   for (t = etags_strchr (out, '\\');
6071        t != NULL;
6072        t = etags_strchr (t + 2, '\\'))
6073     if (ISDIGIT (t[1]))
6074       {
6075         dig = t[1] - '0';
6076         diglen = regs->end[dig] - regs->start[dig];
6077         size += diglen - 2;
6078       }
6079     else
6080       size -= 1;
6081
6082   /* Allocate space and do the substitutions. */
6083   assert (size >= 0);
6084   result = xnew (size + 1, char);
6085
6086   for (t = result; *out != '\0'; out++)
6087     if (*out == '\\' && ISDIGIT (*++out))
6088       {
6089         dig = *out - '0';
6090         diglen = regs->end[dig] - regs->start[dig];
6091         strncpy (t, in + regs->start[dig], diglen);
6092         t += diglen;
6093       }
6094     else
6095       *t++ = *out;
6096   *t = '\0';
6097
6098   assert (t <= result + size);
6099   assert (t - result == (int)strlen (result));
6100
6101   return result;
6102 }
6103
6104 /* Deallocate all regexps. */
6105 static void
6106 free_regexps ()
6107 {
6108   regexp *rp;
6109   while (p_head != NULL)
6110     {
6111       rp = p_head->p_next;
6112       free (p_head->pattern);
6113       free (p_head->name);
6114       free (p_head);
6115       p_head = rp;
6116     }
6117   return;
6118 }
6119
6120 /*
6121  * Reads the whole file as a single string from `filebuf' and looks for
6122  * multi-line regular expressions, creating tags on matches.
6123  * readline already dealt with normal regexps.
6124  *
6125  * Idea by Ben Wing <ben@666.com> (2002).
6126  */
6127 static void
6128 regex_tag_multiline ()
6129 {
6130   char *buffer = filebuf.buffer;
6131   regexp *rp;
6132   char *name;
6133
6134   for (rp = p_head; rp != NULL; rp = rp->p_next)
6135     {
6136       int match = 0;
6137
6138       if (!rp->multi_line)
6139         continue;               /* skip normal regexps */
6140
6141       /* Generic initialisations before parsing file from memory. */
6142       lineno = 1;               /* reset global line number */
6143       charno = 0;               /* reset global char number */
6144       linecharno = 0;           /* reset global char number of line start */
6145
6146       /* Only use generic regexps or those for the current language. */
6147       if (rp->lang != NULL && rp->lang != curfdp->lang)
6148         continue;
6149
6150       while (match >= 0 && match < filebuf.len)
6151         {
6152           match = re_search (rp->pat, buffer, filebuf.len, charno,
6153                              filebuf.len - match, &rp->regs);
6154           switch (match)
6155             {
6156             case -2:
6157               /* Some error. */
6158               if (!rp->error_signaled)
6159                 {
6160                   error ("regexp stack overflow while matching \"%s\"",
6161                          rp->pattern);
6162                   rp->error_signaled = TRUE;
6163                 }
6164               break;
6165             case -1:
6166               /* No match. */
6167               break;
6168             default:
6169               if (match == rp->regs.end[0])
6170                 {
6171                   if (!rp->error_signaled)
6172                     {
6173                       error ("regexp matches the empty string: \"%s\"",
6174                              rp->pattern);
6175                       rp->error_signaled = TRUE;
6176                     }
6177                   match = -3;   /* exit from while loop */
6178                   break;
6179                 }
6180
6181               /* Match occurred.  Construct a tag. */
6182               while (charno < rp->regs.end[0])
6183                 if (buffer[charno++] == '\n')
6184                   lineno++, linecharno = charno;
6185               name = rp->name;
6186               if (name[0] == '\0')
6187                 name = NULL;
6188               else /* make a named tag */
6189                 name = substitute (buffer, rp->name, &rp->regs);
6190               if (rp->force_explicit_name)
6191                 /* Force explicit tag name, if a name is there. */
6192                 pfnote (name, TRUE, buffer + linecharno,
6193                         charno - linecharno + 1, lineno, linecharno);
6194               else
6195                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6196                           charno - linecharno + 1, lineno, linecharno);
6197               break;
6198             }
6199         }
6200     }
6201 }
6202
6203 \f
6204 static bool
6205 nocase_tail (cp)
6206      char *cp;
6207 {
6208   register int len = 0;
6209
6210   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6211     cp++, len++;
6212   if (*cp == '\0' && !intoken (dbp[len]))
6213     {
6214       dbp += len;
6215       return TRUE;
6216     }
6217   return FALSE;
6218 }
6219
6220 static void
6221 get_tag (bp, namepp)
6222      register char *bp;
6223      char **namepp;
6224 {
6225   register char *cp = bp;
6226
6227   if (*bp != '\0')
6228     {
6229       /* Go till you get to white space or a syntactic break */
6230       for (cp = bp + 1; !notinname (*cp); cp++)
6231         continue;
6232       make_tag (bp, cp - bp, TRUE,
6233                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6234     }
6235
6236   if (namepp != NULL)
6237     *namepp = savenstr (bp, cp - bp);
6238 }
6239
6240 /*
6241  * Read a line of text from `stream' into `lbp', excluding the
6242  * newline or CR-NL, if any.  Return the number of characters read from
6243  * `stream', which is the length of the line including the newline.
6244  *
6245  * On DOS or Windows we do not count the CR character, if any before the
6246  * NL, in the returned length; this mirrors the behavior of Emacs on those
6247  * platforms (for text files, it translates CR-NL to NL as it reads in the
6248  * file).
6249  *
6250  * If multi-line regular expressions are requested, each line read is
6251  * appended to `filebuf'.
6252  */
6253 static long
6254 readline_internal (lbp, stream)
6255      linebuffer *lbp;
6256      register FILE *stream;
6257 {
6258   char *buffer = lbp->buffer;
6259   register char *p = lbp->buffer;
6260   register char *pend;
6261   int chars_deleted;
6262
6263   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6264
6265   for (;;)
6266     {
6267       register int c = getc (stream);
6268       if (p == pend)
6269         {
6270           /* We're at the end of linebuffer: expand it. */
6271           lbp->size *= 2;
6272           xrnew (buffer, lbp->size, char);
6273           p += buffer - lbp->buffer;
6274           pend = buffer + lbp->size;
6275           lbp->buffer = buffer;
6276         }
6277       if (c == EOF)
6278         {
6279           *p = '\0';
6280           chars_deleted = 0;
6281           break;
6282         }
6283       if (c == '\n')
6284         {
6285           if (p > buffer && p[-1] == '\r')
6286             {
6287               p -= 1;
6288 #ifdef DOS_NT
6289              /* Assume CRLF->LF translation will be performed by Emacs
6290                 when loading this file, so CRs won't appear in the buffer.
6291                 It would be cleaner to compensate within Emacs;
6292                 however, Emacs does not know how many CRs were deleted
6293                 before any given point in the file.  */
6294               chars_deleted = 1;
6295 #else
6296               chars_deleted = 2;
6297 #endif
6298             }
6299           else
6300             {
6301               chars_deleted = 1;
6302             }
6303           *p = '\0';
6304           break;
6305         }
6306       *p++ = c;
6307     }
6308   lbp->len = p - buffer;
6309
6310   if (need_filebuf              /* we need filebuf for multi-line regexps */
6311       && chars_deleted > 0)     /* not at EOF */
6312     {
6313       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6314         {
6315           /* Expand filebuf. */
6316           filebuf.size *= 2;
6317           xrnew (filebuf.buffer, filebuf.size, char);
6318         }
6319       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6320       filebuf.len += lbp->len;
6321       filebuf.buffer[filebuf.len++] = '\n';
6322       filebuf.buffer[filebuf.len] = '\0';
6323     }
6324
6325   return lbp->len + chars_deleted;
6326 }
6327
6328 /*
6329  * Like readline_internal, above, but in addition try to match the
6330  * input line against relevant regular expressions and manage #line
6331  * directives.
6332  */
6333 static void
6334 readline (lbp, stream)
6335      linebuffer *lbp;
6336      FILE *stream;
6337 {
6338   long result;
6339
6340   linecharno = charno;          /* update global char number of line start */
6341   result = readline_internal (lbp, stream); /* read line */
6342   lineno += 1;                  /* increment global line number */
6343   charno += result;             /* increment global char number */
6344
6345   /* Honour #line directives. */
6346   if (!no_line_directive)
6347     {
6348       static bool discard_until_line_directive;
6349
6350       /* Check whether this is a #line directive. */
6351       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6352         {
6353           unsigned int lno;
6354           int start = 0;
6355
6356           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6357               && start > 0)     /* double quote character found */
6358             {
6359               char *endp = lbp->buffer + start;
6360
6361               while ((endp = etags_strchr (endp, '"')) != NULL
6362                      && endp[-1] == '\\')
6363                 endp++;
6364               if (endp != NULL)
6365                 /* Ok, this is a real #line directive.  Let's deal with it. */
6366                 {
6367                   char *taggedabsname;  /* absolute name of original file */
6368                   char *taggedfname;    /* name of original file as given */
6369                   char *name;           /* temp var */
6370
6371                   discard_until_line_directive = FALSE; /* found it */
6372                   name = lbp->buffer + start;
6373                   *endp = '\0';
6374                   canonicalize_filename (name); /* for DOS */
6375                   taggedabsname = absolute_filename (name, tagfiledir);
6376                   if (filename_is_absolute (name)
6377                       || filename_is_absolute (curfdp->infname))
6378                     taggedfname = savestr (taggedabsname);
6379                   else
6380                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6381
6382                   if (streq (curfdp->taggedfname, taggedfname))
6383                     /* The #line directive is only a line number change.  We
6384                        deal with this afterwards. */
6385                     free (taggedfname);
6386                   else
6387                     /* The tags following this #line directive should be
6388                        attributed to taggedfname.  In order to do this, set
6389                        curfdp accordingly. */
6390                     {
6391                       fdesc *fdp; /* file description pointer */
6392
6393                       /* Go look for a file description already set up for the
6394                          file indicated in the #line directive.  If there is
6395                          one, use it from now until the next #line
6396                          directive. */
6397                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6398                         if (streq (fdp->infname, curfdp->infname)
6399                             && streq (fdp->taggedfname, taggedfname))
6400                           /* If we remove the second test above (after the &&)
6401                              then all entries pertaining to the same file are
6402                              coalesced in the tags file.  If we use it, then
6403                              entries pertaining to the same file but generated
6404                              from different files (via #line directives) will
6405                              go into separate sections in the tags file.  These
6406                              alternatives look equivalent.  The first one
6407                              destroys some apparently useless information. */
6408                           {
6409                             curfdp = fdp;
6410                             free (taggedfname);
6411                             break;
6412                           }
6413                       /* Else, if we already tagged the real file, skip all
6414                          input lines until the next #line directive. */
6415                       if (fdp == NULL) /* not found */
6416                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6417                           if (streq (fdp->infabsname, taggedabsname))
6418                             {
6419                               discard_until_line_directive = TRUE;
6420                               free (taggedfname);
6421                               break;
6422                             }
6423                       /* Else create a new file description and use that from
6424                          now on, until the next #line directive. */
6425                       if (fdp == NULL) /* not found */
6426                         {
6427                           fdp = fdhead;
6428                           fdhead = xnew (1, fdesc);
6429                           *fdhead = *curfdp; /* copy curr. file description */
6430                           fdhead->next = fdp;
6431                           fdhead->infname = savestr (curfdp->infname);
6432                           fdhead->infabsname = savestr (curfdp->infabsname);
6433                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6434                           fdhead->taggedfname = taggedfname;
6435                           fdhead->usecharno = FALSE;
6436                           fdhead->prop = NULL;
6437                           fdhead->written = FALSE;
6438                           curfdp = fdhead;
6439                         }
6440                     }
6441                   free (taggedabsname);
6442                   lineno = lno - 1;
6443                   readline (lbp, stream);
6444                   return;
6445                 } /* if a real #line directive */
6446             } /* if #line is followed by a a number */
6447         } /* if line begins with "#line " */
6448
6449       /* If we are here, no #line directive was found. */
6450       if (discard_until_line_directive)
6451         {
6452           if (result > 0)
6453             {
6454               /* Do a tail recursion on ourselves, thus discarding the contents
6455                  of the line buffer. */
6456               readline (lbp, stream);
6457               return;
6458             }
6459           /* End of file. */
6460           discard_until_line_directive = FALSE;
6461           return;
6462         }
6463     } /* if #line directives should be considered */
6464
6465   {
6466     int match;
6467     regexp *rp;
6468     char *name;
6469
6470     /* Match against relevant regexps. */
6471     if (lbp->len > 0)
6472       for (rp = p_head; rp != NULL; rp = rp->p_next)
6473         {
6474           /* Only use generic regexps or those for the current language.
6475              Also do not use multiline regexps, which is the job of
6476              regex_tag_multiline. */
6477           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6478               || rp->multi_line)
6479             continue;
6480
6481           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6482           switch (match)
6483             {
6484             case -2:
6485               /* Some error. */
6486               if (!rp->error_signaled)
6487                 {
6488                   error ("regexp stack overflow while matching \"%s\"",
6489                          rp->pattern);
6490                   rp->error_signaled = TRUE;
6491                 }
6492               break;
6493             case -1:
6494               /* No match. */
6495               break;
6496             case 0:
6497               /* Empty string matched. */
6498               if (!rp->error_signaled)
6499                 {
6500                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6501                   rp->error_signaled = TRUE;
6502                 }
6503               break;
6504             default:
6505               /* Match occurred.  Construct a tag. */
6506               name = rp->name;
6507               if (name[0] == '\0')
6508                 name = NULL;
6509               else /* make a named tag */
6510                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6511               if (rp->force_explicit_name)
6512                 /* Force explicit tag name, if a name is there. */
6513                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6514               else
6515                 make_tag (name, strlen (name), TRUE,
6516                           lbp->buffer, match, lineno, linecharno);
6517               break;
6518             }
6519         }
6520   }
6521 }
6522
6523 \f
/*
 * Duplicate the NUL-terminated string CP: the copy is made by savenstr
 * over the full length of CP, and the new string is returned.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6534
6535 /*
6536  * Return a pointer to a space of size LEN+1 allocated with xnew where
6537  * the string CP has been copied for at most the first LEN characters.
6538  */
6539 static char *
6540 savenstr (cp, len)
6541      char *cp;
6542      int len;
6543 {
6544   register char *dp;
6545
6546   dp = xnew (len + 1, char);
6547   strncpy (dp, cp, len);
6548   dp[len] = '\0';
6549   return dp;
6550 }
6551
/*
 * Return a pointer to the last occurrence of the character C in the
 * string SP, or NULL if there is none.  The terminating NUL takes part
 * in the search, so looking for '\0' returns the end of the string.
 *
 * Behaves like POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *) last;
}
6573
/*
 * Return a pointer to the first occurrence of the character C in the
 * string SP, or NULL if there is none.  The terminating NUL takes part
 * in the search, so looking for '\0' returns the end of the string.
 *
 * Behaves like POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6592
/*
 * Compare the strings S1 and S2, folding case when both characters
 * being compared are alphabetic.  Return zero if the strings are equal,
 * otherwise the difference between the first pair of characters that
 * differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Advance over the common prefix. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6613
/*
 * Compare at most N characters of the strings S1 and S2, folding case
 * when both characters being compared are alphabetic.  Return zero if
 * the first N characters are equal (or if N is not positive), otherwise
 * the difference between the first pair of characters that differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is tested before
     the end of S1, so that a string ending exactly after N equal
     characters compares equal rather than being compared on its
     terminator. */
  for (; n > 0 && *s1 != '\0'; s1++, s2++, n--)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (n <= 0)
    return 0;
  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6639
/* Return the address of the first character at or after CP for which
   iswhite() is false.  The original note says the end of the string
   does not count as a space. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6649
/* Return the address of the first character at or after CP that is
   either the end of the string or satisfies iswhite(). */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6659
/* Report an error through error (), with S1 as the printf control
   string and S2 as its argument, then terminate the program with
   EXIT_FAILURE.  Does not return. */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6668
/* Report the current `errno' condition through perror (), prefixed by
   S1, then terminate the program with EXIT_FAILURE.  Does not return. */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6676
6677 static void
6678 suggest_asking_for_help ()
6679 {
6680   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6681            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6682   exit (EXIT_FAILURE);
6683 }
6684
6685 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6686 static void
6687 error (s1, s2)
6688      const char *s1, *s2;
6689 {
6690   fprintf (stderr, "%s: ", progname);
6691   fprintf (stderr, s1, s2);
6692   fprintf (stderr, "\n");
6693 }
6694
6695 /* Return a newly-allocated string whose contents
6696    concatenate those of s1, s2, s3.  */
6697 static char *
6698 concat (s1, s2, s3)
6699      char *s1, *s2, *s3;
6700 {
6701   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6702   char *result = xnew (len1 + len2 + len3 + 1, char);
6703
6704   strcpy (result, s1);
6705   strcpy (result + len1, s2);
6706   strcpy (result + len1 + len2, s3);
6707   result[len1 + len2 + len3] = '\0';
6708
6709   return result;
6710 }
6711
6712 \f
6713 /* Does the same work as the system V getcwd, but does not need to
6714    guess the buffer size in advance. */
6715 static char *
6716 etags_getcwd ()
6717 {
6718 #ifdef HAVE_GETCWD
6719   int bufsize = 200;
6720   char *path = xnew (bufsize, char);
6721
6722   while (getcwd (path, bufsize) == NULL)
6723     {
6724       if (errno != ERANGE)
6725         pfatal ("getcwd");
6726       bufsize *= 2;
6727       free (path);
6728       path = xnew (bufsize, char);
6729     }
6730
6731   canonicalize_filename (path);
6732   return path;
6733
6734 #else /* not HAVE_GETCWD */
6735 #if MSDOS
6736
6737   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6738
6739   getwd (path);
6740
6741   for (p = path; *p != '\0'; p++)
6742     if (*p == '\\')
6743       *p = '/';
6744     else
6745       *p = lowcase (*p);
6746
6747   return strdup (path);
6748 #else /* not MSDOS */
6749   linebuffer path;
6750   FILE *pipe;
6751
6752   linebuffer_init (&path);
6753   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6754   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6755     pfatal ("pwd");
6756   pclose (pipe);
6757
6758   return path.buffer;
6759 #endif /* not MSDOS */
6760 #endif /* not HAVE_GETCWD */
6761 }
6762
6763 /* Return a newly allocated string containing the file name of FILE
6764    relative to the absolute directory DIR (which should end with a slash). */
6765 static char *
6766 relative_filename (file, dir)
6767      char *file, *dir;
6768 {
6769   char *fp, *dp, *afn, *res;
6770   int i;
6771
6772   /* Find the common root of file and dir (with a trailing slash). */
6773   afn = absolute_filename (file, cwd);
6774   fp = afn;
6775   dp = dir;
6776   while (*fp++ == *dp++)
6777     continue;
6778   fp--, dp--;                   /* back to the first differing char */
6779 #ifdef DOS_NT
6780   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6781     return afn;
6782 #endif
6783   do                            /* look at the equal chars until '/' */
6784     fp--, dp--;
6785   while (*fp != '/');
6786
6787   /* Build a sequence of "../" strings for the resulting relative file name. */
6788   i = 0;
6789   while (*dp == '/')
6790     ++dp;
6791   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6792     {
6793       i += 1;
6794       while (*dp == '/')
6795         ++dp;
6796     }
6797   res = xnew (3*i + strlen (fp + 1) + 1, char);
6798   res[0] = '\0';
6799   while (i-- > 0)
6800     strcat (res, "../");
6801
6802   /* Add the file name relative to the common root of file and dir. */
6803   strcat (res, fp + 1);
6804   free (afn);
6805
6806   return res;
6807 }
6808
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute, DIR is prepended to it.  The
   "/dirname/.." and "/." substrings are then removed from the result.
   An empty result is replaced by "/".  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the
     string is moved down over the deleted part in place.  Source and
     destination overlap, so memmove is required (strcpy is undefined
     for overlapping strings). */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* "/..": back up to the start of the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* "/.": simply drop it. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6872
/* Return a newly allocated string containing the absolute name of the
   directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place.  If it contains no slash, a
   copy of DIR is returned.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *cut, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily cut FILE right after its last slash, so that only the
     directory part is made absolute; then put the character back. */
  cut = last_slash + 1;
  saved = *cut;
  *cut = '\0';
  res = absolute_filename (file, dir);
  *cut = saved;

  return res;
}
6894
/* Return nonzero if FN is an absolute file name: one that begins with
   a slash or, under DOS_NT, with a drive letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6907
/* Canonicalize the file name FN in place.  Under DOS_NT a lower case
   drive letter is converted to upper case and backslashes are
   translated into slashes.  Elsewhere nothing is done. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (p = fn; *p != '\0'; p++)
    if (*p == '\\')
      *p = '/';
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6926
6927 \f
6928 /* Initialize a linebuffer for use */
6929 static void
6930 linebuffer_init (lbp)
6931      linebuffer *lbp;
6932 {
6933   lbp->size = (DEBUG) ? 3 : 200;
6934   lbp->buffer = xnew (lbp->size, char);
6935   lbp->buffer[0] = '\0';
6936   lbp->len = 0;
6937 }
6938
6939 /* Set the minimum size of a string contained in a linebuffer. */
6940 static void
6941 linebuffer_setlen (lbp, toksize)
6942      linebuffer *lbp;
6943      int toksize;
6944 {
6945   while (lbp->size <= toksize)
6946     {
6947       lbp->size *= 2;
6948       xrnew (lbp->buffer, lbp->size, char);
6949     }
6950   lbp->len = toksize;
6951 }
6952
6953 /* Like malloc but get fatal error if memory is exhausted. */
6954 static PTR
6955 xmalloc (size)
6956      unsigned int size;
6957 {
6958   PTR result = (PTR) malloc (size);
6959   if (result == NULL)
6960     fatal ("virtual memory exhausted", (char *)NULL);
6961   return result;
6962 }
6963
6964 static PTR
6965 xrealloc (ptr, size)
6966      char *ptr;
6967      unsigned int size;
6968 {
6969   PTR result = (PTR) realloc (ptr, size);
6970   if (result == NULL)
6971     fatal ("virtual memory exhausted", (char *)NULL);
6972   return result;
6973 }
6974
6975 /*
6976  * Local Variables:
6977  * indent-tabs-mode: t
6978  * tab-width: 8
6979  * fill-column: 79
6980  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6981  * c-file-style: "gnu"
6982  * End:
6983  */
6984
6985 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6986    (do not change this comment) */
6987
6988 /* etags.c ends here */