lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: utf-8 -*-
   2
   3 Copyright (C) 1984 The Regents of the University of California
   4
   5 Redistribution and use in source and binary forms, with or without
   6 modification, are permitted provided that the following conditions are
   7 met:
   8 1. Redistributions of source code must retain the above copyright
   9    notice, this list of conditions and the following disclaimer.
  10 2. Redistributions in binary form must reproduce the above copyright
  11    notice, this list of conditions and the following disclaimer in the
  12    documentation and/or other materials provided with the
  13    distribution.
  14 3. Neither the name of the University nor the names of its
  15    contributors may be used to endorse or promote products derived
  16    from this software without specific prior written permission.
  17
  18 THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS''
  19 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  20 THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  21 PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS
  22 BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  23 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  24 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
  25 BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
  26 WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE
  27 OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN
  28 IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29
  30
  31 Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2013 Free Software
  32 Foundation, Inc.
  33
  34 This file is not considered part of GNU Emacs.
  35
  36 This program is free software: you can redistribute it and/or modify
  37 it under the terms of the GNU General Public License as published by
  38 the Free Software Foundation, either version 3 of the License, or
  39 (at your option) any later version.
  40
  41 This program is distributed in the hope that it will be useful,
  42 but WITHOUT ANY WARRANTY; without even the implied warranty of
  43 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  44 GNU General Public License for more details.
  45
  46 You should have received a copy of the GNU General Public License
  47 along with this program.  If not, see <http://www.gnu.org/licenses/>.  */
  48
  49
  50 /* NB To comply with the above BSD license, copyright information is
  51 reproduced in etc/ETAGS.README.  That file should be updated when the
  52 above notices are.
  53
  54 To the best of our knowledge, this code was originally based on the
  55 ctags.c distributed with BSD4.2, which was copyrighted by the
  56 University of California, as described above. */
  57
  58
  59 /*
  60  * Authors:
  61  * 1983 Ctags originally by Ken Arnold.
  62  * 1984 Fortran added by Jim Kleckner.
  63  * 1984 Ed Pelegri-Llopart added C typedefs.
  64  * 1985 Emacs TAGS format by Richard Stallman.
  65  * 1989 Sam Kendall added C++.
  66  * 1992 Joseph B. Wells improved C and C++ parsing.
  67  * 1993 Francesco Potortì reorganized C and C++.
  68  * 1994 Line-by-line regexp tags by Tom Tromey.
  69  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  70  * 2002 #line directives by Francesco Potortì.
  71  *
  72  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  73  */
  74
  75 /*
  76  * If you want to add support for a new language, start by looking at the LUA
  77  * language, which is the simplest.  Alternatively, consider distributing etags
  78  * together with a configuration file containing regexp definitions for etags.
  79  */
  80
  81 char pot_etags_version[] = "@(#) pot revision number is 17.38.1.4"; /* version id; NOTE(review): "@(#)" is presumably the what(1) marker -- confirm */
  82
  83 #define TRUE    1
  84 #define FALSE   0
  85 /* Normalize DEBUG to TRUE or FALSE so that it can be tested with `#if DEBUG'.  */
  86 #ifdef DEBUG
  87 #  undef DEBUG
  88 #  define DEBUG TRUE
  89 #else
  90 #  define DEBUG  FALSE
  91 #  define NDEBUG                /* disable assert */
  92 #endif
  93
  94 #include <config.h>
  95
  96 #ifndef _GNU_SOURCE
  97 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  98 #endif
  99
 100 /* WIN32_NATIVE is for XEmacs.
 101    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
 102 #ifdef WIN32_NATIVE
 103 # undef MSDOS
 104 # undef  WINDOWSNT
 105 # define WINDOWSNT
 106 #endif /* WIN32_NATIVE */
 107
 108 #ifdef MSDOS
 109 # undef MSDOS
 110 # define MSDOS TRUE
 111 # include <fcntl.h>
 112 # include <sys/param.h>
 113 # include <io.h>
 114 #else
 115 # define MSDOS FALSE
 116 #endif /* MSDOS */
 117
 118 #ifdef WINDOWSNT
 119 # include <fcntl.h>
 120 # include <direct.h>
 121 # include <io.h>
 122 # define MAXPATHLEN _MAX_PATH
 123 # undef HAVE_NTGUI
 124 # undef  DOS_NT
 125 # define DOS_NT
 126 #endif /* WINDOWSNT */
 127
 128 #include <unistd.h>
 129 #include <stdarg.h>
 130 #include <stdlib.h>
 131 #include <string.h>
 132 #include <stdio.h>
 133 #include <ctype.h>
 134 #include <errno.h>
 135 #include <sys/types.h>
 136 #include <sys/stat.h>
 137 #include <c-strcase.h>
 138
 139 #include <assert.h>
 140 #ifdef NDEBUG
 141 # undef  assert                 /* some systems have a buggy assert.h */
 142 # define assert(x) ((void) 0)
 143 #endif
 144
 145 #include <getopt.h>
 146 #include <regex.h>
 147
 148 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 149  Leave it undefined to make the program "etags", which makes emacs-style
 150  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 151 #ifdef CTAGS                    /* normalize to TRUE/FALSE so `#if CTAGS' works */
 152 # undef  CTAGS
 153 # define CTAGS TRUE
 154 #else
 155 # define CTAGS FALSE
 156 #endif
 157 /* String comparison predicates; both arguments must be non-NULL (checked by assert).  */
 158 #define streq(s,t)      (assert ((s)!=NULL && (t)!=NULL), !strcmp (s, t))
 159 #define strcaseeq(s,t)  (assert ((s)!=NULL && (t)!=NULL), !c_strcasecmp (s, t))
 160 #define strneq(s,t,n)   (assert ((s)!=NULL && (t)!=NULL), !strncmp (s, t, n))
 161 #define strncaseeq(s,t,n) (assert ((s)!=NULL && (t)!=NULL), !c_strncasecmp (s, t, n))
 162
 163 #define CHARS 256               /* 2^CHAR_BIT with 8-bit chars; size of the _wht.._etk tables */
 164 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* low bits of x: always in 0..CHARS-1 */
 165 #define iswhite(c)      (_wht[CHAR (c)]) /* c is white (see white) */
 166 #define notinname(c)    (_nin[CHAR (c)]) /* c is not in a name (see nonam) */
 167 #define begtoken(c)     (_btk[CHAR (c)]) /* c can start token (see begtk) */
 168 #define intoken(c)      (_itk[CHAR (c)]) /* c can be in token (see midtk) */
 169 #define endtoken(c)     (_etk[CHAR (c)]) /* c ends tokens (see endtk) */
 170 /* <ctype.h> wrappers: CHAR() keeps the argument non-negative even if plain char is signed */
 171 #define ISALNUM(c)      isalnum (CHAR (c))
 172 #define ISALPHA(c)      isalpha (CHAR (c))
 173 #define ISDIGIT(c)      isdigit (CHAR (c))
 174 #define ISLOWER(c)      islower (CHAR (c))
 175 /* lowercase c, with the same CHAR() masking */
 176 #define lowcase(c)      tolower (CHAR (c))
 177
 178
 179 /*
 180  *      xnew, xrnew -- allocate, reallocate storage for N objects of TYPE
 181  *      (N * sizeof (Type) is computed without an overflow check)
 182  * SYNOPSIS:    Type *xnew (int n, Type);
 183  *              void xrnew (OldPointer, int n, Type);  [result is stored back into OldPointer]
 184  */
 185 #if DEBUG                       /* debug builds pass __FILE__/__LINE__ to trace_malloc/trace_realloc */
 186 # include "chkmalloc.h"
 187 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 188                                                   (n) * sizeof (Type)))
 189 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 190                                         (char *) (op), (n) * sizeof (Type)))
 191 #else
 192 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 193 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 194                                         (char *) (op), (n) * sizeof (Type)))
 195 #endif
 196
 197 #define bool int                /* must stay int: longopts stores the address of several bool flags */
 198
 199 typedef void Lang_function (FILE *);  /* type of a language's parse function (see `language') */
 200
 201 typedef struct
 202 {                               /* element type of the `compressors' table */
 203   const char *suffix;           /* file name suffix for this compressor */
 204   const char *command;          /* takes one arg and decompresses to stdout */
 205 } compressor;
 206
 207 typedef struct
 208 {                               /* member order matters: lang_names uses positional initializers */
 209   const char *name;             /* language name */
 210   const char *help;             /* detailed help for the language */
 211   Lang_function *function;      /* parse function */
 212   const char **suffixes;        /* name suffixes of this language's files */
 213   const char **filenames;       /* names of this language's files */
 214   const char **interpreters;    /* interpreters for this language */
 215   bool metasource;              /* source used to generate other sources */
 216 } language;
 217
 218 typedef struct fdesc
 219 {                               /* description of one input file; list head is fdhead */
 220   struct fdesc *next;           /* for the linked list */
 221   char *infname;                /* uncompressed input file name */
 222   char *infabsname;             /* absolute uncompressed input file name */
 223   char *infabsdir;              /* absolute dir of input file */
 224   char *taggedfname;            /* file name to write in tagfile */
 225   language *lang;               /* language of file */
 226   char *prop;                   /* file properties to write in tagfile */
 227   bool usecharno;               /* etags tags shall contain char number */
 228   bool written;                 /* entry written in the tags file */
 229 } fdesc;
 230
 231 typedef struct node_st
 232 {                               /* sorting structure: one tag, node of a binary tree */
 233   struct node_st *left, *right; /* left and right sons */
 234   fdesc *fdp;                   /* description of the file the tag belongs to */
 235   char *name;                   /* tag name */
 236   char *regex;                  /* search regexp */
 237   bool valid;                   /* write this tag on the tag file */
 238   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 239   bool been_warned;             /* warning already given for duplicated tag */
 240   int lno;                      /* line number tag is on */
 241   long cno;                     /* character number line starts on */
 242 } node;
 243
 244 /*
 245  * A `linebuffer' is a structure which holds a line of text.
 246  * `readline_internal' reads a line from a stream into a linebuffer
 247  * and works regardless of the length of the line.
 248  * SIZE is the size of BUFFER, LEN is the length of the string in
 249  * BUFFER after readline reads it.
 250  */
 251 typedef struct
 252 {
 253   long size;                    /* size of BUFFER */
 254   int len;                      /* length of the string in BUFFER */
 255   char *buffer;                 /* the text of the line */
 256 } linebuffer;
 257
 258 /* Used to support mixing of --lang and file names: one entry of a list ended by at_end. */
 259 typedef struct
 260 {
 261   enum {
 262     at_language,                /* a language specification */
 263     at_regexp,                  /* a regular expression */
 264     at_filename,                /* a file name */
 265     at_stdin,                   /* read from stdin here */
 266     at_end                      /* stop parsing the list */
 267   } arg_type;                   /* argument type */
 268   language *lang;               /* language associated with the argument */
 269   char *what;                   /* the argument itself */
 270 } argument;
 271
 272 /* Structure defining a regular expression; entries are chained through p_next (list head: p_head). */
 273 typedef struct regexp
 274 {
 275   struct regexp *p_next;        /* pointer to next in list */
 276   language *lang;               /* if set, use only for this language */
 277   char *pattern;                /* the regexp pattern */
 278   char *name;                   /* tag name */
 279   struct re_pattern_buffer *pat; /* the compiled pattern */
 280   struct re_registers regs;     /* re registers */
 281   bool error_signaled;          /* already signaled for this regexp */
 282   bool force_explicit_name;     /* do not allow implicit tag name */
 283   bool ignore_case;             /* ignore case when matching */
 284   bool multi_line;              /* do a multi-line match on the whole file */
 285 } regexp;
 286
 287
 288 /* Many compilers barf on this:
 289         Lang_function Ada_funcs;
 290    so let's write it this way */
 291 static void Ada_funcs (FILE *);
 292 static void Asm_labels (FILE *);
 293 static void C_entries (int c_ext, FILE *);
 294 static void default_C_entries (FILE *);
 295 static void plain_C_entries (FILE *);
 296 static void Cjava_entries (FILE *);
 297 static void Cobol_paragraphs (FILE *);
 298 static void Cplusplus_entries (FILE *);
 299 static void Cstar_entries (FILE *);
 300 static void Erlang_functions (FILE *);
 301 static void Forth_words (FILE *);
 302 static void Fortran_functions (FILE *);
 303 static void HTML_labels (FILE *);
 304 static void Lisp_functions (FILE *);
 305 static void Lua_functions (FILE *);
 306 static void Makefile_targets (FILE *);
 307 static void Pascal_functions (FILE *);
 308 static void Perl_functions (FILE *);
 309 static void PHP_functions (FILE *);
 310 static void PS_functions (FILE *);
 311 static void Prolog_functions (FILE *);
 312 static void Python_functions (FILE *);
 313 static void Scheme_functions (FILE *);
 314 static void TeX_commands (FILE *);
 315 static void Texinfo_nodes (FILE *);
 316 static void Yacc_entries (FILE *);
 317 static void just_read_file (FILE *);
 318
 319 static language *get_language_from_langname (const char *);
 320 static void readline (linebuffer *, FILE *);
 321 static long readline_internal (linebuffer *, FILE *);
 322 static bool nocase_tail (const char *);
 323 static void get_tag (char *, char **);
 324
 325 static void analyse_regex (char *);
 326 static void free_regexps (void);
 327 static void regex_tag_multiline (void);
 328 static void error (const char *, ...) ATTRIBUTE_FORMAT_PRINTF (1, 2);
 329 static _Noreturn void suggest_asking_for_help (void);
 330 _Noreturn void fatal (const char *, const char *);
 331 static _Noreturn void pfatal (const char *);
 332 static void add_node (node *, node **);
 333
 334 static void init (void);
 335 static void process_file_name (char *, language *);
 336 static void process_file (FILE *, char *, language *);
 337 static void find_entries (FILE *);
 338 static void free_tree (node *);
 339 static void free_fdesc (fdesc *);
 340 static void pfnote (char *, bool, char *, int, int, long);
 341 static void invalidate_nodes (fdesc *, node **);
 342 static void put_entries (node *);
 343
 344 static char *concat (const char *, const char *, const char *);
 345 static char *skip_spaces (char *);
 346 static char *skip_non_spaces (char *);
 347 static char *skip_name (char *);
 348 static char *savenstr (const char *, int);
 349 static char *savestr (const char *);
 350 static char *etags_strchr (const char *, int);
 351 static char *etags_strrchr (const char *, int);
 352 static char *etags_getcwd (void);
 353 static char *relative_filename (char *, char *);
 354 static char *absolute_filename (char *, char *);
 355 static char *absolute_dirname (char *, char *);
 356 static bool filename_is_absolute (char *f);
 357 static void canonicalize_filename (char *);
 358 static void linebuffer_init (linebuffer *);
 359 static void linebuffer_setlen (linebuffer *, int);
 360 static void *xmalloc (size_t);
 361 static void *xrealloc (char *, size_t);
 362
 363 \f
 364 static char searchar = '/';     /* use /.../ searches */
 365
 366 static char *tagfile;           /* output file */
 367 static char *progname;          /* name this program was invoked with */
 368 static char *cwd;               /* current working directory */
 369 static char *tagfiledir;        /* directory of tagfile */
 370 static FILE *tagf;              /* ioptr for tags file */
 371 static ptrdiff_t whatlen_max;   /* maximum length of any 'what' member */
 372
 373 static fdesc *fdhead;           /* head of file description list */
 374 static fdesc *curfdp;           /* current file description */
 375 static int lineno;              /* line number of current line */
 376 static long charno;             /* current character number */
 377 static long linecharno;         /* charno of start of current line */
 378 static char *dbp;               /* pointer to start of current tag */
 379
 380 static const int invalidcharno = -1; /* sentinel charno; NOTE(review): presumably "no valid position" -- confirm at use sites */
 381
 382 static node *nodehead;          /* the head of the binary tree of tags */
 383 static node *last_node;         /* the last node created */
 384
 385 static linebuffer lb;           /* the current line */
 386 static linebuffer filebuf;      /* a buffer containing the whole file */
 387 static linebuffer token_name;   /* a buffer containing a tag name */
 388
 389 /* boolean "functions" (see init): tables indexed by CHAR (c), read via iswhite & co. */
 390 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 391 static const char
 392   /* white chars (see iswhite) */
 393   *white = " \f\t\n\r\v",
 394   /* not in a name (see notinname) */
 395   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 396   /* token ending chars (see endtoken) */
 397   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 398   /* token starting chars (see begtoken) */
 399   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 400   /* valid in-token chars (see intoken) */
 401   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 402
 403 static bool append_to_tagfile;  /* -a: append to tags */
 404 /* The next five default to TRUE in C and derived languages.  */
 405 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 406 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 407                                 /* 0 struct/enum/union decls, and C++ */
 408                                 /* member functions. */
 409 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 410                                 /* constants and variables. */
 411                                 /* -D: opposite of -d.  Default under ctags. */
 412 static bool globals;            /* create tags for global variables */
 413 static bool members;            /* create tags for C member variables */
 414 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 415 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 416 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 417 static bool update;             /* -u: update tags */
 418 static bool vgrind_style;       /* -v: create vgrind style index output */
 419 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 420 static bool cxref_style;        /* -x: create cxref style output */
 421 static bool cplusplus;          /* .[hc] means C++, not C (undocumented) */
 422 static bool ignoreindent;       /* -I: ignore indentation in C */
 423 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 424
 425 /* STDIN is defined in LynxOS system headers */
 426 #ifdef STDIN
 427 # undef STDIN
 428 #endif
 429
 430 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 431 static bool parsing_stdin;      /* --parse-stdin used */
 432
 433 static regexp *p_head;          /* list of all regexps */
 434 static bool need_filebuf;       /* some regexes are multi-line */
 435
 436 static struct option longopts[] =
 437 {                               /* rows with a non-NULL third field set that flag variable directly */
 438   { "append",             no_argument,       NULL,               'a'   },
 439   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 440   { "c++",                no_argument,       NULL,               'C'   },
 441   { "declarations",       no_argument,       &declarations,      TRUE  },
 442   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 443   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
 444   { "help",               no_argument,       NULL,               'h'   },
 445   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): repeats the name "help"; presumably unreachable by name -- confirm against getopt_long */
 446   { "ignore-indentation", no_argument,       NULL,               'I'   },
 447   { "language",           required_argument, NULL,               'l'   },
 448   { "members",            no_argument,       &members,           TRUE  },
 449   { "no-members",         no_argument,       &members,           FALSE },
 450   { "output",             required_argument, NULL,               'o'   },
 451   { "regex",              required_argument, NULL,               'r'   },
 452   { "no-regex",           no_argument,       NULL,               'R'   },
 453   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 454   { "parse-stdin",        required_argument, NULL,               STDIN },
 455   { "version",            no_argument,       NULL,               'V'   },
 456
 457 #if CTAGS /* Ctags options */
 458   { "backward-search",    no_argument,       NULL,               'B'   },
 459   { "cxref",              no_argument,       NULL,               'x'   },
 460   { "defines",            no_argument,       NULL,               'd'   },
 461   { "globals",            no_argument,       &globals,           TRUE  },
 462   { "typedefs",           no_argument,       NULL,               't'   },
 463   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 464   { "update",             no_argument,       NULL,               'u'   },
 465   { "vgrind",             no_argument,       NULL,               'v'   },
 466   { "no-warn",            no_argument,       NULL,               'w'   },
 467
 468 #else /* Etags options */
 469   { "no-defines",         no_argument,       NULL,               'D'   },
 470   { "no-globals",         no_argument,       &globals,           FALSE },
 471   { "include",            required_argument, NULL,               'i'   },
 472 #endif
 473   { NULL }                      /* all-zero terminator row */
 474 };
 475
 476 static compressor compressors[] =
 477 {                               /* file name suffix -> command that decompresses to stdout */
 478   { "z", "gzip -d -c"},
 479   { "Z", "gzip -d -c"},
 480   { "gz", "gzip -d -c"},
 481   { "GZ", "gzip -d -c"},
 482   { "bz2", "bzip2 -d -c" },
 483   { "xz", "xz -d -c" },
 484   { NULL }                      /* last row: NULL suffix */
 485 };
 486
 487 /*
 488  * Language stuff.
 489  */
 490
 491 /* Ada code */
 492 static const char *Ada_suffixes [] =
 493   { "ads", "adb", "ada", NULL };
 494 static const char Ada_help [] =
 495 "In Ada code, functions, procedures, packages, tasks and types are\n\
 496 tags.  Use the `--packages-only' option to create tags for\n\
 497 packages only.\n\
 498 Ada tag names have suffixes indicating the type of entity:\n\
 499         Entity type:    Qualifier:\n\
 500         ------------    ----------\n\
 501         function        /f\n\
 502         procedure       /p\n\
 503         package spec    /s\n\
 504         package body    /b\n\
 505         type            /t\n\
 506         task            /k\n\
 507 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 508 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 509 will just search for any tag `bidule'.";
 510
 511 /* Assembly code */
 512 static const char *Asm_suffixes [] =
 513   { "a",        /* Unix assembler */
 514     "asm", /* Microcontroller assembly */
 515     "def", /* BSO/Tasking definition includes  */
 516     "inc", /* Microcontroller include files */
 517     "ins", /* Microcontroller include files */
 518     "s", "sa", /* Unix assembler */
 519     "S",   /* cpp-processed Unix assembler */
 520     "src", /* BSO/Tasking C compiler output */
 521     NULL
 522   };
 523 static const char Asm_help [] =
 524 "In assembler code, labels appearing at the beginning of a line,\n\
 525 followed by a colon, are tags.";
 526
 527
 528 /* Note that .c and .h can be considered C++, if the --c++ flag was
 529    given, or if the `class' or `template' keywords are met inside the file.
 530    That is why default_C_entries is called for these. */
 531 static const char *default_C_suffixes [] =
 532   { "c", "h", NULL };
 533 #if CTAGS                               /* C help for Ctags */
 534 static const char default_C_help [] =
 535 "In C code, any C function is a tag.  Use -t to tag typedefs.\n\
 536 Use -T to tag definitions of `struct', `union' and `enum'.\n\
 537 Use -d to tag `#define' macro definitions and `enum' constants.\n\
 538 Use --globals to tag global variables.\n\
 539 You can tag function declarations and external variables by\n\
 540 using `--declarations', and struct members by using `--members'.";
 541 #else                                   /* C help for Etags */
 542 static const char default_C_help [] =
 543 "In C code, any C function or typedef is a tag, and so are\n\
 544 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 545 definitions and `enum' constants are tags unless you specify\n\
 546 `--no-defines'.  Global variables are tags unless you specify\n\
 547 `--no-globals' and so are struct members unless you specify\n\
 548 `--no-members'.  Use of `--no-globals', `--no-defines' and\n\
 549 `--no-members' can make the tags table file much smaller.\n\
 550 You can tag function declarations and external variables by\n\
 551 using `--declarations'.";
 552 #endif  /* C help for Ctags and Etags */
 553
 554 static const char *Cplusplus_suffixes [] =
 555   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 556     "M",                        /* Objective C++ */
 557     "pdb",                      /* PostScript with C syntax */
 558     NULL };
 559 static const char Cplusplus_help [] =
 560 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 561 --help --lang=c --lang=c++ for full help.)\n\
 562 In addition to C tags, member functions are also recognized.  Member\n\
 563 variables are recognized unless you use the `--no-members' option.\n\
 564 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 565 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 566 `operator+'.";
 567 /* Java code: file suffixes and --help text (read-only, like the other *_help strings) */
 568 static const char *Cjava_suffixes [] =
 569   { "java", NULL };
 570 static const char Cjava_help [] =
 571 "In Java code, all the tag constructs of C and C++ code are\n\
 572 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 573
 574 /* Cobol code: file suffixes and --help text (read-only, like the other *_help strings) */
 575 static const char *Cobol_suffixes [] =
 576   { "COB", "cob", NULL };
 577 static const char Cobol_help [] =
 578 "In Cobol code, tags are paragraph names; that is, any word\n\
 579 starting in column 8 and followed by a period.";
 580
 581 static const char *Cstar_suffixes [] =
 582   { "cs", "hs", NULL };
 583
 584 static const char *Erlang_suffixes [] =
 585   { "erl", "hrl", NULL };
 586 static const char Erlang_help [] =
 587 "In Erlang code, the tags are the functions, records and macros\n\
 588 defined in the file.";
 589 /* Forth code: file suffixes and --help text; file-local like every other language table */
 590 static const char *Forth_suffixes [] =
 591   { "fth", "tok", NULL };
 592 static const char Forth_help [] =
 593 "In Forth code, tags are words defined by `:',\n\
 594 constant, code, create, defer, value, variable, buffer:, field.";
 595
 596 static const char *Fortran_suffixes [] =
 597   { "F", "f", "f90", "for", NULL };
 598 static const char Fortran_help [] =
 599 "In Fortran code, functions, subroutines and block data are tags.";
 600
 601 static const char *HTML_suffixes [] =
 602   { "htm", "html", "shtml", NULL };
 603 static const char HTML_help [] =
 604 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 605 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 606 occurrences of `id='.";
 607
 608 static const char *Lisp_suffixes [] =
 609   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 610 static const char Lisp_help [] =
 611 "In Lisp code, any function defined with `defun', any variable\n\
 612 defined with `defvar' or `defconst', and in general the first\n\
 613 argument of any expression that starts with `(def' in column zero\n\
 614 is a tag.\n\
 615 The `--declarations' option tags \"(defvar foo)\" constructs too.";
 616
 617 static const char *Lua_suffixes [] =
 618   { "lua", "LUA", NULL };
 619 static const char Lua_help [] =
 620 "In Lua scripts, all functions are tags.";
 621
 622 static const char *Makefile_filenames [] =
 623   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 624 static const char Makefile_help [] =
 625 "In makefiles, targets are tags; additionally, variables are tags\n\
 626 unless you specify `--no-globals'.";
 627
 628 static const char *Objc_suffixes [] =
 629   { "lm",                       /* Objective lex file */
 630     "m",                        /* Objective C file */
 631      NULL };
 632 static const char Objc_help [] =
 633 "In Objective C code, tags include Objective C definitions for classes,\n\
 634 class categories, methods and protocols.  Tags for variables and\n\
 635 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 636 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 637
 638 static const char *Pascal_suffixes [] =
 639   { "p", "pas", NULL };
 640 static const char Pascal_help [] =
 641 "In Pascal code, the tags are the functions and procedures defined\n\
 642 in the file.";
 643 /* " // this is for working around an Emacs highlighting bug... */
 644
 645 static const char *Perl_suffixes [] =
 646   { "pl", "pm", NULL };
 647 static const char *Perl_interpreters [] =
 648   { "perl", "@PERL@", NULL };
 649 static const char Perl_help [] =
 650 "In Perl code, the tags are the packages, subroutines and variables\n\
 651 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 652 `--globals' if you want to tag global variables.  Tags for\n\
 653 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 654 defined in the default package is `main::SUB'.";
 655
 656 static const char *PHP_suffixes [] =
 657   { "php", "php3", "php4", NULL };
 658 static const char PHP_help [] =
 659 "In PHP code, tags are functions, classes and defines.  Unless you use\n\
 660 the `--no-members' option, vars are tags too.";
 661
 662 static const char *plain_C_suffixes [] =
 663   { "pc",                       /* Pro*C file */
 664      NULL };
 665
 666 static const char *PS_suffixes [] =
 667   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 668 static const char PS_help [] =
 669 "In PostScript code, the tags are the functions.";
 670
 671 static const char *Prolog_suffixes [] =
 672   { "prolog", NULL };
 673 static const char Prolog_help [] =
 674 "In Prolog code, tags are predicates and rules at the beginning of\n\
 675 line.";
 676
 677 static const char *Python_suffixes [] =
 678   { "py", NULL };
 679 static const char Python_help [] =
 680 "In Python code, `def' or `class' at the beginning of a line\n\
 681 generate a tag.";
 682
 683 /* Can't do the `SCM' or `scm' prefix with a version number. */
 684 static const char *Scheme_suffixes [] =
 685   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 686 static const char Scheme_help [] =
 687 "In Scheme code, tags include anything defined with `def' or with a\n\
 688 construct whose name starts with `def'.  They also include\n\
 689 variables set with `set!' at top level in the file.";
 690
 691 static const char *TeX_suffixes [] =
 692   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 693 static const char TeX_help [] =
 694 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 695 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 696 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 697 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 698 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 699 \n\
 700 Other commands can be specified by setting the environment variable\n\
 701 `TEXTAGS' to a colon-separated list like, for example,\n\
 702      TEXTAGS=\"mycommand:myothercommand\".";
 703
 704
 705 static const char *Texinfo_suffixes [] =
 706   { "texi", "texinfo", "txi", NULL };
 707 static const char Texinfo_help [] =
 708 "for texinfo files, lines starting with @node are tagged.";
 709
 710 static const char *Yacc_suffixes [] =
 711   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 712 static const char Yacc_help [] =
 713 "In Bison or Yacc input files, each rule defines as a tag the\n\
 714 nonterminal it constructs.  The portions of the file that contain\n\
 715 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 716 for full help).";
 717
 718 static const char auto_help [] =
 719 "`auto' is not a real language, it indicates to use\n\
 720 a default language for files base on file name suffix and file contents.";
 721
 722 static const char none_help [] =
 723 "`none' is not a real language, it indicates to only do\n\
 724 regexp processing on files.";
 725
 726 static const char no_lang_help [] =
 727 "No detailed help available for this language.";
 728
 729
/*
 * Table of languages.
 *
 * One row per language name accepted by `--language'; the table is
 * terminated by a row whose name is NULL, which is what every loop
 * over it tests for.
 *
 * Rows are positional initializers.  Judging from the initializer
 * names, the columns appear to be, in order: language name, help
 * text, tagging function, suffix list, whole-file-name list,
 * interpreter list and a "metasource" flag -- confirm against the
 * declaration of `language'.  Trailing columns that are omitted are
 * zero-initialized, so e.g. most rows have no file-name list.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* Matched by whole file name rather than by suffix.  */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  /* The only row that also supplies an interpreter list.  */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  /* The only row with the last column set (presumably `metasource').  */
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 769
 770 \f
 771 static void
 772 print_language_names (void)
 773 {
 774   language *lang;
 775   const char **name, **ext;
 776
 777   puts ("\nThese are the currently supported languages, along with the\n\
 778 default file names and dot suffixes:");
 779   for (lang = lang_names; lang->name != NULL; lang++)
 780     {
 781       printf ("  %-*s", 10, lang->name);
 782       if (lang->filenames != NULL)
 783         for (name = lang->filenames; *name != NULL; name++)
 784           printf (" %s", *name);
 785       if (lang->suffixes != NULL)
 786         for (ext = lang->suffixes; *ext != NULL; ext++)
 787           printf (" .%s", *ext);
 788       puts ("");
 789     }
 790   puts ("where `auto' means use default language for files based on file\n\
 791 name suffix, and `none' means only do regexp processing on files.\n\
 792 If no language is specified and no matching suffix is found,\n\
 793 the first line of the file is read for a sharp-bang (#!) sequence\n\
 794 followed by the name of an interpreter.  If no such sequence is found,\n\
 795 Fortran is tried first; if no tags are found, C is tried next.\n\
 796 When parsing any C file, a \"class\" or \"template\" keyword\n\
 797 switches to C++.");
 798   puts ("Compressed files are supported using gzip, bzip2, and xz.\n\
 799 \n\
 800 For detailed help on a given language use, for example,\n\
 801 etags --help --lang=ada.");
 802 }
 803
 804 #ifndef EMACS_NAME
 805 # define EMACS_NAME "standalone"
 806 #endif
 807 #ifndef VERSION
 808 # define VERSION "17.38.1.4"
 809 #endif
 810 static _Noreturn void
 811 print_version (void)
 812 {
 813   char emacs_copyright[] = COPYRIGHT;
 814
 815   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 816   puts (emacs_copyright);
 817   puts ("This program is distributed under the terms in ETAGS.README");
 818
 819   exit (EXIT_SUCCESS);
 820 }
 821
/* Whether print_help also describes the undocumented options.
   Defaults to off unless the build defines it.  */
#ifndef PRINT_UNDOCUMENTED_OPTIONS_HELP
# define PRINT_UNDOCUMENTED_OPTIONS_HELP FALSE
#endif

/* Print help on standard output and exit successfully; never returns.

   ARGBUFFER is the argument list built by main, terminated by an
   entry of type at_end.  If it contains any at_language entries, only
   the help text of each of those languages is printed, separated by
   blank lines.  Otherwise the general usage message is printed,
   followed by the list of languages and the bug-report address.

   Which options are described depends on CTAGS: some exist only for
   ctags, some only for etags, and some have opposite spellings
   (e.g. --globals versus --no-globals).

   The continuation lines inside the string literals below are part
   of the output verbatim, leading blanks included.  */
static _Noreturn void
print_help (argument *argbuffer)
{
  bool help_for_lang = FALSE;

  /* Language-specific help: one block per --language option given.  */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* If any language help was printed, that is all the user gets.  */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  puts ("You may use unambiguous abbreviations for the long option names.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");

  if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
    puts ("--no-line-directive\n\
        Ignore #line preprocessor directives in C and derived languages.");

  if (CTAGS)
    puts ("--members\n\
        Create tag entries for members of structures in some languages.");
  else
    puts ("--no-members\n\
        Do not create tag entries for members of structures\n\
        in some languages.");

  /* The --regex description is split over two puts calls; the second
     continues the indented text of the first.  */
  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: `i' means to ignore case,\n\
        `m' means to allow multi-line matches, `s' implies `m' and\n\
        causes dot to match any character, including newline.");

  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");

  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");

  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");

  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Print on the standard output an index of items intended for\n\
        human consumption, similar to the output of vgrind.  The index\n\
        is sorted, and gives the page number of each item.");

      /* NOTE(review): both undocumented entries below are listed under
         the same short option `-w'.  */
      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-duplicates\n\
        Do not create duplicate tag entries, for compatibility with\n\
        traditional ctags.");

      if (PRINT_UNDOCUMENTED_OPTIONS_HELP)
        puts ("-w, --no-warn\n\
        Suppress warning messages about duplicate tag entries.");

      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more `--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
 997
 998 \f
 999 int
1000 main (int argc, char **argv)
1001 {
1002   int i;
1003   unsigned int nincluded_files;
1004   char **included_files;
1005   argument *argbuffer;
1006   int current_arg, file_count;
1007   linebuffer filename_lb;
1008   bool help_asked = FALSE;
1009   ptrdiff_t len;
1010  char *optstring;
1011  int opt;
1012
1013
1014 #ifdef DOS_NT
1015   _fmode = O_BINARY;   /* all of files are treated as binary files */
1016 #endif /* DOS_NT */
1017
1018   progname = argv[0];
1019   nincluded_files = 0;
1020   included_files = xnew (argc, char *);
1021   current_arg = 0;
1022   file_count = 0;
1023
1024   /* Allocate enough no matter what happens.  Overkill, but each one
1025      is small. */
1026   argbuffer = xnew (argc, argument);
1027
1028   /*
1029    * Always find typedefs and structure tags.
1030    * Also default to find macro constants, enum constants, struct
1031    * members and global variables.  Do it for both etags and ctags.
1032    */
1033   typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1034   globals = members = TRUE;
1035
1036   /* When the optstring begins with a '-' getopt_long does not rearrange the
1037      non-options arguments to be at the end, but leaves them alone. */
1038   optstring = concat ("-ac:Cf:Il:o:r:RSVhH",
1039                       (CTAGS) ? "BxdtTuvw" : "Di:",
1040                       "");
1041
1042   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1043     switch (opt)
1044       {
1045       case 0:
1046         /* If getopt returns 0, then it has already processed a
1047            long-named option.  We should do nothing.  */
1048         break;
1049
1050       case 1:
1051         /* This means that a file name has been seen.  Record it. */
1052         argbuffer[current_arg].arg_type = at_filename;
1053         argbuffer[current_arg].what     = optarg;
1054         len = strlen (optarg);
1055         if (whatlen_max < len)
1056           whatlen_max = len;
1057         ++current_arg;
1058         ++file_count;
1059         break;
1060
1061       case STDIN:
1062         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1063         argbuffer[current_arg].arg_type = at_stdin;
1064         argbuffer[current_arg].what     = optarg;
1065         len = strlen (optarg);
1066         if (whatlen_max < len)
1067           whatlen_max = len;
1068         ++current_arg;
1069         ++file_count;
1070         if (parsing_stdin)
1071           fatal ("cannot parse standard input more than once", (char *)NULL);
1072         parsing_stdin = TRUE;
1073         break;
1074
1075         /* Common options. */
1076       case 'a': append_to_tagfile = TRUE;       break;
1077       case 'C': cplusplus = TRUE;               break;
1078       case 'f':         /* for compatibility with old makefiles */
1079       case 'o':
1080         if (tagfile)
1081           {
1082             error ("-o option may only be given once.");
1083             suggest_asking_for_help ();
1084             /* NOTREACHED */
1085           }
1086         tagfile = optarg;
1087         break;
1088       case 'I':
1089       case 'S':         /* for backward compatibility */
1090         ignoreindent = TRUE;
1091         break;
1092       case 'l':
1093         {
1094           language *lang = get_language_from_langname (optarg);
1095           if (lang != NULL)
1096             {
1097               argbuffer[current_arg].lang = lang;
1098               argbuffer[current_arg].arg_type = at_language;
1099               ++current_arg;
1100             }
1101         }
1102         break;
1103       case 'c':
1104         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1105         optarg = concat (optarg, "i", ""); /* memory leak here */
1106         /* FALLTHRU */
1107       case 'r':
1108         argbuffer[current_arg].arg_type = at_regexp;
1109         argbuffer[current_arg].what = optarg;
1110         len = strlen (optarg);
1111         if (whatlen_max < len)
1112           whatlen_max = len;
1113         ++current_arg;
1114         break;
1115       case 'R':
1116         argbuffer[current_arg].arg_type = at_regexp;
1117         argbuffer[current_arg].what = NULL;
1118         ++current_arg;
1119         break;
1120       case 'V':
1121         print_version ();
1122         break;
1123       case 'h':
1124       case 'H':
1125         help_asked = TRUE;
1126         break;
1127
1128         /* Etags options */
1129       case 'D': constantypedefs = FALSE;                        break;
1130       case 'i': included_files[nincluded_files++] = optarg;     break;
1131
1132         /* Ctags options. */
1133       case 'B': searchar = '?';                                 break;
1134       case 'd': constantypedefs = TRUE;                         break;
1135       case 't': typedefs = TRUE;                                break;
1136       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1137       case 'u': update = TRUE;                                  break;
1138       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1139       case 'x': cxref_style = TRUE;                             break;
1140       case 'w': no_warnings = TRUE;                             break;
1141       default:
1142         suggest_asking_for_help ();
1143         /* NOTREACHED */
1144       }
1145
1146   /* No more options.  Store the rest of arguments. */
1147   for (; optind < argc; optind++)
1148     {
1149       argbuffer[current_arg].arg_type = at_filename;
1150       argbuffer[current_arg].what = argv[optind];
1151       len = strlen (argv[optind]);
1152       if (whatlen_max < len)
1153         whatlen_max = len;
1154       ++current_arg;
1155       ++file_count;
1156     }
1157
1158   argbuffer[current_arg].arg_type = at_end;
1159
1160   if (help_asked)
1161     print_help (argbuffer);
1162     /* NOTREACHED */
1163
1164   if (nincluded_files == 0 && file_count == 0)
1165     {
1166       error ("no input files specified.");
1167       suggest_asking_for_help ();
1168       /* NOTREACHED */
1169     }
1170
1171   if (tagfile == NULL)
1172     tagfile = savestr (CTAGS ? "tags" : "TAGS");
1173   cwd = etags_getcwd ();        /* the current working directory */
1174   if (cwd[strlen (cwd) - 1] != '/')
1175     {
1176       char *oldcwd = cwd;
1177       cwd = concat (oldcwd, "/", "");
1178       free (oldcwd);
1179     }
1180
1181   /* Compute base directory for relative file names. */
1182   if (streq (tagfile, "-")
1183       || strneq (tagfile, "/dev/", 5))
1184     tagfiledir = cwd;            /* relative file names are relative to cwd */
1185   else
1186     {
1187       canonicalize_filename (tagfile);
1188       tagfiledir = absolute_dirname (tagfile, cwd);
1189     }
1190
1191   init ();                      /* set up boolean "functions" */
1192
1193   linebuffer_init (&lb);
1194   linebuffer_init (&filename_lb);
1195   linebuffer_init (&filebuf);
1196   linebuffer_init (&token_name);
1197
1198   if (!CTAGS)
1199     {
1200       if (streq (tagfile, "-"))
1201         {
1202           tagf = stdout;
1203 #ifdef DOS_NT
1204           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1205              doesn't take effect until after `stdout' is already open). */
1206           if (!isatty (fileno (stdout)))
1207             setmode (fileno (stdout), O_BINARY);
1208 #endif /* DOS_NT */
1209         }
1210       else
1211         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1212       if (tagf == NULL)
1213         pfatal (tagfile);
1214     }
1215
1216   /*
1217    * Loop through files finding functions.
1218    */
1219   for (i = 0; i < current_arg; i++)
1220     {
1221       static language *lang;    /* non-NULL if language is forced */
1222       char *this_file;
1223
1224       switch (argbuffer[i].arg_type)
1225         {
1226         case at_language:
1227           lang = argbuffer[i].lang;
1228           break;
1229         case at_regexp:
1230           analyse_regex (argbuffer[i].what);
1231           break;
1232         case at_filename:
1233               this_file = argbuffer[i].what;
1234               /* Input file named "-" means read file names from stdin
1235                  (one per line) and use them. */
1236               if (streq (this_file, "-"))
1237                 {
1238                   if (parsing_stdin)
1239                     fatal ("cannot parse standard input AND read file names from it",
1240                            (char *)NULL);
1241                   while (readline_internal (&filename_lb, stdin) > 0)
1242                     process_file_name (filename_lb.buffer, lang);
1243                 }
1244               else
1245                 process_file_name (this_file, lang);
1246           break;
1247         case at_stdin:
1248           this_file = argbuffer[i].what;
1249           process_file (stdin, this_file, lang);
1250           break;
1251         }
1252     }
1253
1254   free_regexps ();
1255   free (lb.buffer);
1256   free (filebuf.buffer);
1257   free (token_name.buffer);
1258
1259   if (!CTAGS || cxref_style)
1260     {
1261       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1262       put_entries (nodehead);
1263       free_tree (nodehead);
1264       nodehead = NULL;
1265       if (!CTAGS)
1266         {
1267           fdesc *fdp;
1268
1269           /* Output file entries that have no tags. */
1270           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1271             if (!fdp->written)
1272               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1273
1274           while (nincluded_files-- > 0)
1275             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1276
1277           if (fclose (tagf) == EOF)
1278             pfatal (tagfile);
1279         }
1280
1281       exit (EXIT_SUCCESS);
1282     }
1283
1284   /* From here on, we are in (CTAGS && !cxref_style) */
1285   if (update)
1286     {
1287       char *cmd =
1288         xmalloc (strlen (tagfile) + whatlen_max +
1289                  sizeof "mv..OTAGS;fgrep -v '\t\t' OTAGS >;rm OTAGS");
1290       for (i = 0; i < current_arg; ++i)
1291         {
1292           switch (argbuffer[i].arg_type)
1293             {
1294             case at_filename:
1295             case at_stdin:
1296               break;
1297             default:
1298               continue;         /* the for loop */
1299             }
1300           strcpy (cmd, "mv ");
1301           strcat (cmd, tagfile);
1302           strcat (cmd, " OTAGS;fgrep -v '\t");
1303           strcat (cmd, argbuffer[i].what);
1304           strcat (cmd, "\t' OTAGS >");
1305           strcat (cmd, tagfile);
1306           strcat (cmd, ";rm OTAGS");
1307           if (system (cmd) != EXIT_SUCCESS)
1308             fatal ("failed to execute shell command", (char *)NULL);
1309         }
1310       free (cmd);
1311       append_to_tagfile = TRUE;
1312     }
1313
1314   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1315   if (tagf == NULL)
1316     pfatal (tagfile);
1317   put_entries (nodehead);       /* write all the tags (CTAGS) */
1318   free_tree (nodehead);
1319   nodehead = NULL;
1320   if (fclose (tagf) == EOF)
1321     pfatal (tagfile);
1322
1323   if (CTAGS)
1324     if (append_to_tagfile || update)
1325       {
1326         char *cmd = xmalloc (2 * strlen (tagfile) + sizeof "sort -u -o..");
1327         /* Maybe these should be used:
1328            setenv ("LC_COLLATE", "C", 1);
1329            setenv ("LC_ALL", "C", 1); */
1330         strcpy (cmd, "sort -u -o ");
1331         strcat (cmd, tagfile);
1332         strcat (cmd, " ");
1333         strcat (cmd, tagfile);
1334         exit (system (cmd));
1335       }
1336   return EXIT_SUCCESS;
1337 }
1338
1339
1340 /*
1341  * Return a compressor given the file name.  If EXTPTR is non-zero,
1342  * return a pointer into FILE where the compressor-specific
1343  * extension begins.  If no compressor is found, NULL is returned
1344  * and EXTPTR is not significant.
1345  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1346  */
1347 static compressor *
1348 get_compressor_from_suffix (char *file, char **extptr)
1349 {
1350   compressor *compr;
1351   char *slash, *suffix;
1352
1353   /* File has been processed by canonicalize_filename,
1354      so we don't need to consider backslashes on DOS_NT.  */
1355   slash = etags_strrchr (file, '/');
1356   suffix = etags_strrchr (file, '.');
1357   if (suffix == NULL || suffix < slash)
1358     return NULL;
1359   if (extptr != NULL)
1360     *extptr = suffix;
1361   suffix += 1;
1362   /* Let those poor souls who live with DOS 8+3 file name limits get
1363      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1364      Only the first do loop is run if not MSDOS */
1365   do
1366     {
1367       for (compr = compressors; compr->suffix != NULL; compr++)
1368         if (streq (compr->suffix, suffix))
1369           return compr;
1370       if (!MSDOS)
1371         break;                  /* do it only once: not really a loop */
1372       if (extptr != NULL)
1373         *extptr = ++suffix;
1374     } while (*suffix != '\0');
1375   return NULL;
1376 }
1377
1378
1379
1380 /*
1381  * Return a language given the name.
1382  */
1383 static language *
1384 get_language_from_langname (const char *name)
1385 {
1386   language *lang;
1387
1388   if (name == NULL)
1389     error ("empty language name");
1390   else
1391     {
1392       for (lang = lang_names; lang->name != NULL; lang++)
1393         if (streq (name, lang->name))
1394           return lang;
1395       error ("unknown language \"%s\"", name);
1396     }
1397
1398   return NULL;
1399 }
1400
1401
1402 /*
1403  * Return a language given the interpreter name.
1404  */
1405 static language *
1406 get_language_from_interpreter (char *interpreter)
1407 {
1408   language *lang;
1409   const char **iname;
1410
1411   if (interpreter == NULL)
1412     return NULL;
1413   for (lang = lang_names; lang->name != NULL; lang++)
1414     if (lang->interpreters != NULL)
1415       for (iname = lang->interpreters; *iname != NULL; iname++)
1416         if (streq (*iname, interpreter))
1417             return lang;
1418
1419   return NULL;
1420 }
1421
1422
1423
1424 /*
1425  * Return a language given the file name.
1426  */
1427 static language *
1428 get_language_from_filename (char *file, int case_sensitive)
1429 {
1430   language *lang;
1431   const char **name, **ext, *suffix;
1432
1433   /* Try whole file name first. */
1434   for (lang = lang_names; lang->name != NULL; lang++)
1435     if (lang->filenames != NULL)
1436       for (name = lang->filenames; *name != NULL; name++)
1437         if ((case_sensitive)
1438             ? streq (*name, file)
1439             : strcaseeq (*name, file))
1440           return lang;
1441
1442   /* If not found, try suffix after last dot. */
1443   suffix = etags_strrchr (file, '.');
1444   if (suffix == NULL)
1445     return NULL;
1446   suffix += 1;
1447   for (lang = lang_names; lang->name != NULL; lang++)
1448     if (lang->suffixes != NULL)
1449       for (ext = lang->suffixes; *ext != NULL; ext++)
1450         if ((case_sensitive)
1451             ? streq (*ext, suffix)
1452             : strcaseeq (*ext, suffix))
1453           return lang;
1454   return NULL;
1455 }
1456
1457 \f
/*
 * This routine is called on each file argument.
 *
 * FILE is the name as given by the user (or read from stdin); LANG
 * is the language forced for it, and is passed on unchanged to
 * process_file.
 *
 * The function works out which file to actually open: FILE itself,
 * FILE without its compression suffix, or FILE with one of the known
 * compression suffixes added.  A compressed file is read through a
 * pipe from the compressor's command, anything else is opened
 * directly.  Tags are always recorded under the uncompressed name.
 *
 * Nothing is returned.  Problems with an individual file are
 * reported and the file is skipped; a failure when closing the input
 * is fatal.  On every path last_node and curfdp are reset to NULL.
 */
static void
process_file_name (char *file, language *lang)
{
  struct stat stat_buf;
  FILE *inf;
  fdesc *fdp;
  compressor *compr;
  char *compressed_name, *uncompressed_name;
  char *ext, *real_name;
  int retval;

  /* NOTE(review): presumably canonicalizes FILE in place -- the
     result is not used, FILE is simply used afterwards.  */
  canonicalize_filename (file);
  /* Never tag the tags file itself (unless output goes to stdout).  */
  if (streq (file, tagfile) && !streq (tagfile, "-"))
    {
      error ("skipping inclusion of %s in self.", file);
      return;
    }
  /* Split FILE into its compressed and uncompressed spellings.
     REAL_NAME always aliases one of the two heap strings (or is NULL);
     it is never freed by itself.  */
  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
    {
      compressed_name = NULL;
      real_name = uncompressed_name = savestr (file);
    }
  else
    {
      real_name = compressed_name = savestr (file);
      uncompressed_name = savenstr (file, ext - file);
    }

  /* If the canonicalized uncompressed name
     has already been dealt with, skip it silently. */
  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
    {
      assert (fdp->infname != NULL);
      if (streq (uncompressed_name, fdp->infname))
        goto cleanup;
    }

  if (stat (real_name, &stat_buf) != 0)
    {
      /* Reset real_name and try with a different name. */
      real_name = NULL;
      if (compressed_name != NULL) /* try with the given suffix */
        {
          if (stat (uncompressed_name, &stat_buf) == 0)
            real_name = uncompressed_name;
        }
      else                      /* try all possible suffixes */
        {
          /* On success the loop is left with COMPR pointing at the
             compressor whose suffix was found and COMPRESSED_NAME
             holding the name that exists.  */
          for (compr = compressors; compr->suffix != NULL; compr++)
            {
              compressed_name = concat (file, ".", compr->suffix);
              if (stat (compressed_name, &stat_buf) != 0)
                {
                  if (MSDOS)
                    {
                      /* Shorten the added ".suffix" one character at a
                         time from its front, trying each resulting
                         name.  SUF starts at the added dot.  */
                      char *suf = compressed_name + strlen (file);
                      size_t suflen = strlen (compr->suffix) + 1;
                      for ( ; suf[1]; suf++, suflen--)
                        {
                          memmove (suf, suf + 1, suflen);
                          if (stat (compressed_name, &stat_buf) == 0)
                            {
                              real_name = compressed_name;
                              break;
                            }
                        }
                      if (real_name != NULL)
                        break;
                    } /* MSDOS */
                  free (compressed_name);
                  compressed_name = NULL;
                }
              else
                {
                  real_name = compressed_name;
                  break;
                }
            }
        }
      if (real_name == NULL)
        {
          /* Report the name the user gave.  */
          perror (file);
          goto cleanup;
        }
    } /* try with a different name */

  if (!S_ISREG (stat_buf.st_mode))
    {
      error ("skipping %s: it is not a regular file.", real_name);
      goto cleanup;
    }
  /* Pointer identity tells whether the file to read is the compressed
     one; if so, read the output of the compressor's command.
     NOTE(review): REAL_NAME is appended to the command without any
     quoting before being handed to popen.  */
  if (real_name == compressed_name)
    {
      char *cmd = concat (compr->command, " ", real_name);
      inf = (FILE *) popen (cmd, "r");
      free (cmd);
    }
  else
    inf = fopen (real_name, "r");
  if (inf == NULL)
    {
      perror (real_name);
      goto cleanup;
    }

  process_file (inf, uncompressed_name, lang);

  /* Close with the call that matches how the stream was opened.  */
  if (real_name == compressed_name)
    retval = pclose (inf);
  else
    retval = fclose (inf);
  if (retval < 0)
    pfatal (file);

 cleanup:
  /* Either name may be NULL here; REAL_NAME is only an alias.  */
  free (compressed_name);
  free (uncompressed_name);
  last_node = NULL;
  curfdp = NULL;
  return;
}
1582
1583 static void
1584 process_file (FILE *fh, char *fn, language *lang)
1585 {
1586   static const fdesc emptyfdesc;
1587   fdesc *fdp;
1588
1589   /* Create a new input file description entry. */
1590   fdp = xnew (1, fdesc);
1591   *fdp = emptyfdesc;
1592   fdp->next = fdhead;
1593   fdp->infname = savestr (fn);
1594   fdp->lang = lang;
1595   fdp->infabsname = absolute_filename (fn, cwd);
1596   fdp->infabsdir = absolute_dirname (fn, cwd);
1597   if (filename_is_absolute (fn))
1598     {
1599       /* An absolute file name.  Canonicalize it. */
1600       fdp->taggedfname = absolute_filename (fn, NULL);
1601     }
1602   else
1603     {
1604       /* A file name relative to cwd.  Make it relative
1605          to the directory of the tags file. */
1606       fdp->taggedfname = relative_filename (fn, tagfiledir);
1607     }
1608   fdp->usecharno = TRUE;        /* use char position when making tags */
1609   fdp->prop = NULL;
1610   fdp->written = FALSE;         /* not written on tags file yet */
1611
1612   fdhead = fdp;
1613   curfdp = fdhead;              /* the current file description */
1614
1615   find_entries (fh);
1616
1617   /* If not Ctags, and if this is not metasource and if it contained no #line
1618      directives, we can write the tags and free all nodes pointing to
1619      curfdp. */
1620   if (!CTAGS
1621       && curfdp->usecharno      /* no #line directives in this file */
1622       && !curfdp->lang->metasource)
1623     {
1624       node *np, *prev;
1625
1626       /* Look for the head of the sublist relative to this file.  See add_node
1627          for the structure of the node tree. */
1628       prev = NULL;
1629       for (np = nodehead; np != NULL; prev = np, np = np->left)
1630         if (np->fdp == curfdp)
1631           break;
1632
1633       /* If we generated tags for this file, write and delete them. */
1634       if (np != NULL)
1635         {
1636           /* This is the head of the last sublist, if any.  The following
1637              instructions depend on this being true. */
1638           assert (np->left == NULL);
1639
1640           assert (fdhead == curfdp);
1641           assert (last_node->fdp == curfdp);
1642           put_entries (np);     /* write tags for file curfdp->taggedfname */
1643           free_tree (np);       /* remove the written nodes */
1644           if (prev == NULL)
1645             nodehead = NULL;    /* no nodes left */
1646           else
1647             prev->left = NULL;  /* delete the pointer to the sublist */
1648         }
1649     }
1650 }
1651
1652 /*
1653  * This routine sets up the boolean pseudo-functions which work
1654  * by setting boolean flags dependent upon the corresponding character.
1655  * Every char which is NOT in that string is not a white char.  Therefore,
1656  * all of the array "_wht" is set to FALSE, and then the elements
1657  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1658  * of a char is TRUE if it is the string "white", else FALSE.
1659  */
1660 static void
1661 init (void)
1662 {
1663   register const char *sp;
1664   register int i;
1665
1666   for (i = 0; i < CHARS; i++)
1667     iswhite (i) = notinname (i) = begtoken (i) = intoken (i) = endtoken (i) = FALSE;
1668   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1669   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1670   notinname ('\0') = notinname ('\n');
1671   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1672   begtoken ('\0') = begtoken ('\n');
1673   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1674   intoken ('\0') = intoken ('\n');
1675   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1676   endtoken ('\0') = endtoken ('\n');
1677 }
1678
1679 /*
1680  * This routine opens the specified file and calls the function
1681  * which finds the function and type definitions.
1682  */
1683 static void
1684 find_entries (FILE *inf)
1685 {
1686   char *cp;
1687   language *lang = curfdp->lang;
1688   Lang_function *parser = NULL;
1689
1690   /* If user specified a language, use it. */
1691   if (lang != NULL && lang->function != NULL)
1692     {
1693       parser = lang->function;
1694     }
1695
1696   /* Else try to guess the language given the file name. */
1697   if (parser == NULL)
1698     {
1699       lang = get_language_from_filename (curfdp->infname, TRUE);
1700       if (lang != NULL && lang->function != NULL)
1701         {
1702           curfdp->lang = lang;
1703           parser = lang->function;
1704         }
1705     }
1706
1707   /* Else look for sharp-bang as the first two characters. */
1708   if (parser == NULL
1709       && readline_internal (&lb, inf) > 0
1710       && lb.len >= 2
1711       && lb.buffer[0] == '#'
1712       && lb.buffer[1] == '!')
1713     {
1714       char *lp;
1715
1716       /* Set lp to point at the first char after the last slash in the
1717          line or, if no slashes, at the first nonblank.  Then set cp to
1718          the first successive blank and terminate the string. */
1719       lp = etags_strrchr (lb.buffer+2, '/');
1720       if (lp != NULL)
1721         lp += 1;
1722       else
1723         lp = skip_spaces (lb.buffer + 2);
1724       cp = skip_non_spaces (lp);
1725       *cp = '\0';
1726
1727       if (strlen (lp) > 0)
1728         {
1729           lang = get_language_from_interpreter (lp);
1730           if (lang != NULL && lang->function != NULL)
1731             {
1732               curfdp->lang = lang;
1733               parser = lang->function;
1734             }
1735         }
1736     }
1737
1738   /* We rewind here, even if inf may be a pipe.  We fail if the
1739      length of the first line is longer than the pipe block size,
1740      which is unlikely. */
1741   rewind (inf);
1742
1743   /* Else try to guess the language given the case insensitive file name. */
1744   if (parser == NULL)
1745     {
1746       lang = get_language_from_filename (curfdp->infname, FALSE);
1747       if (lang != NULL && lang->function != NULL)
1748         {
1749           curfdp->lang = lang;
1750           parser = lang->function;
1751         }
1752     }
1753
1754   /* Else try Fortran or C. */
1755   if (parser == NULL)
1756     {
1757       node *old_last_node = last_node;
1758
1759       curfdp->lang = get_language_from_langname ("fortran");
1760       find_entries (inf);
1761
1762       if (old_last_node == last_node)
1763         /* No Fortran entries found.  Try C. */
1764         {
1765           /* We do not tag if rewind fails.
1766              Only the file name will be recorded in the tags file. */
1767           rewind (inf);
1768           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1769           find_entries (inf);
1770         }
1771       return;
1772     }
1773
1774   if (!no_line_directive
1775       && curfdp->lang != NULL && curfdp->lang->metasource)
1776     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1777        file, or anyway we parsed a file that is automatically generated from
1778        this one.  If this is the case, the bingo.c file contained #line
1779        directives that generated tags pointing to this file.  Let's delete
1780        them all before parsing this file, which is the real source. */
1781     {
1782       fdesc **fdpp = &fdhead;
1783       while (*fdpp != NULL)
1784         if (*fdpp != curfdp
1785             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1786           /* We found one of those!  We must delete both the file description
1787              and all tags referring to it. */
1788           {
1789             fdesc *badfdp = *fdpp;
1790
1791             /* Delete the tags referring to badfdp->taggedfname
1792                that were obtained from badfdp->infname. */
1793             invalidate_nodes (badfdp, &nodehead);
1794
1795             *fdpp = badfdp->next; /* remove the bad description from the list */
1796             free_fdesc (badfdp);
1797           }
1798         else
1799           fdpp = &(*fdpp)->next; /* advance the list pointer */
1800     }
1801
1802   assert (parser != NULL);
1803
1804   /* Generic initializations before reading from file. */
1805   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1806
1807   /* Generic initializations before parsing file with readline. */
1808   lineno = 0;                  /* reset global line number */
1809   charno = 0;                  /* reset global char number */
1810   linecharno = 0;              /* reset global char number of line start */
1811
1812   parser (inf);
1813
1814   regex_tag_multiline ();
1815 }
1816
1817 \f
1818 /*
1819  * Check whether an implicitly named tag should be created,
1820  * then call `pfnote'.
1821  * NAME is a string that is internally copied by this function.
1822  *
1823  * TAGS format specification
1824  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1825  * The following is explained in some more detail in etc/ETAGS.EBNF.
1826  *
1827  * make_tag creates tags with "implicit tag names" (unnamed tags)
1828  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1829  *  1. NAME does not contain any of the characters in NONAM;
1830  *  2. LINESTART contains name as either a rightmost, or rightmost but
1831  *     one character, substring;
1832  *  3. the character, if any, immediately before NAME in LINESTART must
1833  *     be a character in NONAM;
1834  *  4. the character, if any, immediately after NAME in LINESTART must
1835  *     also be a character in NONAM.
1836  *
1837  * The implementation uses the notinname() macro, which recognizes the
1838  * characters stored in the string `nonam'.
1839  * etags.el needs to use the same characters that are in NONAM.
1840  */
1841 static void
1842 make_tag (const char *name,     /* tag name, or NULL if unnamed */
1843           int namelen,          /* tag length */
1844           int is_func,          /* tag is a function */
1845           char *linestart,      /* start of the line where tag is */
1846           int linelen,          /* length of the line where tag is */
1847           int lno,              /* line number */
1848           long int cno)         /* character number */
1849 {
1850   bool named = (name != NULL && namelen > 0);
1851   char *nname = NULL;
1852
1853   if (!CTAGS && named)          /* maybe set named to false */
1854     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1855        such that etags.el can guess a name from it. */
1856     {
1857       int i;
1858       register const char *cp = name;
1859
1860       for (i = 0; i < namelen; i++)
1861         if (notinname (*cp++))
1862           break;
1863       if (i == namelen)                         /* rule #1 */
1864         {
1865           cp = linestart + linelen - namelen;
1866           if (notinname (linestart[linelen-1]))
1867             cp -= 1;                            /* rule #4 */
1868           if (cp >= linestart                   /* rule #2 */
1869               && (cp == linestart
1870                   || notinname (cp[-1]))        /* rule #3 */
1871               && strneq (name, cp, namelen))    /* rule #2 */
1872             named = FALSE;      /* use implicit tag name */
1873         }
1874     }
1875
1876   if (named)
1877     nname = savenstr (name, namelen);
1878
1879   pfnote (nname, is_func, linestart, linelen, lno, cno);
1880 }
1881
1882 /* Record a tag. */
1883 static void
1884 pfnote (char *name, int is_func, char *linestart, int linelen, int lno, long int cno)
1885                                 /* tag name, or NULL if unnamed */
1886                                 /* tag is a function */
1887                                 /* start of the line where tag is */
1888                                 /* length of the line where tag is */
1889                                 /* line number */
1890                                 /* character number */
1891 {
1892   register node *np;
1893
1894   assert (name == NULL || name[0] != '\0');
1895   if (CTAGS && name == NULL)
1896     return;
1897
1898   np = xnew (1, node);
1899
1900   /* If ctags mode, change name "main" to M<thisfilename>. */
1901   if (CTAGS && !cxref_style && streq (name, "main"))
1902     {
1903       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1904       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1905       fp = etags_strrchr (np->name, '.');
1906       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1907         fp[0] = '\0';
1908     }
1909   else
1910     np->name = name;
1911   np->valid = TRUE;
1912   np->been_warned = FALSE;
1913   np->fdp = curfdp;
1914   np->is_func = is_func;
1915   np->lno = lno;
1916   if (np->fdp->usecharno)
1917     /* Our char numbers are 0-base, because of C language tradition?
1918        ctags compatibility?  old versions compatibility?   I don't know.
1919        Anyway, since emacs's are 1-base we expect etags.el to take care
1920        of the difference.  If we wanted to have 1-based numbers, we would
1921        uncomment the +1 below. */
1922     np->cno = cno /* + 1 */ ;
1923   else
1924     np->cno = invalidcharno;
1925   np->left = np->right = NULL;
1926   if (CTAGS && !cxref_style)
1927     {
1928       if (strlen (linestart) < 50)
1929         np->regex = concat (linestart, "$", "");
1930       else
1931         np->regex = savenstr (linestart, 50);
1932     }
1933   else
1934     np->regex = savenstr (linestart, linelen);
1935
1936   add_node (np, &nodehead);
1937 }
1938
1939 /*
1940  * free_tree ()
1941  *      recurse on left children, iterate on right children.
1942  */
1943 static void
1944 free_tree (register node *np)
1945 {
1946   while (np)
1947     {
1948       register node *node_right = np->right;
1949       free_tree (np->left);
1950       free (np->name);
1951       free (np->regex);
1952       free (np);
1953       np = node_right;
1954     }
1955 }
1956
1957 /*
1958  * free_fdesc ()
1959  *      delete a file description
1960  */
1961 static void
1962 free_fdesc (register fdesc *fdp)
1963 {
1964   free (fdp->infname);
1965   free (fdp->infabsname);
1966   free (fdp->infabsdir);
1967   free (fdp->taggedfname);
1968   free (fdp->prop);
1969   free (fdp);
1970 }
1971
1972 /*
1973  * add_node ()
1974  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1975  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1976  *      balancing.
1977  *
1978  *      add_node is the only function allowed to add nodes, so it can
1979  *      maintain state.
1980  */
1981 static void
1982 add_node (node *np, node **cur_node_p)
1983 {
1984   register int dif;
1985   register node *cur_node = *cur_node_p;
1986
1987   if (cur_node == NULL)
1988     {
1989       *cur_node_p = np;
1990       last_node = np;
1991       return;
1992     }
1993
1994   if (!CTAGS)
1995     /* Etags Mode */
1996     {
1997       /* For each file name, tags are in a linked sublist on the right
1998          pointer.  The first tags of different files are a linked list
1999          on the left pointer.  last_node points to the end of the last
2000          used sublist. */
2001       if (last_node != NULL && last_node->fdp == np->fdp)
2002         {
2003           /* Let's use the same sublist as the last added node. */
2004           assert (last_node->right == NULL);
2005           last_node->right = np;
2006           last_node = np;
2007         }
2008       else if (cur_node->fdp == np->fdp)
2009         {
2010           /* Scanning the list we found the head of a sublist which is
2011              good for us.  Let's scan this sublist. */
2012           add_node (np, &cur_node->right);
2013         }
2014       else
2015         /* The head of this sublist is not good for us.  Let's try the
2016            next one. */
2017         add_node (np, &cur_node->left);
2018     } /* if ETAGS mode */
2019
2020   else
2021     {
2022       /* Ctags Mode */
2023       dif = strcmp (np->name, cur_node->name);
2024
2025       /*
2026        * If this tag name matches an existing one, then
2027        * do not add the node, but maybe print a warning.
2028        */
2029       if (no_duplicates && !dif)
2030         {
2031           if (np->fdp == cur_node->fdp)
2032             {
2033               if (!no_warnings)
2034                 {
2035                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2036                            np->fdp->infname, lineno, np->name);
2037                   fprintf (stderr, "Second entry ignored\n");
2038                 }
2039             }
2040           else if (!cur_node->been_warned && !no_warnings)
2041             {
2042               fprintf
2043                 (stderr,
2044                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2045                  np->fdp->infname, cur_node->fdp->infname, np->name);
2046               cur_node->been_warned = TRUE;
2047             }
2048           return;
2049         }
2050
2051       /* Actually add the node */
2052       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2053     } /* if CTAGS mode */
2054 }
2055
2056 /*
2057  * invalidate_nodes ()
2058  *      Scan the node tree and invalidate all nodes pointing to the
2059  *      given file description (CTAGS case) or free them (ETAGS case).
2060  */
2061 static void
2062 invalidate_nodes (fdesc *badfdp, node **npp)
2063 {
2064   node *np = *npp;
2065
2066   if (np == NULL)
2067     return;
2068
2069   if (CTAGS)
2070     {
2071       if (np->left != NULL)
2072         invalidate_nodes (badfdp, &np->left);
2073       if (np->fdp == badfdp)
2074         np->valid = FALSE;
2075       if (np->right != NULL)
2076         invalidate_nodes (badfdp, &np->right);
2077     }
2078   else
2079     {
2080       assert (np->fdp != NULL);
2081       if (np->fdp == badfdp)
2082         {
2083           *npp = np->left;      /* detach the sublist from the list */
2084           np->left = NULL;      /* isolate it */
2085           free_tree (np);       /* free it */
2086           invalidate_nodes (badfdp, npp);
2087         }
2088       else
2089         invalidate_nodes (badfdp, &np->left);
2090     }
2091 }
2092
2093 \f
/* Forward declarations of the helpers that compute the size, in
   characters, of the entries written by put_entries. */
static int total_size_of_entries (node *);
static int number_len (long) ATTRIBUTE_CONST;
2096
/* Return the number of digits in the decimal representation of the
   non-negative number NUM. */
static int
number_len (long int num)
{
  int digits;

  /* One digit to begin with, plus one for each power of ten. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2106
2107 /*
2108  * Return total number of characters that put_entries will output for
2109  * the nodes in the linked list at the right of the specified node.
2110  * This count is irrelevant with etags.el since emacs 19.34 at least,
2111  * but is still supplied for backward compatibility.
2112  */
2113 static int
2114 total_size_of_entries (register node *np)
2115 {
2116   register int total = 0;
2117
2118   for (; np != NULL; np = np->right)
2119     if (np->valid)
2120       {
2121         total += strlen (np->regex) + 1;                /* pat\177 */
2122         if (np->name != NULL)
2123           total += strlen (np->name) + 1;               /* name\001 */
2124         total += number_len ((long) np->lno) + 1;       /* lno, */
2125         if (np->cno != invalidcharno)                   /* cno */
2126           total += number_len (np->cno);
2127         total += 1;                                     /* newline */
2128       }
2129
2130   return total;
2131 }
2132
2133 static void
2134 put_entries (register node *np)
2135 {
2136   register char *sp;
2137   static fdesc *fdp = NULL;
2138
2139   if (np == NULL)
2140     return;
2141
2142   /* Output subentries that precede this one */
2143   if (CTAGS)
2144     put_entries (np->left);
2145
2146   /* Output this entry */
2147   if (np->valid)
2148     {
2149       if (!CTAGS)
2150         {
2151           /* Etags mode */
2152           if (fdp != np->fdp)
2153             {
2154               fdp = np->fdp;
2155               fprintf (tagf, "\f\n%s,%d\n",
2156                        fdp->taggedfname, total_size_of_entries (np));
2157               fdp->written = TRUE;
2158             }
2159           fputs (np->regex, tagf);
2160           fputc ('\177', tagf);
2161           if (np->name != NULL)
2162             {
2163               fputs (np->name, tagf);
2164               fputc ('\001', tagf);
2165             }
2166           fprintf (tagf, "%d,", np->lno);
2167           if (np->cno != invalidcharno)
2168             fprintf (tagf, "%ld", np->cno);
2169           fputs ("\n", tagf);
2170         }
2171       else
2172         {
2173           /* Ctags mode */
2174           if (np->name == NULL)
2175             error ("internal error: NULL name in ctags mode.");
2176
2177           if (cxref_style)
2178             {
2179               if (vgrind_style)
2180                 fprintf (stdout, "%s %s %d\n",
2181                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2182               else
2183                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2184                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2185             }
2186           else
2187             {
2188               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2189
2190               if (np->is_func)
2191                 {               /* function or #define macro with args */
2192                   putc (searchar, tagf);
2193                   putc ('^', tagf);
2194
2195                   for (sp = np->regex; *sp; sp++)
2196                     {
2197                       if (*sp == '\\' || *sp == searchar)
2198                         putc ('\\', tagf);
2199                       putc (*sp, tagf);
2200                     }
2201                   putc (searchar, tagf);
2202                 }
2203               else
2204                 {               /* anything else; text pattern inadequate */
2205                   fprintf (tagf, "%d", np->lno);
2206                 }
2207               putc ('\n', tagf);
2208             }
2209         }
2210     } /* if this node contains a valid tag */
2211
2212   /* Output subentries that follow this one */
2213   put_entries (np->right);
2214   if (!CTAGS)
2215     put_entries (np->left);
2216 }
2217
2218 \f
/* C extensions.
   These values tell which C dialect is being parsed.  C_symtype
   recognizes a keyword that carries a non-zero dialect value only if
   that value has a bit in common with the dialect being parsed. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2227
/*
 * The C symbol tables.
 *
 * sym_type classifies the keywords listed in the gperf input below;
 * C_symtype returns st_none for anything that is not a keyword of the
 * dialect being parsed.
 */
enum sym_type
{
  st_none,                      /* not a keyword */
  st_C_objprot, st_C_objimpl, st_C_objend, /* @interface and @protocol,
                                              @implementation, @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore, st_C_attribute,  /* if, for, while...; __attribute__, GTY */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class, st_C_template,    /* class; template */
  /* struct, union, namespace...; extern; enum; define and undef; typedef */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2242
/* Forward declarations: the gperf generated hash function and keyword
   lookup, and the dialect-aware wrapper around the lookup. */
static unsigned int hash (const char *, unsigned int);
static struct C_stab_entry * in_word_set (const char *, unsigned int);
static enum sym_type C_symtype (char *, int, int);
2246
2247 /* Feed stuff between (but not including) %[ and %] lines to:
2248      gperf -m 5
2249 %[
2250 %compare-strncmp
2251 %enum
2252 %struct-type
2253 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2254 %%
2255 if,             0,                      st_C_ignore
2256 for,            0,                      st_C_ignore
2257 while,          0,                      st_C_ignore
2258 switch,         0,                      st_C_ignore
2259 return,         0,                      st_C_ignore
2260 __attribute__,  0,                      st_C_attribute
2261 GTY,            0,                      st_C_attribute
2262 @interface,     0,                      st_C_objprot
2263 @protocol,      0,                      st_C_objprot
2264 @implementation,0,                      st_C_objimpl
2265 @end,           0,                      st_C_objend
2266 import,         (C_JAVA & ~C_PLPL),     st_C_ignore
2267 package,        (C_JAVA & ~C_PLPL),     st_C_ignore
2268 friend,         C_PLPL,                 st_C_ignore
2269 extends,        (C_JAVA & ~C_PLPL),     st_C_javastruct
2270 implements,     (C_JAVA & ~C_PLPL),     st_C_javastruct
2271 interface,      (C_JAVA & ~C_PLPL),     st_C_struct
2272 class,          0,                      st_C_class
2273 namespace,      C_PLPL,                 st_C_struct
2274 domain,         C_STAR,                 st_C_struct
2275 union,          0,                      st_C_struct
2276 struct,         0,                      st_C_struct
2277 extern,         0,                      st_C_extern
2278 enum,           0,                      st_C_enum
2279 typedef,        0,                      st_C_typedef
2280 define,         0,                      st_C_define
2281 undef,          0,                      st_C_define
2282 operator,       C_PLPL,                 st_C_operator
2283 template,       0,                      st_C_template
2284 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2285 DEFUN,          0,                      st_C_gnumacro
2286 SYSCALL,        0,                      st_C_gnumacro
2287 ENTRY,          0,                      st_C_gnumacro
2288 PSEUDO,         0,                      st_C_gnumacro
2289 # These are defined inside C functions, so currently they are not met.
2290 # EXFUN used in glibc, DEFVAR_* in emacs.
2291 #EXFUN,         0,                      st_C_gnumacro
2292 #DEFVAR_,       0,                      st_C_gnumacro
2293 %]
2294 and replace lines between %< and %> with its output, then:
2295  - remove the #if characterset check
2296  - make in_word_set static and not inline. */
2297 /*%<*/
2298 /* C code produced by gperf version 3.0.1 */
2299 /* Command-line: gperf -m 5  */
2300 /* Computed positions: -k'2-3' */
2301
/* A keyword table entry: the keyword, the dialects it is restricted to
   (0 when C_symtype accepts it in every dialect), and its type. */
struct C_stab_entry { const char *name; int c_ext; enum sym_type type; };
2303 /* maximum key range = 33, duplicates = 0 */
2304
/* Hash function of the keyword table: the length LEN of STR plus the
   association values of the second character and, unless LEN is 2, of
   the third character of STR.  STR must hold at least two characters,
   and three when LEN is not 2. */
static inline unsigned int
hash (register const char *str, register unsigned int len)
{
  static unsigned char asso_values[] =
    {
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35,  3,
      26, 35, 35, 35, 35, 35, 35, 35, 27, 35,
      35, 35, 35, 24,  0, 35, 35, 35, 35,  0,
      35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
       0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
       4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      35, 35, 35, 35, 35, 35
    };
  int hval = len;

  /* The third character counts for every length but 2. */
  if (hval != 2)
    hval += asso_values[(unsigned char) str[2]];
  hval += asso_values[(unsigned char) str[1]];

  return hval;
}
2350
2351 static struct C_stab_entry *
2352 in_word_set (register const char *str, register unsigned int len)
2353 {
2354   enum
2355     {
2356       TOTAL_KEYWORDS = 33,
2357       MIN_WORD_LENGTH = 2,
2358       MAX_WORD_LENGTH = 15,
2359       MIN_HASH_VALUE = 2,
2360       MAX_HASH_VALUE = 34
2361     };
2362
2363   static struct C_stab_entry wordlist[] =
2364     {
2365       {""}, {""},
2366       {"if",            0,                      st_C_ignore},
2367       {"GTY",           0,                      st_C_attribute},
2368       {"@end",          0,                      st_C_objend},
2369       {"union",         0,                      st_C_struct},
2370       {"define",                0,                      st_C_define},
2371       {"import",                (C_JAVA & ~C_PLPL),     st_C_ignore},
2372       {"template",      0,                      st_C_template},
2373       {"operator",      C_PLPL,                 st_C_operator},
2374       {"@interface",    0,                      st_C_objprot},
2375       {"implements",    (C_JAVA & ~C_PLPL),     st_C_javastruct},
2376       {"friend",                C_PLPL,                 st_C_ignore},
2377       {"typedef",       0,                      st_C_typedef},
2378       {"return",                0,                      st_C_ignore},
2379       {"@implementation",0,                     st_C_objimpl},
2380       {"@protocol",     0,                      st_C_objprot},
2381       {"interface",     (C_JAVA & ~C_PLPL),     st_C_struct},
2382       {"extern",                0,                      st_C_extern},
2383       {"extends",       (C_JAVA & ~C_PLPL),     st_C_javastruct},
2384       {"struct",                0,                      st_C_struct},
2385       {"domain",                C_STAR,                 st_C_struct},
2386       {"switch",                0,                      st_C_ignore},
2387       {"enum",          0,                      st_C_enum},
2388       {"for",           0,                      st_C_ignore},
2389       {"namespace",     C_PLPL,                 st_C_struct},
2390       {"class",         0,                      st_C_class},
2391       {"while",         0,                      st_C_ignore},
2392       {"undef",         0,                      st_C_define},
2393       {"package",       (C_JAVA & ~C_PLPL),     st_C_ignore},
2394       {"__attribute__", 0,                      st_C_attribute},
2395       {"SYSCALL",       0,                      st_C_gnumacro},
2396       {"ENTRY",         0,                      st_C_gnumacro},
2397       {"PSEUDO",                0,                      st_C_gnumacro},
2398       {"DEFUN",         0,                      st_C_gnumacro}
2399     };
2400
2401   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2402     {
2403       register int key = hash (str, len);
2404
2405       if (key <= MAX_HASH_VALUE && key >= 0)
2406         {
2407           register const char *s = wordlist[key].name;
2408
2409           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2410             return &wordlist[key];
2411         }
2412     }
2413   return 0;
2414 }
2415 /*%>*/
2416
2417 static enum sym_type
2418 C_symtype (char *str, int len, int c_ext)
2419 {
2420   register struct C_stab_entry *se = in_word_set (str, len);
2421
2422   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2423     return st_none;
2424   return se->type;
2425 }
2426
2427 \f
2428 /*
2429  * Ignoring __attribute__ ((list))
2430  */
2431 static bool inattribute;        /* looking at an __attribute__ construct */
2432
2433 /*
2434  * C functions and variables are recognized using a simple
2435  * finite automaton.  fvdef is its state variable.
2436  */
2437 static enum
2438 {
2439   fvnone,                       /* nothing seen */
2440   fdefunkey,                    /* GNU macro keyword (e.g. DEFUN) seen */
2441   fdefunname,                   /* name after a GNU macro keyword seen */
2442   foperator,                    /* func: operator keyword seen (cplpl) */
2443   fvnameseen,                   /* function or variable name seen */
2444   fstartlist,                   /* func: just after open parenthesis */
2445   finlist,                      /* func: in parameter list */
2446   flistseen,                    /* func: after parameter list */
2447   fignore,                      /* func: before open brace */
2448   vignore                       /* var-like: ignore until ';' */
2449 } fvdef;
2450
2451 static bool fvextern;           /* func or var: extern keyword seen */
2452
2453 /*
2454  * typedefs are recognized using a simple finite automaton.
2455  * typdef is its state variable.  C_entries does not consider
      * tokens at all while it is tignore.
2456  */
2457 static enum
2458 {
2459   tnone,                        /* nothing seen */
2460   tkeyseen,                     /* typedef keyword seen */
2461   ttypeseen,                    /* defined type seen */
2462   tinbody,                      /* inside typedef body */
2463   tend,                         /* just before typedef tag */
2464   tignore                       /* junk after typedef tag */
2465 } typdef;
2466
2467 /*
2468  * struct-like structures (enum, struct and union) are recognized
2469  * using another simple finite automaton.  `structdef' is its state
2470  * variable.  In consider_token a struct-like keyword moves it to
      * skeyseen, the next token considered moves it to stagseen, and a
      * st_C_javastruct keyword seen in stagseen moves it to scolonseen.
2471  */
2472 static enum
2473 {
2474   snone,                        /* nothing seen yet,
2475                                    or in struct body if bracelev > 0 */
2476   skeyseen,                     /* struct-like keyword seen */
2477   stagseen,                     /* struct-like tag seen */
2478   scolonseen                    /* colon seen after struct-like tag */
2479 } structdef;
2480
2481 /*
2482  * When objdef is different from onone, objtag is the name of the class.
      * consider_token assigns it the result of savenstr in the oprotocol
      * and oimplementation states; the previous value is never freed.
2483  */
2484 static const char *objtag = "<uninited>";
2485
2486 /*
2487  * Yet another little state machine to deal with preprocessor lines.
      * consider_token advances it from dsharpseen to ddefineseen (token is
      * a define-like keyword) or to dignorerest (any other token), and from
      * ddefineseen to dignorerest once the macro name has been read.
2488  */
2489 static enum
2490 {
2491   dnone,                        /* nothing seen */
2492   dsharpseen,                   /* '#' seen as first char on line */
2493   ddefineseen,                  /* '#' and 'define' seen */
2494   dignorerest                   /* ignore rest of line */
2495 } definedef;
2496
2497 /*
2498  * State machine for Objective C protocols and implementations.
2499  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2500  */
2501 static enum
2502 {
2503   onone,                        /* nothing seen */
2504   oprotocol,                    /* @interface or @protocol seen */
2505   oimplementation,              /* @implementation seen */
2506   otagseen,                     /* class name seen */
2507   oparenseen,                   /* parenthesis before category seen */
2508   ocatseen,                     /* category name seen */
2509   oinbody,                      /* in @implementation body */
2510   omethodsign,                  /* in @implementation body, after +/- */
2511   omethodtag,                   /* after method name */
2512   omethodcolon,                 /* after method colon */
2513   omethodparm,                  /* after method parameter */
2514   oignore                       /* wait for @end */
2515 } objdef;
2516
2517
2518 /*
2519  * Use this structure to keep info about the token read, and how it
2520  * should be tagged.  Used by the make_C_tag function to build a tag.
2521  */
2522 static struct tok
2523 {
2524   char *line;                   /* string containing the token */
2525   int offset;                   /* where the token starts in LINE */
2526   int length;                   /* token length */
2527   /*
2528     The previous members can be used to pass strings around for generic
2529     purposes.  The following ones specifically refer to creating tags.  In this
2530     case the token contained here is the pattern that will be used to create a
2531     tag.
2532   */
2533   bool valid;                   /* the token may be used to create a tag
2534                                    (make_C_tag checks this); it should be
2535                                    invalidated whenever a state machine is
                                        reset prematurely */
2536   bool named;                   /* create a named tag */
2537   int lineno;                   /* source line number of tag */
2538   long linepos;                 /* source char number of tag */
2539 } token;                        /* latest token read */
2540
2541 /*
2542  * Variables and functions for dealing with nested structures.
2543  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2544  */
2545 static void pushclass_above (int, char *, int);
2546 static void popclass_above (int);
2547 static void write_classname (linebuffer *, const char *qualifier);
2548
     /* cname[i] and bracelev[i] describe the same stack entry; entry 0 is
        the outermost one.  A cname element may be NULL.  */
2549 static struct {
2550   char **cname;                 /* nested class names */
2551   int *bracelev;                /* nested class brace level */
2552   int nl;                       /* class nesting level (elements used) */
2553   int size;                     /* length of the array */
2554 } cstack;                       /* stack for nested declaration tags */
2555 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2556 #define nestlev         (cstack.nl)
2557 /* After struct keyword or in struct body, not inside a nested function.
        This macro reads a variable named bracelev that must be in scope
        where it is expanded.  */
2558 #define instruct        (structdef == snone && nestlev > 0                      \
2559                          && bracelev == cstack.bracelev[nestlev-1] + 1)
2560
     /*
      * pushclass_above ()
      *      Push a new entry on cstack for a declaration seen at brace
      *      level BRACELEV.  Entries whose brace level is BRACELEV or more
      *      are popped first.  The name stored is the result of
      *      savenstr (STR, LEN), or NULL when STR is NULL.
      */
2561 static void
2562 pushclass_above (int bracelev, char *str, int len)
2563 {
2564   int nl;
2565
2566   popclass_above (bracelev);
2567   nl = cstack.nl;
2568   if (nl >= cstack.size)
2569     {
          /* Both arrays are full: double their length.  */
2570       int size = cstack.size *= 2;
2571       xrnew (cstack.cname, size, char *);
2572       xrnew (cstack.bracelev, size, int);
2573     }
     /* After the pop above, brace levels on the stack are strictly
        increasing from bottom to top.  */
2574   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2575   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2576   cstack.bracelev[nl] = bracelev;
2577   cstack.nl = nl + 1;
2578 }
2579
     /*
      * popclass_above ()
      *      Pop from the top of cstack every entry whose brace level is
      *      BRACELEV or more, freeing the name of each popped entry.
      */
2580 static void
2581 popclass_above (int bracelev)
2582 {
2583   int nl;
2584
2585   for (nl = cstack.nl - 1;
2586        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2587        nl--)
2588     {
2589       free (cstack.cname[nl]);
          /* Entry NL is gone, so NL is the new number of used entries.  */
2590       cstack.nl = nl;
2591     }
2592 }
2593
     /*
      * write_classname ()
      *      Build in CN the names stored on cstack, outermost first, with
      *      QUALIFIER written before each name after the outermost one.
      *      Entries whose name is NULL are skipped.  When the stack is empty
      *      or its outermost name is NULL, CN starts out as the empty
      *      string, so any later name is still preceded by QUALIFIER.
      */
2594 static void
2595 write_classname (linebuffer *cn, const char *qualifier)
2596 {
2597   int i, len;
2598   int qlen = strlen (qualifier);
2599
2600   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2601     {
2602       len = 0;
2603       cn->len = 0;
2604       cn->buffer[0] = '\0';
2605     }
2606   else
2607     {
2608       len = strlen (cstack.cname[0]);
2609       linebuffer_setlen (cn, len);
2610       strcpy (cn->buffer, cstack.cname[0]);
2611     }
2612   for (i = 1; i < cstack.nl; i++)
2613     {
2614       char *s = cstack.cname[i];
2615       if (s == NULL)
2616         continue;
          /* Resize CN for the qualifier and the name before appending
             them at offset LEN.  */
2617       linebuffer_setlen (cn, len + qlen + strlen (s));
2618       len += sprintf (cn->buffer + len, "%s%s", qualifier, s);
2619     }
2620 }
2621
2622 \f
2623 static bool consider_token (char *, int, int, int *, int, int, bool *);
2624 static void make_C_tag (bool);
2625
2626 /*
2627  * consider_token ()
2628  *      checks to see if the current token is at the start of a
2629  *      function or variable, or corresponds to a typedef, or
2630  *      is a struct/union/enum tag, or #define, or an enum constant.
2631  *
2632  *      Returns TRUE when the token is one of the above, FALSE when it
2633  *      should be ignored.  *IS_FUNC_OR_VAR is set to TRUE for a function
      *      or variable name, an operator, the name after a GNU macro keyword
      *      and an Objective C class or category name; for a #define it is
      *      set to whether the macro name is followed by '('.  C_EXTP points
      *      to which language we are looking at, and is changed from C_AUTO
      *      to C_PLPL when a C++ construct is detected.
2634  *
2635  * Globals
2636  *      fvdef                   IN OUT
2637  *      structdef               IN OUT
2638  *      definedef               IN OUT
2639  *      typdef                  IN OUT
2640  *      objdef                  IN OUT
2641  */
2642
2643 static bool
2644 consider_token (register char *str, register int len, register int c, int *c_extp, int bracelev, int parlev, int *is_func_or_var)
2645                                 /* IN: token pointer */
2646                                 /* IN: token length */
2647                                 /* IN: first char after the token */
2648                                 /* IN, OUT: C extensions mask */
2649                                 /* IN: brace level */
2650                                 /* IN: parenthesis level */
2651                                 /* OUT: function or variable found */
2652 {
2653   /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
2654      structtype is the type of the preceding struct-like keyword, and
2655      structbracelev is the brace level where it has been seen. */
2656   static enum sym_type structtype;
2657   static int structbracelev;
     /* toktype is static too, but it is recomputed on every call. */
2658   static enum sym_type toktype;
2659
2660
2661   toktype = C_symtype (str, len, *c_extp);
2662
2663   /*
2664    * Skip __attribute__
2665    */
2666   if (toktype == st_C_attribute)
2667     {
2668       inattribute = TRUE;
2669       return FALSE;
2670      }
2671
2672    /*
2673     * Advance the definedef state machine.
2674     */
2675    switch (definedef)
2676      {
2677      case dnone:
2678        /* We're not on a preprocessor line. */
2679        if (toktype == st_C_gnumacro)
2680          {
2681            fvdef = fdefunkey;
2682            return FALSE;
2683          }
2684        break;
2685      case dsharpseen:
            /* First token after '#': only a define-like keyword is of
               interest, the rest of any other directive is ignored. */
2686        if (toktype == st_C_define)
2687          {
2688            definedef = ddefineseen;
2689          }
2690        else
2691          {
2692            definedef = dignorerest;
2693          }
2694        return FALSE;
2695      case ddefineseen:
2696        /*
2697         * Make a tag for any macro, unless it is a constant
2698         * and constantypedefs is FALSE.
2699         */
2700        definedef = dignorerest;
2701        *is_func_or_var = (c == '(');
2702        if (!*is_func_or_var && !constantypedefs)
2703          return FALSE;
2704        else
2705          return TRUE;
2706      case dignorerest:
2707        return FALSE;
2708      default:
2709        error ("internal error: definedef value.");
2710      }
2711
2712    /*
2713     * Now typedefs
2714     */
2715    switch (typdef)
2716      {
2717      case tnone:
2718        if (toktype == st_C_typedef)
2719          {
               /* The typedef machine is started only when typedefs is set,
                  but the function/variable machine is reset either way. */
2720            if (typedefs)
2721              typdef = tkeyseen;
2722            fvextern = FALSE;
2723            fvdef = fvnone;
2724            return FALSE;
2725          }
2726        break;
2727      case tkeyseen:
2728        switch (toktype)
2729          {
2730          case st_none:
2731          case st_C_class:
2732          case st_C_struct:
2733          case st_C_enum:
2734            typdef = ttypeseen;
2735          }
2736        break;
2737      case ttypeseen:
2738        if (structdef == snone && fvdef == fvnone)
2739          {
2740            fvdef = fvnameseen;
2741            return TRUE;
2742          }
2743        break;
2744      case tend:
            /* Just before the typedef tag: struct-like keywords are not
               tags, any other token is. */
2745        switch (toktype)
2746          {
2747          case st_C_class:
2748          case st_C_struct:
2749          case st_C_enum:
2750            return FALSE;
2751          }
2752        return TRUE;
2753      }
2754
      /* Struct-like keywords. */
2755    switch (toktype)
2756      {
2757      case st_C_javastruct:
2758        if (structdef == stagseen)
2759          structdef = scolonseen;
2760        return FALSE;
2761      case st_C_template:
2762      case st_C_class:
2763        if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
2764            && bracelev == 0
2765            && definedef == dnone && structdef == snone
2766            && typdef == tnone && fvdef == fvnone)
2767          *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2768        if (toktype == st_C_template)
2769          break;
2770        /* FALLTHRU */
2771      case st_C_struct:
2772      case st_C_enum:
2773        if (parlev == 0
2774            && fvdef != vignore
2775            && (typdef == tkeyseen
2776                || (typedefs_or_cplusplus && structdef == snone)))
2777          {
               /* Remember which keyword this was and at which brace level,
                  for the enum constant test further down. */
2778            structdef = skeyseen;
2779            structtype = toktype;
2780            structbracelev = bracelev;
2781            if (fvdef == fvnameseen)
2782              fvdef = fvnone;
2783          }
2784        return FALSE;
2785      }
2786
      /* The token right after a struct-like keyword is the tag. */
2787    if (structdef == skeyseen)
2788      {
2789        structdef = stagseen;
2790        return TRUE;
2791      }
2792
2793    if (typdef != tnone)
2794      definedef = dnone;
2795
2796    /* Detect Objective C constructs. */
2797    switch (objdef)
2798      {
2799      case onone:
2800        switch (toktype)
2801          {
2802          case st_C_objprot:
2803            objdef = oprotocol;
2804            return FALSE;
2805          case st_C_objimpl:
2806            objdef = oimplementation;
2807            return FALSE;
2808          }
2809        break;
2810      case oimplementation:
2811        /* Save the class tag for functions or variables defined inside. */
2812        objtag = savenstr (str, len);
2813        objdef = oinbody;
2814        return FALSE;
2815      case oprotocol:
2816        /* Save the class tag for categories. */
2817        objtag = savenstr (str, len);
2818        objdef = otagseen;
2819        *is_func_or_var = TRUE;
2820        return TRUE;
2821      case oparenseen:
2822        objdef = ocatseen;
2823        *is_func_or_var = TRUE;
2824        return TRUE;
2825      case oinbody:
2826        break;
2827      case omethodsign:
2828        if (parlev == 0)
2829          {
               /* First part of the method name: start token_name with it. */
2830            fvdef = fvnone;
2831            objdef = omethodtag;
2832            linebuffer_setlen (&token_name, len);
2833            memcpy (token_name.buffer, str, len);
2834            token_name.buffer[len] = '\0';
2835            return TRUE;
2836          }
2837        return FALSE;
2838      case omethodcolon:
2839        if (parlev == 0)
2840          objdef = omethodparm;
2841        return FALSE;
2842      case omethodparm:
2843        if (parlev == 0)
2844          {
               /* Further part of the method name: append it to what
                  token_name already holds. */
2845            int oldlen = token_name.len;
2846            fvdef = fvnone;
2847            objdef = omethodtag;
2848            linebuffer_setlen (&token_name, oldlen + len);
2849            memcpy (token_name.buffer + oldlen, str, len);
2850            token_name.buffer[oldlen + len] = '\0';
2851            return TRUE;
2852          }
2853        return FALSE;
2854      case oignore:
2855        if (toktype == st_C_objend)
2856          {
2857            /* Memory leakage here: the string pointed by objtag is
2858               never released, because many tests would be needed to
2859               avoid breaking on incorrect input code.  The amount of
2860               memory leaked here is the sum of the lengths of the
2861               class tags.
2862            free (objtag); */
2863            objdef = onone;
2864          }
2865        return FALSE;
2866      }
2867
2868    /* A function, variable or enum constant? */
2869    switch (toktype)
2870      {
2871      case st_C_extern:
2872        fvextern = TRUE;
2873        switch  (fvdef)
2874          {
2875          case finlist:
2876          case flistseen:
2877          case fignore:
2878          case vignore:
2879            break;
2880          default:
2881            fvdef = fvnone;
2882          }
2883        return FALSE;
2884      case st_C_ignore:
2885        fvextern = FALSE;
2886        fvdef = vignore;
2887        return FALSE;
2888      case st_C_operator:
2889        fvdef = foperator;
2890        *is_func_or_var = TRUE;
2891        return TRUE;
2892      case st_none:
2893        if (constantypedefs
2894            && structdef == snone
2895            && structtype == st_C_enum && bracelev > structbracelev)
2896          return TRUE;           /* enum constant */
2897        switch (fvdef)
2898          {
2899          case fdefunkey:
2900            if (bracelev > 0)
2901              break;
2902            fvdef = fdefunname;  /* GNU macro */
2903            *is_func_or_var = TRUE;
2904            return TRUE;
2905          case fvnone:
2906            switch (typdef)
2907              {
2908              case ttypeseen:
2909                return FALSE;
2910              case tnone:
                   /* asm and __asm__ constructs are skipped like
                      variable-like declarations. */
2911                if ((strneq (str, "asm", 3) && endtoken (str[3]))
2912                    || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2913                  {
2914                    fvdef = vignore;
2915                    return FALSE;
2916                  }
2917                break;
2918              }
2919           /* FALLTHRU */
2920           case fvnameseen:
              /* A token ending in "::operator" introduces a C++ operator. */
2921           if (len >= 10 && strneq (str+len-10, "::operator", 10))
2922             {
2923               if (*c_extp & C_AUTO) /* automatic detection of C++ */
2924                 *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2925               fvdef = foperator;
2926               *is_func_or_var = TRUE;
2927               return TRUE;
2928             }
              /* Inside braces, names are of interest only directly in a
                 struct body. */
2929           if (bracelev > 0 && !instruct)
2930             break;
2931           fvdef = fvnameseen;   /* function or variable */
2932           *is_func_or_var = TRUE;
2933           return TRUE;
2934         }
2935       break;
2936     }
2937
2938   return FALSE;
2939 }
2940
2941 \f
2942 /*
2943  * C_entries often keeps pointers to tokens or lines which are older than
2944  * the line currently read.  By keeping two line buffers, and switching
2945  * them at end of line, it is possible to use those pointers.
      *
      * The macros below are not self-contained: they use names that must
      * be in scope where they are expanded (curndx, newndx, c_ext, charno,
      * inf, lp, quotednl and savetoken).
2946  */
2947 static struct
2948 {
2949   long linepos;
2950   linebuffer lb;
2951 } lbs[2];
2952
2953 #define current_lb_is_new (newndx == curndx)
2954 #define switch_line_buffers() (curndx = 1 - curndx)
2955
2956 #define curlb (lbs[curndx].lb)
2957 #define newlb (lbs[newndx].lb)
2958 #define curlinepos (lbs[curndx].linepos)
2959 #define newlinepos (lbs[newndx].linepos)
2960
     /* Tests on the C extensions mask c_ext. */
2961 #define plainc ((c_ext & C_EXT) == C_PLAIN)
2962 #define cplpl (c_ext & C_PLPL)
2963 #define cjava ((c_ext & C_JAVA) == C_JAVA)
2964
     /* Read the next line into the current line buffer and make it the
        new one, leaving definedef untouched. */
2965 #define CNL_SAVE_DEFINEDEF()                                            \
2966 do {                                                                    \
2967   curlinepos = charno;                                                  \
2968   readline (&curlb, inf);                                               \
2969   lp = curlb.buffer;                                                    \
2970   quotednl = FALSE;                                                     \
2971   newndx = curndx;                                                      \
2972 } while (0)
2973
     /* Like CNL_SAVE_DEFINEDEF, but also restore token from savetoken
        when the latter is valid, and reset definedef to dnone. */
2974 #define CNL()                                                           \
2975 do {                                                                    \
2976   CNL_SAVE_DEFINEDEF();                                                 \
2977   if (savetoken.valid)                                                  \
2978     {                                                                   \
2979       token = savetoken;                                                \
2980       savetoken.valid = FALSE;                                          \
2981     }                                                                   \
2982   definedef = dnone;                                                    \
2983 } while (0)
2984
2985
     /*
      * make_C_tag ()
      *      Call make_tag for the global token, using token_name as the
      *      name and passing ISFUN through, then mark the token as no
      *      longer valid.  Nothing is tagged for an invalid token, except
      *      that with DEBUG an "INVALID TOKEN:-->" tag is made and an error
      *      is reported.
      */
2986 static void
2987 make_C_tag (int isfun)
2988 {
2989   /* This function is never called when token.valid is FALSE, but
2990      we must protect against invalid input or internal errors. */
2991   if (token.valid)
2992     make_tag (token_name.buffer, token_name.len, isfun, token.line,
2993               token.offset+token.length+1, token.lineno, token.linepos);
2994   else if (DEBUG)
2995     {                             /* this branch is optimized away if !DEBUG */
          /* 17 is the length of the "INVALID TOKEN:-->" prefix. */
2996       make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
2997                 token_name.len + 17, isfun, token.line,
2998                 token.offset+token.length+1, token.lineno, token.linepos);
2999       error ("INVALID TOKEN");
3000     }
3001
3002   token.valid = FALSE;
3003 }
3004
3005
3006 /*
3007  * C_entries ()
3008  *      This routine finds functions, variables, typedefs,
3009  *      #define's, enum constants and struct/union/enum definitions in
3010  *      C syntax and adds them to the list.
3011  */
3012 static void
3013 C_entries (int c_ext, FILE *inf)
3014                                 /* extension of C */
3015                                 /* input file */
3016 {
3017   register char c;              /* latest char read; '\0' for end of line */
3018   register char *lp;            /* pointer one beyond the character `c' */
3019   int curndx, newndx;           /* indices for current and new lb */
3020   register int tokoff;          /* offset in line of start of current token */
3021   register int toklen;          /* length of current token */
3022   const char *qualifier;        /* string used to qualify names */
3023   int qlen;                     /* length of qualifier */
3024   int bracelev;                 /* current brace level */
3025   int bracketlev;               /* current bracket level */
3026   int parlev;                   /* current parenthesis level */
3027   int attrparlev;               /* __attribute__ parenthesis level */
3028   int templatelev;              /* current template level */
3029   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3030   bool incomm, inquote, inchar, quotednl, midtoken;
3031   bool yacc_rules;              /* in the rules part of a yacc file */
3032   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3033
3034
3035   linebuffer_init (&lbs[0].lb);
3036   linebuffer_init (&lbs[1].lb);
3037   if (cstack.size == 0)
3038     {
3039       cstack.size = (DEBUG) ? 1 : 4;
3040       cstack.nl = 0;
3041       cstack.cname = xnew (cstack.size, char *);
3042       cstack.bracelev = xnew (cstack.size, int);
3043     }
3044
3045   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3046   curndx = newndx = 0;
3047   lp = curlb.buffer;
3048   *lp = 0;
3049
3050   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3051   structdef = snone; definedef = dnone; objdef = onone;
3052   yacc_rules = FALSE;
3053   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3054   token.valid = savetoken.valid = FALSE;
3055   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3056   if (cjava)
3057     { qualifier = "."; qlen = 1; }
3058   else
3059     { qualifier = "::"; qlen = 2; }
3060
3061
3062   while (!feof (inf))
3063     {
3064       c = *lp++;
3065       if (c == '\\')
3066         {
3067           /* If we are at the end of the line, the next character is a
3068              '\0'; do not skip it, because it is what tells us
3069              to read the next line.  */
3070           if (*lp == '\0')
3071             {
3072               quotednl = TRUE;
3073               continue;
3074             }
3075           lp++;
3076           c = ' ';
3077         }
3078       else if (incomm)
3079         {
3080           switch (c)
3081             {
3082             case '*':
3083               if (*lp == '/')
3084                 {
3085                   c = *lp++;
3086                   incomm = FALSE;
3087                 }
3088               break;
3089             case '\0':
3090               /* Newlines inside comments do not end macro definitions in
3091                  traditional cpp. */
3092               CNL_SAVE_DEFINEDEF ();
3093               break;
3094             }
3095           continue;
3096         }
3097       else if (inquote)
3098         {
3099           switch (c)
3100             {
3101             case '"':
3102               inquote = FALSE;
3103               break;
3104             case '\0':
3105               /* Newlines inside strings do not end macro definitions
3106                  in traditional cpp, even though compilers don't
3107                  usually accept them. */
3108               CNL_SAVE_DEFINEDEF ();
3109               break;
3110             }
3111           continue;
3112         }
3113       else if (inchar)
3114         {
3115           switch (c)
3116             {
3117             case '\0':
3118               /* Hmmm, something went wrong. */
3119               CNL ();
3120               /* FALLTHRU */
3121             case '\'':
3122               inchar = FALSE;
3123               break;
3124             }
3125           continue;
3126         }
3127       else switch (c)
3128         {
3129         case '"':
3130           inquote = TRUE;
3131           if (bracketlev > 0)
3132             continue;
3133           if (inattribute)
3134             break;
3135           switch (fvdef)
3136             {
3137             case fdefunkey:
3138             case fstartlist:
3139             case finlist:
3140             case fignore:
3141             case vignore:
3142               break;
3143             default:
3144               fvextern = FALSE;
3145               fvdef = fvnone;
3146             }
3147           continue;
3148         case '\'':
3149           inchar = TRUE;
3150           if (bracketlev > 0)
3151             continue;
3152           if (inattribute)
3153             break;
3154           if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
3155             {
3156               fvextern = FALSE;
3157               fvdef = fvnone;
3158             }
3159           continue;
3160         case '/':
3161           if (*lp == '*')
3162             {
3163               incomm = TRUE;
3164               lp++;
3165               c = ' ';
3166               if (bracketlev > 0)
3167                 continue;
3168             }
3169           else if (/* cplpl && */ *lp == '/')
3170             {
3171               c = '\0';
3172             }
3173           break;
3174         case '%':
3175           if ((c_ext & YACC) && *lp == '%')
3176             {
3177               /* Entering or exiting rules section in yacc file. */
3178               lp++;
3179               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3180               typdef = tnone; structdef = snone;
3181               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3182               bracelev = 0;
3183               yacc_rules = !yacc_rules;
3184               continue;
3185             }
3186           else
3187             break;
3188         case '#':
3189           if (definedef == dnone)
3190             {
3191               char *cp;
3192               bool cpptoken = TRUE;
3193
3194               /* Look back on this line.  If all blanks, or nonblanks
3195                  followed by an end of comment, this is a preprocessor
3196                  token. */
3197               for (cp = newlb.buffer; cp < lp-1; cp++)
3198                 if (!iswhite (*cp))
3199                   {
3200                     if (*cp == '*' && cp[1] == '/')
3201                       {
3202                         cp++;
3203                         cpptoken = TRUE;
3204                       }
3205                     else
3206                       cpptoken = FALSE;
3207                   }
3208               if (cpptoken)
3209                 definedef = dsharpseen;
3210             } /* if (definedef == dnone) */
3211           continue;
3212         case '[':
3213           bracketlev++;
3214           continue;
3215         default:
3216           if (bracketlev > 0)
3217             {
3218               if (c == ']')
3219                 --bracketlev;
3220               else if (c == '\0')
3221                 CNL_SAVE_DEFINEDEF ();
3222               continue;
3223             }
3224           break;
3225         } /* switch (c) */
3226
3227
3228       /* Consider token only if some involved conditions are satisfied. */
3229       if (typdef != tignore
3230           && definedef != dignorerest
3231           && fvdef != finlist
3232           && templatelev == 0
3233           && (definedef != dnone
3234               || structdef != scolonseen)
3235           && !inattribute)
3236         {
3237           if (midtoken)
3238             {
3239               if (endtoken (c))
3240                 {
3241                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3242                     /* This handles :: in the middle,
3243                        but not at the beginning of an identifier.
3244                        Also, space-separated :: is not recognized. */
3245                     {
3246                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3247                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3248                       lp += 2;
3249                       toklen += 2;
3250                       c = lp[-1];
3251                       goto still_in_token;
3252                     }
3253                   else
3254                     {
3255                       bool funorvar = FALSE;
3256
3257                       if (yacc_rules
3258                           || consider_token (newlb.buffer + tokoff, toklen, c,
3259                                              &c_ext, bracelev, parlev,
3260                                              &funorvar))
3261                         {
3262                           if (fvdef == foperator)
3263                             {
3264                               char *oldlp = lp;
3265                               lp = skip_spaces (lp-1);
3266                               if (*lp != '\0')
3267                                 lp += 1;
3268                               while (*lp != '\0'
3269                                      && !iswhite (*lp) && *lp != '(')
3270                                 lp += 1;
3271                               c = *lp++;
3272                               toklen += lp - oldlp;
3273                             }
3274                           token.named = FALSE;
3275                           if (!plainc
3276                               && nestlev > 0 && definedef == dnone)
3277                             /* in struct body */
3278                             {
3279                               int len;
3280                               write_classname (&token_name, qualifier);
3281                               len = token_name.len;
3282                               linebuffer_setlen (&token_name, len+qlen+toklen);
3283                               sprintf (token_name.buffer + len, "%s%.*s",
3284                                        qualifier, toklen, newlb.buffer + tokoff);
3285                               token.named = TRUE;
3286                             }
3287                           else if (objdef == ocatseen)
3288                             /* Objective C category */
3289                             {
3290                               int len = strlen (objtag) + 2 + toklen;
3291                               linebuffer_setlen (&token_name, len);
3292                               sprintf (token_name.buffer, "%s(%.*s)",
3293                                        objtag, toklen, newlb.buffer + tokoff);
3294                               token.named = TRUE;
3295                             }
3296                           else if (objdef == omethodtag
3297                                    || objdef == omethodparm)
3298                             /* Objective C method */
3299                             {
3300                               token.named = TRUE;
3301                             }
3302                           else if (fvdef == fdefunname)
3303                             /* GNU DEFUN and similar macros */
3304                             {
3305                               bool defun = (newlb.buffer[tokoff] == 'F');
3306                               int off = tokoff;
3307                               int len = toklen;
3308
3309                               /* Rewrite the tag so that emacs lisp DEFUNs
3310                                  can be found by their elisp name */
3311                               if (defun)
3312                                 {
3313                                   off += 1;
3314                                   len -= 1;
3315                                 }
3316                               linebuffer_setlen (&token_name, len);
3317                               memcpy (token_name.buffer,
3318                                       newlb.buffer + off, len);
3319                               token_name.buffer[len] = '\0';
3320                               if (defun)
3321                                 while (--len >= 0)
3322                                   if (token_name.buffer[len] == '_')
3323                                     token_name.buffer[len] = '-';
3324                               token.named = defun;
3325                             }
3326                           else
3327                             {
3328                               linebuffer_setlen (&token_name, toklen);
3329                               memcpy (token_name.buffer,
3330                                       newlb.buffer + tokoff, toklen);
3331                               token_name.buffer[toklen] = '\0';
3332                               /* Name macros and members. */
3333                               token.named = (structdef == stagseen
3334                                              || typdef == ttypeseen
3335                                              || typdef == tend
3336                                              || (funorvar
3337                                                  && definedef == dignorerest)
3338                                              || (funorvar
3339                                                  && definedef == dnone
3340                                                  && structdef == snone
3341                                                  && bracelev > 0));
3342                             }
3343                           token.lineno = lineno;
3344                           token.offset = tokoff;
3345                           token.length = toklen;
3346                           token.line = newlb.buffer;
3347                           token.linepos = newlinepos;
3348                           token.valid = TRUE;
3349
3350                           if (definedef == dnone
3351                               && (fvdef == fvnameseen
3352                                   || fvdef == foperator
3353                                   || structdef == stagseen
3354                                   || typdef == tend
3355                                   || typdef == ttypeseen
3356                                   || objdef != onone))
3357                             {
3358                               if (current_lb_is_new)
3359                                 switch_line_buffers ();
3360                             }
3361                           else if (definedef != dnone
3362                                    || fvdef == fdefunname
3363                                    || instruct)
3364                             make_C_tag (funorvar);
3365                         }
3366                       else /* not yacc and consider_token failed */
3367                         {
3368                           if (inattribute && fvdef == fignore)
3369                             {
3370                               /* We have just met __attribute__ after a
3371                                  function parameter list: do not tag the
3372                                  function again. */
3373                               fvdef = fvnone;
3374                             }
3375                         }
3376                       midtoken = FALSE;
3377                     }
3378                 } /* if (endtoken (c)) */
3379               else if (intoken (c))
3380                 still_in_token:
3381                 {
3382                   toklen++;
3383                   continue;
3384                 }
3385             } /* if (midtoken) */
3386           else if (begtoken (c))
3387             {
3388               switch (definedef)
3389                 {
3390                 case dnone:
3391                   switch (fvdef)
3392                     {
3393                     case fstartlist:
3394                       /* This prevents tagging fb in
3395                          void (__attribute__((noreturn)) *fb) (void);
3396                          Fixing this is not easy and not very important. */
3397                       fvdef = finlist;
3398                       continue;
3399                     case flistseen:
3400                       if (plainc || declarations)
3401                         {
3402                           make_C_tag (TRUE); /* a function */
3403                           fvdef = fignore;
3404                         }
3405                       break;
3406                     }
3407                   if (structdef == stagseen && !cjava)
3408                     {
3409                       popclass_above (bracelev);
3410                       structdef = snone;
3411                     }
3412                   break;
3413                 case dsharpseen:
3414                   savetoken = token;
3415                   break;
3416                 }
3417               if (!yacc_rules || lp == newlb.buffer + 1)
3418                 {
3419                   tokoff = lp - 1 - newlb.buffer;
3420                   toklen = 1;
3421                   midtoken = TRUE;
3422                 }
3423               continue;
3424             } /* if (begtoken) */
3425         } /* if must look at token */
3426
3427
3428       /* Detect end of line, colon, comma, semicolon and various braces
3429          after having handled a token.*/
3430       switch (c)
3431         {
3432         case ':':
3433           if (inattribute)
3434             break;
3435           if (yacc_rules && token.offset == 0 && token.valid)
3436             {
3437               make_C_tag (FALSE); /* a yacc function */
3438               break;
3439             }
3440           if (definedef != dnone)
3441             break;
3442           switch (objdef)
3443             {
3444             case  otagseen:
3445               objdef = oignore;
3446               make_C_tag (TRUE); /* an Objective C class */
3447               break;
3448             case omethodtag:
3449             case omethodparm:
3450               objdef = omethodcolon;
3451               linebuffer_setlen (&token_name, token_name.len + 1);
3452               strcat (token_name.buffer, ":");
3453               break;
3454             }
3455           if (structdef == stagseen)
3456             {
3457               structdef = scolonseen;
3458               break;
3459             }
3460           /* Should be useless, but may be work as a safety net. */
3461           if (cplpl && fvdef == flistseen)
3462             {
3463               make_C_tag (TRUE); /* a function */
3464               fvdef = fignore;
3465               break;
3466             }
3467           break;
3468         case ';':
3469           if (definedef != dnone || inattribute)
3470             break;
3471           switch (typdef)
3472             {
3473             case tend:
3474             case ttypeseen:
3475               make_C_tag (FALSE); /* a typedef */
3476               typdef = tnone;
3477               fvdef = fvnone;
3478               break;
3479             case tnone:
3480             case tinbody:
3481             case tignore:
3482               switch (fvdef)
3483                 {
3484                 case fignore:
3485                   if (typdef == tignore || cplpl)
3486                     fvdef = fvnone;
3487                   break;
3488                 case fvnameseen:
3489                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3490                       || (members && instruct))
3491                     make_C_tag (FALSE); /* a variable */
3492                   fvextern = FALSE;
3493                   fvdef = fvnone;
3494                   token.valid = FALSE;
3495                   break;
3496                 case flistseen:
3497                   if ((declarations
3498                        && (cplpl || !instruct)
3499                        && (typdef == tnone || (typdef != tignore && instruct)))
3500                       || (members
3501                           && plainc && instruct))
3502                     make_C_tag (TRUE);  /* a function */
3503                   /* FALLTHRU */
3504                 default:
3505                   fvextern = FALSE;
3506                   fvdef = fvnone;
3507                   if (declarations
3508                        && cplpl && structdef == stagseen)
3509                     make_C_tag (FALSE); /* forward declaration */
3510                   else
3511                     token.valid = FALSE;
3512                 } /* switch (fvdef) */
3513               /* FALLTHRU */
3514             default:
3515               if (!instruct)
3516                 typdef = tnone;
3517             }
3518           if (structdef == stagseen)
3519             structdef = snone;
3520           break;
3521         case ',':
3522           if (definedef != dnone || inattribute)
3523             break;
3524           switch (objdef)
3525             {
3526             case omethodtag:
3527             case omethodparm:
3528               make_C_tag (TRUE); /* an Objective C method */
3529               objdef = oinbody;
3530               break;
3531             }
3532           switch (fvdef)
3533             {
3534             case fdefunkey:
3535             case foperator:
3536             case fstartlist:
3537             case finlist:
3538             case fignore:
3539             case vignore:
3540               break;
3541             case fdefunname:
3542               fvdef = fignore;
3543               break;
3544             case fvnameseen:
3545               if (parlev == 0
3546                   && ((globals
3547                        && bracelev == 0
3548                        && templatelev == 0
3549                        && (!fvextern || declarations))
3550                       || (members && instruct)))
3551                   make_C_tag (FALSE); /* a variable */
3552               break;
3553             case flistseen:
3554               if ((declarations && typdef == tnone && !instruct)
3555                   || (members && typdef != tignore && instruct))
3556                 {
3557                   make_C_tag (TRUE); /* a function */
3558                   fvdef = fvnameseen;
3559                 }
3560               else if (!declarations)
3561                 fvdef = fvnone;
3562               token.valid = FALSE;
3563               break;
3564             default:
3565               fvdef = fvnone;
3566             }
3567           if (structdef == stagseen)
3568             structdef = snone;
3569           break;
3570         case ']':
3571           if (definedef != dnone || inattribute)
3572             break;
3573           if (structdef == stagseen)
3574             structdef = snone;
3575           switch (typdef)
3576             {
3577             case ttypeseen:
3578             case tend:
3579               typdef = tignore;
3580               make_C_tag (FALSE);       /* a typedef */
3581               break;
3582             case tnone:
3583             case tinbody:
3584               switch (fvdef)
3585                 {
3586                 case foperator:
3587                 case finlist:
3588                 case fignore:
3589                 case vignore:
3590                   break;
3591                 case fvnameseen:
3592                   if ((members && bracelev == 1)
3593                       || (globals && bracelev == 0
3594                           && (!fvextern || declarations)))
3595                     make_C_tag (FALSE); /* a variable */
3596                   /* FALLTHRU */
3597                 default:
3598                   fvdef = fvnone;
3599                 }
3600               break;
3601             }
3602           break;
3603         case '(':
3604           if (inattribute)
3605             {
3606               attrparlev++;
3607               break;
3608             }
3609           if (definedef != dnone)
3610             break;
3611           if (objdef == otagseen && parlev == 0)
3612             objdef = oparenseen;
3613           switch (fvdef)
3614             {
3615             case fvnameseen:
3616               if (typdef == ttypeseen
3617                   && *lp != '*'
3618                   && !instruct)
3619                 {
3620                   /* This handles constructs like:
3621                      typedef void OperatorFun (int fun); */
3622                   make_C_tag (FALSE);
3623                   typdef = tignore;
3624                   fvdef = fignore;
3625                   break;
3626                 }
3627               /* FALLTHRU */
3628             case foperator:
3629               fvdef = fstartlist;
3630               break;
3631             case flistseen:
3632               fvdef = finlist;
3633               break;
3634             }
3635           parlev++;
3636           break;
3637         case ')':
3638           if (inattribute)
3639             {
3640               if (--attrparlev == 0)
3641                 inattribute = FALSE;
3642               break;
3643             }
3644           if (definedef != dnone)
3645             break;
3646           if (objdef == ocatseen && parlev == 1)
3647             {
3648               make_C_tag (TRUE); /* an Objective C category */
3649               objdef = oignore;
3650             }
3651           if (--parlev == 0)
3652             {
3653               switch (fvdef)
3654                 {
3655                 case fstartlist:
3656                 case finlist:
3657                   fvdef = flistseen;
3658                   break;
3659                 }
3660               if (!instruct
3661                   && (typdef == tend
3662                       || typdef == ttypeseen))
3663                 {
3664                   typdef = tignore;
3665                   make_C_tag (FALSE); /* a typedef */
3666                 }
3667             }
3668           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3669             parlev = 0;
3670           break;
3671         case '{':
3672           if (definedef != dnone)
3673             break;
3674           if (typdef == ttypeseen)
3675             {
3676               /* Whenever typdef is set to tinbody (currently only
3677                  here), typdefbracelev should be set to bracelev. */
3678               typdef = tinbody;
3679               typdefbracelev = bracelev;
3680             }
3681           switch (fvdef)
3682             {
3683             case flistseen:
3684               make_C_tag (TRUE);    /* a function */
3685               /* FALLTHRU */
3686             case fignore:
3687               fvdef = fvnone;
3688               break;
3689             case fvnone:
3690               switch (objdef)
3691                 {
3692                 case otagseen:
3693                   make_C_tag (TRUE); /* an Objective C class */
3694                   objdef = oignore;
3695                   break;
3696                 case omethodtag:
3697                 case omethodparm:
3698                   make_C_tag (TRUE); /* an Objective C method */
3699                   objdef = oinbody;
3700                   break;
3701                 default:
3702                   /* Neutralize `extern "C" {' grot. */
3703                   if (bracelev == 0 && structdef == snone && nestlev == 0
3704                       && typdef == tnone)
3705                     bracelev = -1;
3706                 }
3707               break;
3708             }
3709           switch (structdef)
3710             {
3711             case skeyseen:         /* unnamed struct */
3712               pushclass_above (bracelev, NULL, 0);
3713               structdef = snone;
3714               break;
3715             case stagseen:         /* named struct or enum */
3716             case scolonseen:       /* a class */
3717               pushclass_above (bracelev,token.line+token.offset, token.length);
3718               structdef = snone;
3719               make_C_tag (FALSE);  /* a struct or enum */
3720               break;
3721             }
3722           bracelev += 1;
3723           break;
3724         case '*':
3725           if (definedef != dnone)
3726             break;
3727           if (fvdef == fstartlist)
3728             {
3729               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3730               token.valid = FALSE;
3731             }
3732           break;
3733         case '}':
3734           if (definedef != dnone)
3735             break;
3736           bracelev -= 1;
3737           if (!ignoreindent && lp == newlb.buffer + 1)
3738             {
3739               if (bracelev != 0)
3740                 token.valid = FALSE; /* unexpected value, token unreliable */
3741               bracelev = 0;     /* reset brace level if first column */
3742               parlev = 0;       /* also reset paren level, just in case... */
3743             }
3744           else if (bracelev < 0)
3745             {
3746               token.valid = FALSE; /* something gone amiss, token unreliable */
3747               bracelev = 0;
3748             }
3749           if (bracelev == 0 && fvdef == vignore)
3750             fvdef = fvnone;             /* end of function */
3751           popclass_above (bracelev);
3752           structdef = snone;
3753           /* Only if typdef == tinbody is typdefbracelev significant. */
3754           if (typdef == tinbody && bracelev <= typdefbracelev)
3755             {
3756               assert (bracelev == typdefbracelev);
3757               typdef = tend;
3758             }
3759           break;
3760         case '=':
3761           if (definedef != dnone)
3762             break;
3763           switch (fvdef)
3764             {
3765             case foperator:
3766             case finlist:
3767             case fignore:
3768             case vignore:
3769               break;
3770             case fvnameseen:
3771               if ((members && bracelev == 1)
3772                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3773                 make_C_tag (FALSE); /* a variable */
3774               /* FALLTHRU */
3775             default:
3776               fvdef = vignore;
3777             }
3778           break;
3779         case '<':
3780           if (cplpl
3781               && (structdef == stagseen || fvdef == fvnameseen))
3782             {
3783               templatelev++;
3784               break;
3785             }
3786           goto resetfvdef;
3787         case '>':
3788           if (templatelev > 0)
3789             {
3790               templatelev--;
3791               break;
3792             }
3793           goto resetfvdef;
3794         case '+':
3795         case '-':
3796           if (objdef == oinbody && bracelev == 0)
3797             {
3798               objdef = omethodsign;
3799               break;
3800             }
3801           /* FALLTHRU */
3802         resetfvdef:
3803         case '#': case '~': case '&': case '%': case '/':
3804         case '|': case '^': case '!': case '.': case '?':
3805           if (definedef != dnone)
3806             break;
3807           /* These surely cannot follow a function tag in C. */
3808           switch (fvdef)
3809             {
3810             case foperator:
3811             case finlist:
3812             case fignore:
3813             case vignore:
3814               break;
3815             default:
3816               fvdef = fvnone;
3817             }
3818           break;
3819         case '\0':
3820           if (objdef == otagseen)
3821             {
3822               make_C_tag (TRUE); /* an Objective C class */
3823               objdef = oignore;
3824             }
3825           /* If a macro spans multiple lines don't reset its state. */
3826           if (quotednl)
3827             CNL_SAVE_DEFINEDEF ();
3828           else
3829             CNL ();
3830           break;
3831         } /* switch (c) */
3832
3833     } /* while not eof */
3834
3835   free (lbs[0].lb.buffer);
3836   free (lbs[1].lb.buffer);
3837 }
3838
3839 /*
3840  * Process either a C++ file or a C file depending on the setting
3841  * of a global flag.
3842  */
3843 static void
3844 default_C_entries (FILE *inf)
3845 {
3846   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3847 }
3848
/* Always do plain C: C_entries is called with 0 as its language argument. */
static void
plain_C_entries (FILE *inf)
{
  const int c_ext = 0;

  C_entries (c_ext, inf);
}
3855
3856 /* Always do C++. */
3857 static void
3858 Cplusplus_entries (FILE *inf)
3859 {
3860   C_entries (C_PLPL, inf);
3861 }
3862
3863 /* Always do Java. */
3864 static void
3865 Cjava_entries (FILE *inf)
3866 {
3867   C_entries (C_JAVA, inf);
3868 }
3869
3870 /* Always do C*. */
3871 static void
3872 Cstar_entries (FILE *inf)
3873 {
3874   C_entries (C_STAR, inf);
3875 }
3876
3877 /* Always do Yacc. */
3878 static void
3879 Yacc_entries (FILE *inf)
3880 {
3881   C_entries (YACC, inf);
3882 }
3883
3884 \f
3885 /* Useful macros. */
/* Loop head that walks the remaining lines of FILE_POINTER.  Before
   every pass through the loop body, readline fills LINE_BUFFER with
   the next line and CHAR_POINTER is set to the start of that text.
   The loop ends once feof reports end of file.  Use it in front of a
   statement or block:  LOOP_ON_INPUT_LINES (f, lb, p) { ... }  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)           /* stop at end of file */       \
         && (readline (&line_buffer, file_pointer), /* fetch a line */  \
             char_pointer = line_buffer.buffer,     /* point at it */   \
             TRUE))                     /* then always run the body */
3894
/* True when the text at CP starts with the literal keyword KW and the
   character right after it is one that notinname accepts.  On success
   CP is moved past the keyword and then through skip_spaces; on
   failure CP is left alone.  CP is evaluated several times and must
   be a modifiable lvalue; KW must be a string literal.  */
#define LOOKING_AT(cp, kw)                                              \
  ((assert ("" kw), TRUE)       /* compiles only for a string literal */ \
   && strneq ((cp), kw, sizeof (kw) - 1)        /* keyword matches */   \
   && notinname ((cp)[sizeof (kw) - 1])         /* and ends there */    \
   && ((cp) = skip_spaces (&(cp)[sizeof (kw) - 1])) != NULL) /* step over */
3900
/* Like LOOKING_AT, but the keyword is compared with strncaseeq, the
   character after the keyword is not checked with notinname, and no
   whitespace is skipped: on success CP ends up just past the keyword. */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert ("" kw), TRUE)       /* compiles only for a string literal */ \
   && strncaseeq ((cp), kw, sizeof (kw) - 1)    /* keyword matches */   \
   && ((cp) += sizeof (kw) - 1) != NULL)        /* step over keyword */
3906
3907 /*
3908  * Read a file, but do no processing.  This is used to do regexp
3909  * matching on files that have no language defined.
3910  */
3911 static void
3912 just_read_file (FILE *inf)
3913 {
3914   while (!feof (inf))
3915     readline (&lb, inf);
3916 }
3917
3918 \f
3919 /* Fortran parsing */
3920
3921 static void F_takeprec (void);
3922 static void F_getit (FILE *);
3923
3924 static void
3925 F_takeprec (void)
3926 {
3927   dbp = skip_spaces (dbp);
3928   if (*dbp != '*')
3929     return;
3930   dbp++;
3931   dbp = skip_spaces (dbp);
3932   if (strneq (dbp, "(*)", 3))
3933     {
3934       dbp += 3;
3935       return;
3936     }
3937   if (!ISDIGIT (*dbp))
3938     {
3939       --dbp;                    /* force failure */
3940       return;
3941     }
3942   do
3943     dbp++;
3944   while (ISDIGIT (*dbp));
3945 }
3946
3947 static void
3948 F_getit (FILE *inf)
3949 {
3950   register char *cp;
3951
3952   dbp = skip_spaces (dbp);
3953   if (*dbp == '\0')
3954     {
3955       readline (&lb, inf);
3956       dbp = lb.buffer;
3957       if (dbp[5] != '&')
3958         return;
3959       dbp += 6;
3960       dbp = skip_spaces (dbp);
3961     }
3962   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3963     return;
3964   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3965     continue;
3966   make_tag (dbp, cp-dbp, TRUE,
3967             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3968 }
3969
3970
3971 static void
3972 Fortran_functions (FILE *inf)
3973 {
3974   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3975     {
3976       if (*dbp == '%')
3977         dbp++;                  /* Ratfor escape to fortran */
3978       dbp = skip_spaces (dbp);
3979       if (*dbp == '\0')
3980         continue;
3981
3982       if (LOOKING_AT_NOCASE (dbp, "recursive"))
3983         dbp = skip_spaces (dbp);
3984
3985       if (LOOKING_AT_NOCASE (dbp, "pure"))
3986         dbp = skip_spaces (dbp);
3987
3988       if (LOOKING_AT_NOCASE (dbp, "elemental"))
3989         dbp = skip_spaces (dbp);
3990
3991       switch (lowcase (*dbp))
3992         {
3993         case 'i':
3994           if (nocase_tail ("integer"))
3995             F_takeprec ();
3996           break;
3997         case 'r':
3998           if (nocase_tail ("real"))
3999             F_takeprec ();
4000           break;
4001         case 'l':
4002           if (nocase_tail ("logical"))
4003             F_takeprec ();
4004           break;
4005         case 'c':
4006           if (nocase_tail ("complex") || nocase_tail ("character"))
4007             F_takeprec ();
4008           break;
4009         case 'd':
4010           if (nocase_tail ("double"))
4011             {
4012               dbp = skip_spaces (dbp);
4013               if (*dbp == '\0')
4014                 continue;
4015               if (nocase_tail ("precision"))
4016                 break;
4017               continue;
4018             }
4019           break;
4020         }
4021       dbp = skip_spaces (dbp);
4022       if (*dbp == '\0')
4023         continue;
4024       switch (lowcase (*dbp))
4025         {
4026         case 'f':
4027           if (nocase_tail ("function"))
4028             F_getit (inf);
4029           continue;
4030         case 's':
4031           if (nocase_tail ("subroutine"))
4032             F_getit (inf);
4033           continue;
4034         case 'e':
4035           if (nocase_tail ("entry"))
4036             F_getit (inf);
4037           continue;
4038         case 'b':
4039           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4040             {
4041               dbp = skip_spaces (dbp);
4042               if (*dbp == '\0') /* assume un-named */
4043                 make_tag ("blockdata", 9, TRUE,
4044                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4045               else
4046                 F_getit (inf);  /* look for name */
4047             }
4048           continue;
4049         }
4050     }
4051 }
4052
4053 \f
4054 /*
4055  * Ada parsing
4056  * Original code by
4057  * Philippe Waroquiers (1998)
4058  */
4059
4060 /* Once we are positioned after an "interesting" keyword, let's get
4061    the real tag value necessary. */
4062 static void
4063 Ada_getit (FILE *inf, const char *name_qualifier)
4064 {
4065   register char *cp;
4066   char *name;
4067   char c;
4068
4069   while (!feof (inf))
4070     {
4071       dbp = skip_spaces (dbp);
4072       if (*dbp == '\0'
4073           || (dbp[0] == '-' && dbp[1] == '-'))
4074         {
4075           readline (&lb, inf);
4076           dbp = lb.buffer;
4077         }
4078       switch (lowcase (*dbp))
4079         {
4080         case 'b':
4081           if (nocase_tail ("body"))
4082             {
4083               /* Skipping body of   procedure body   or   package body or ....
4084                  resetting qualifier to body instead of spec. */
4085               name_qualifier = "/b";
4086               continue;
4087             }
4088           break;
4089         case 't':
4090           /* Skipping type of   task type   or   protected type ... */
4091           if (nocase_tail ("type"))
4092             continue;
4093           break;
4094         }
4095       if (*dbp == '"')
4096         {
4097           dbp += 1;
4098           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4099             continue;
4100         }
4101       else
4102         {
4103           dbp = skip_spaces (dbp);
4104           for (cp = dbp;
4105                (*cp != '\0'
4106                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4107                cp++)
4108             continue;
4109           if (cp == dbp)
4110             return;
4111         }
4112       c = *cp;
4113       *cp = '\0';
4114       name = concat (dbp, name_qualifier, "");
4115       *cp = c;
4116       make_tag (name, strlen (name), TRUE,
4117                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4118       free (name);
4119       if (c == '"')
4120         dbp = cp + 1;
4121       return;
4122     }
4123 }
4124
4125 static void
4126 Ada_funcs (FILE *inf)
4127 {
4128   bool inquote = FALSE;
4129   bool skip_till_semicolumn = FALSE;
4130
4131   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4132     {
4133       while (*dbp != '\0')
4134         {
4135           /* Skip a string i.e. "abcd". */
4136           if (inquote || (*dbp == '"'))
4137             {
4138               dbp = etags_strchr (dbp + !inquote, '"');
4139               if (dbp != NULL)
4140                 {
4141                   inquote = FALSE;
4142                   dbp += 1;
4143                   continue;     /* advance char */
4144                 }
4145               else
4146                 {
4147                   inquote = TRUE;
4148                   break;        /* advance line */
4149                 }
4150             }
4151
4152           /* Skip comments. */
4153           if (dbp[0] == '-' && dbp[1] == '-')
4154             break;              /* advance line */
4155
4156           /* Skip character enclosed in single quote i.e. 'a'
4157              and skip single quote starting an attribute i.e. 'Image. */
4158           if (*dbp == '\'')
4159             {
4160               dbp++ ;
4161               if (*dbp != '\0')
4162                 dbp++;
4163               continue;
4164             }
4165
4166           if (skip_till_semicolumn)
4167             {
4168               if (*dbp == ';')
4169                 skip_till_semicolumn = FALSE;
4170               dbp++;
4171               continue;         /* advance char */
4172             }
4173
4174           /* Search for beginning of a token.  */
4175           if (!begtoken (*dbp))
4176             {
4177               dbp++;
4178               continue;         /* advance char */
4179             }
4180
4181           /* We are at the beginning of a token. */
4182           switch (lowcase (*dbp))
4183             {
4184             case 'f':
4185               if (!packages_only && nocase_tail ("function"))
4186                 Ada_getit (inf, "/f");
4187               else
4188                 break;          /* from switch */
4189               continue;         /* advance char */
4190             case 'p':
4191               if (!packages_only && nocase_tail ("procedure"))
4192                 Ada_getit (inf, "/p");
4193               else if (nocase_tail ("package"))
4194                 Ada_getit (inf, "/s");
4195               else if (nocase_tail ("protected")) /* protected type */
4196                 Ada_getit (inf, "/t");
4197               else
4198                 break;          /* from switch */
4199               continue;         /* advance char */
4200
4201             case 'u':
4202               if (typedefs && !packages_only && nocase_tail ("use"))
4203                 {
4204                   /* when tagging types, avoid tagging  use type Pack.Typename;
4205                      for this, we will skip everything till a ; */
4206                   skip_till_semicolumn = TRUE;
4207                   continue;     /* advance char */
4208                 }
4209
4210             case 't':
4211               if (!packages_only && nocase_tail ("task"))
4212                 Ada_getit (inf, "/k");
4213               else if (typedefs && !packages_only && nocase_tail ("type"))
4214                 {
4215                   Ada_getit (inf, "/t");
4216                   while (*dbp != '\0')
4217                     dbp += 1;
4218                 }
4219               else
4220                 break;          /* from switch */
4221               continue;         /* advance char */
4222             }
4223
4224           /* Look for the end of the token. */
4225           while (!endtoken (*dbp))
4226             dbp++;
4227
4228         } /* advance char */
4229     } /* advance line */
4230 }
4231
4232 \f
4233 /*
4234  * Unix and microcontroller assembly tag handling
4235  * Labels:  /^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]/
4236  * Idea by Bob Weiner, Motorola Inc. (1994)
4237  */
4238 static void
4239 Asm_labels (FILE *inf)
4240 {
4241   register char *cp;
4242
4243   LOOP_ON_INPUT_LINES (inf, lb, cp)
4244     {
4245       /* If first char is alphabetic or one of [_.$], test for colon
4246          following identifier. */
4247       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4248         {
4249           /* Read past label. */
4250           cp++;
4251           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4252             cp++;
4253           if (*cp == ':' || iswhite (*cp))
4254             /* Found end of label, so copy it and add it to the table. */
4255             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4256                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4257         }
4258     }
4259 }
4260
4261 \f
4262 /*
4263  * Perl support
4264  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4265  *                 /^use constant[ \t\n]+[^ \t\n{=,;]+/
4266  * Perl variable names: /^(my|local).../
4267  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4268  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4269  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4270  */
4271 static void
4272 Perl_functions (FILE *inf)
4273 {
4274   char *package = savestr ("main"); /* current package name */
4275   register char *cp;
4276
4277   LOOP_ON_INPUT_LINES (inf, lb, cp)
4278     {
4279       cp = skip_spaces (cp);
4280
4281       if (LOOKING_AT (cp, "package"))
4282         {
4283           free (package);
4284           get_tag (cp, &package);
4285         }
4286       else if (LOOKING_AT (cp, "sub"))
4287         {
4288           char *pos, *sp;
4289
4290         subr:
4291           sp = cp;
4292           while (!notinname (*cp))
4293             cp++;
4294           if (cp == sp)
4295             continue;           /* nothing found */
4296           if ((pos = etags_strchr (sp, ':')) != NULL
4297               && pos < cp && pos[1] == ':')
4298             /* The name is already qualified. */
4299             make_tag (sp, cp - sp, TRUE,
4300                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4301           else
4302             /* Qualify it. */
4303             {
4304               char savechar, *name;
4305
4306               savechar = *cp;
4307               *cp = '\0';
4308               name = concat (package, "::", sp);
4309               *cp = savechar;
4310               make_tag (name, strlen (name), TRUE,
4311                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4312               free (name);
4313             }
4314         }
4315       else if (LOOKING_AT (cp, "use constant")
4316                || LOOKING_AT (cp, "use constant::defer"))
4317         {
4318           /* For hash style multi-constant like
4319                 use constant { FOO => 123,
4320                                BAR => 456 };
4321              only the first FOO is picked up.  Parsing across the value
4322              expressions would be difficult in general, due to possible nested
4323              hashes, here-documents, etc.  */
4324           if (*cp == '{')
4325             cp = skip_spaces (cp+1);
4326           goto subr;
4327         }
4328       else if (globals) /* only if we are tagging global vars */
4329         {
4330           /* Skip a qualifier, if any. */
4331           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4332           /* After "my" or "local", but before any following paren or space. */
4333           char *varstart = cp;
4334
4335           if (qual              /* should this be removed?  If yes, how? */
4336               && (*cp == '$' || *cp == '@' || *cp == '%'))
4337             {
4338               varstart += 1;
4339               do
4340                 cp++;
4341               while (ISALNUM (*cp) || *cp == '_');
4342             }
4343           else if (qual)
4344             {
4345               /* Should be examining a variable list at this point;
4346                  could insist on seeing an open parenthesis. */
4347               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4348                 cp++;
4349             }
4350           else
4351             continue;
4352
4353           make_tag (varstart, cp - varstart, FALSE,
4354                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4355         }
4356     }
4357   free (package);
4358 }
4359
4360
4361 /*
4362  * Python support
4363  * Look for /^[ \t]*def[ \t\n]+[^ \t\n(:]+/ or /^[ \t]*class[ \t\n]+[^ \t\n(:]+/
4364  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4365  * More ideas by seb bacon <seb@jamkit.com> (2002)
4366  */
4367 static void
4368 Python_functions (FILE *inf)
4369 {
4370   register char *cp;
4371
4372   LOOP_ON_INPUT_LINES (inf, lb, cp)
4373     {
4374       cp = skip_spaces (cp);
4375       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4376         {
4377           char *name = cp;
4378           while (!notinname (*cp) && *cp != ':')
4379             cp++;
4380           make_tag (name, cp - name, TRUE,
4381                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4382         }
4383     }
4384 }
4385
4386 \f
4387 /*
4388  * PHP support
4389  * Look for:
4390  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4391  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4392  *  - /^[ \t]*define\(\"[^\"]+/
4393  * Only with --members:
4394  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4395  * Idea by Diez B. Roggisch (2001)
4396  */
4397 static void
4398 PHP_functions (FILE *inf)
4399 {
4400   register char *cp, *name;
4401   bool search_identifier = FALSE;
4402
4403   LOOP_ON_INPUT_LINES (inf, lb, cp)
4404     {
4405       cp = skip_spaces (cp);
4406       name = cp;
4407       if (search_identifier
4408           && *cp != '\0')
4409         {
4410           while (!notinname (*cp))
4411             cp++;
4412           make_tag (name, cp - name, TRUE,
4413                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4414           search_identifier = FALSE;
4415         }
4416       else if (LOOKING_AT (cp, "function"))
4417         {
4418           if (*cp == '&')
4419             cp = skip_spaces (cp+1);
4420           if (*cp != '\0')
4421             {
4422               name = cp;
4423               while (!notinname (*cp))
4424                 cp++;
4425               make_tag (name, cp - name, TRUE,
4426                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4427             }
4428           else
4429             search_identifier = TRUE;
4430         }
4431       else if (LOOKING_AT (cp, "class"))
4432         {
4433           if (*cp != '\0')
4434             {
4435               name = cp;
4436               while (*cp != '\0' && !iswhite (*cp))
4437                 cp++;
4438               make_tag (name, cp - name, FALSE,
4439                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4440             }
4441           else
4442             search_identifier = TRUE;
4443         }
4444       else if (strneq (cp, "define", 6)
4445                && (cp = skip_spaces (cp+6))
4446                && *cp++ == '('
4447                && (*cp == '"' || *cp == '\''))
4448         {
4449           char quote = *cp++;
4450           name = cp;
4451           while (*cp != quote && *cp != '\0')
4452             cp++;
4453           make_tag (name, cp - name, FALSE,
4454                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4455         }
4456       else if (members
4457                && LOOKING_AT (cp, "var")
4458                && *cp == '$')
4459         {
4460           name = cp;
4461           while (!notinname (*cp))
4462             cp++;
4463           make_tag (name, cp - name, FALSE,
4464                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4465         }
4466     }
4467 }
4468
4469 \f
4470 /*
4471  * Cobol tag functions
4472  * We could look for anything that could be a paragraph name.
4473  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4474  * Idea by Corny de Souza (1993)
4475  */
4476 static void
4477 Cobol_paragraphs (FILE *inf)
4478 {
4479   register char *bp, *ep;
4480
4481   LOOP_ON_INPUT_LINES (inf, lb, bp)
4482     {
4483       if (lb.len < 9)
4484         continue;
4485       bp += 8;
4486
4487       /* If eoln, compiler option or comment ignore whole line. */
4488       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4489         continue;
4490
4491       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4492         continue;
4493       if (*ep++ == '.')
4494         make_tag (bp, ep - bp, TRUE,
4495                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4496     }
4497 }
4498
4499 \f
4500 /*
4501  * Makefile support
4502  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4503  */
4504 static void
4505 Makefile_targets (FILE *inf)
4506 {
4507   register char *bp;
4508
4509   LOOP_ON_INPUT_LINES (inf, lb, bp)
4510     {
4511       if (*bp == '\t' || *bp == '#')
4512         continue;
4513       while (*bp != '\0' && *bp != '=' && *bp != ':')
4514         bp++;
4515       if (*bp == ':' || (globals && *bp == '='))
4516         {
4517           /* We should detect if there is more than one tag, but we do not.
4518              We just skip initial and final spaces. */
4519           char * namestart = skip_spaces (lb.buffer);
4520           while (--bp > namestart)
4521             if (!notinname (*bp))
4522               break;
4523           make_tag (namestart, bp - namestart + 1, TRUE,
4524                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4525         }
4526     }
4527 }
4528
4529 \f
4530 /*
4531  * Pascal parsing
4532  * Original code by Mosur K. Mohan (1989)
4533  *
4534  *  Locates tags for procedures & functions.  Doesn't do any type- or
4535  *  var-definitions.  It does look for the keyword "extern" or
4536  *  "forward" immediately following the procedure statement; if found,
4537  *  the tag is skipped.
4538  */
4539 static void
4540 Pascal_functions (FILE *inf)
4541 {
4542   linebuffer tline;             /* mostly copied from C_entries */
4543   long save_lcno;
4544   int save_lineno, namelen, taglen;
4545   char c, *name;
4546
4547   bool                          /* each of these flags is TRUE if: */
4548     incomment,                  /* point is inside a comment */
4549     inquote,                    /* point is inside '..' string */
4550     get_tagname,                /* point is after PROCEDURE/FUNCTION
4551                                    keyword, so next item = potential tag */
4552     found_tag,                  /* point is after a potential tag */
4553     inparms,                    /* point is within parameter-list */
4554     verify_tag;                 /* point has passed the parm-list, so the
4555                                    next token will determine whether this
4556                                    is a FORWARD/EXTERN to be ignored, or
4557                                    whether it is a real tag */
4558
4559   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4560   name = NULL;                  /* keep compiler quiet */
4561   dbp = lb.buffer;
4562   *dbp = '\0';
4563   linebuffer_init (&tline);
4564
4565   incomment = inquote = FALSE;
4566   found_tag = FALSE;            /* have a proc name; check if extern */
4567   get_tagname = FALSE;          /* found "procedure" keyword         */
4568   inparms = FALSE;              /* found '(' after "proc"            */
4569   verify_tag = FALSE;           /* check if "extern" is ahead        */
4570
4571
4572   while (!feof (inf))           /* long main loop to get next char */
4573     {
4574       c = *dbp++;
4575       if (c == '\0')            /* if end of line */
4576         {
4577           readline (&lb, inf);
4578           dbp = lb.buffer;
4579           if (*dbp == '\0')
4580             continue;
4581           if (!((found_tag && verify_tag)
4582                 || get_tagname))
4583             c = *dbp++;         /* only if don't need *dbp pointing
4584                                    to the beginning of the name of
4585                                    the procedure or function */
4586         }
4587       if (incomment)
4588         {
4589           if (c == '}')         /* within { } comments */
4590             incomment = FALSE;
4591           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4592             {
4593               dbp++;
4594               incomment = FALSE;
4595             }
4596           continue;
4597         }
4598       else if (inquote)
4599         {
4600           if (c == '\'')
4601             inquote = FALSE;
4602           continue;
4603         }
4604       else
4605         switch (c)
4606           {
4607           case '\'':
4608             inquote = TRUE;     /* found first quote */
4609             continue;
4610           case '{':             /* found open { comment */
4611             incomment = TRUE;
4612             continue;
4613           case '(':
4614             if (*dbp == '*')    /* found open (* comment */
4615               {
4616                 incomment = TRUE;
4617                 dbp++;
4618               }
4619             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4620               inparms = TRUE;
4621             continue;
4622           case ')':             /* end of parms list */
4623             if (inparms)
4624               inparms = FALSE;
4625             continue;
4626           case ';':
4627             if (found_tag && !inparms) /* end of proc or fn stmt */
4628               {
4629                 verify_tag = TRUE;
4630                 break;
4631               }
4632             continue;
4633           }
4634       if (found_tag && verify_tag && (*dbp != ' '))
4635         {
4636           /* Check if this is an "extern" declaration. */
4637           if (*dbp == '\0')
4638             continue;
4639           if (lowcase (*dbp) == 'e')
4640             {
4641               if (nocase_tail ("extern")) /* superfluous, really! */
4642                 {
4643                   found_tag = FALSE;
4644                   verify_tag = FALSE;
4645                 }
4646             }
4647           else if (lowcase (*dbp) == 'f')
4648             {
4649               if (nocase_tail ("forward")) /* check for forward reference */
4650                 {
4651                   found_tag = FALSE;
4652                   verify_tag = FALSE;
4653                 }
4654             }
4655           if (found_tag && verify_tag) /* not external proc, so make tag */
4656             {
4657               found_tag = FALSE;
4658               verify_tag = FALSE;
4659               make_tag (name, namelen, TRUE,
4660                         tline.buffer, taglen, save_lineno, save_lcno);
4661               continue;
4662             }
4663         }
4664       if (get_tagname)          /* grab name of proc or fn */
4665         {
4666           char *cp;
4667
4668           if (*dbp == '\0')
4669             continue;
4670
4671           /* Find block name. */
4672           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4673             continue;
4674
4675           /* Save all values for later tagging. */
4676           linebuffer_setlen (&tline, lb.len);
4677           strcpy (tline.buffer, lb.buffer);
4678           save_lineno = lineno;
4679           save_lcno = linecharno;
4680           name = tline.buffer + (dbp - lb.buffer);
4681           namelen = cp - dbp;
4682           taglen = cp - lb.buffer + 1;
4683
4684           dbp = cp;             /* set dbp to e-o-token */
4685           get_tagname = FALSE;
4686           found_tag = TRUE;
4687           continue;
4688
4689           /* And proceed to check for "extern". */
4690         }
4691       else if (!incomment && !inquote && !found_tag)
4692         {
4693           /* Check for proc/fn keywords. */
4694           switch (lowcase (c))
4695             {
4696             case 'p':
4697               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4698                 get_tagname = TRUE;
4699               continue;
4700             case 'f':
4701               if (nocase_tail ("unction"))
4702                 get_tagname = TRUE;
4703               continue;
4704             }
4705         }
4706     } /* while not eof */
4707
4708   free (tline.buffer);
4709 }
4710
4711 \f
4712 /*
4713  * Lisp tag functions
4714  *  look for (def or (DEF, quote or QUOTE
4715  */
4716
4717 static void L_getit (void);
4718
4719 static void
4720 L_getit (void)
4721 {
4722   if (*dbp == '\'')             /* Skip prefix quote */
4723     dbp++;
4724   else if (*dbp == '(')
4725   {
4726     dbp++;
4727     /* Try to skip "(quote " */
4728     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4729       /* Ok, then skip "(" before name in (defstruct (foo)) */
4730       dbp = skip_spaces (dbp);
4731   }
4732   get_tag (dbp, NULL);
4733 }
4734
4735 static void
4736 Lisp_functions (FILE *inf)
4737 {
4738   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4739     {
4740       if (dbp[0] != '(')
4741         continue;
4742
4743       /* "(defvar foo)" is a declaration rather than a definition.  */
4744       if (! declarations)
4745         {
4746           char *p = dbp + 1;
4747           if (LOOKING_AT (p, "defvar"))
4748             {
4749               p = skip_name (p); /* past var name */
4750               p = skip_spaces (p);
4751               if (*p == ')')
4752                 continue;
4753             }
4754         }
4755
4756       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4757         {
4758           dbp = skip_non_spaces (dbp);
4759           dbp = skip_spaces (dbp);
4760           L_getit ();
4761         }
4762       else
4763         {
4764           /* Check for (foo::defmumble name-defined ... */
4765           do
4766             dbp++;
4767           while (!notinname (*dbp) && *dbp != ':');
4768           if (*dbp == ':')
4769             {
4770               do
4771                 dbp++;
4772               while (*dbp == ':');
4773
4774               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4775                 {
4776                   dbp = skip_non_spaces (dbp);
4777                   dbp = skip_spaces (dbp);
4778                   L_getit ();
4779                 }
4780             }
4781         }
4782     }
4783 }
4784
4785 \f
4786 /*
4787  * Lua script language parsing
4788  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4789  *
4790  *  "function" and "local function" are tags if they start at column 1.
4791  */
4792 static void
4793 Lua_functions (FILE *inf)
4794 {
4795   register char *bp;
4796
4797   LOOP_ON_INPUT_LINES (inf, lb, bp)
4798     {
4799       if (bp[0] != 'f' && bp[0] != 'l')
4800         continue;
4801
4802       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4803
4804       if (LOOKING_AT (bp, "function"))
4805         get_tag (bp, NULL);
4806     }
4807 }
4808
4809 \f
4810 /*
4811  * PostScript tags
4812  * Just look for lines where the first character is '/'
4813  * Also look at "defineps" for PSWrap
4814  * Ideas by:
4815  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4816  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4817  */
4818 static void
4819 PS_functions (FILE *inf)
4820 {
4821   register char *bp, *ep;
4822
4823   LOOP_ON_INPUT_LINES (inf, lb, bp)
4824     {
4825       if (bp[0] == '/')
4826         {
4827           for (ep = bp+1;
4828                *ep != '\0' && *ep != ' ' && *ep != '{';
4829                ep++)
4830             continue;
4831           make_tag (bp, ep - bp, TRUE,
4832                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4833         }
4834       else if (LOOKING_AT (bp, "defineps"))
4835         get_tag (bp, NULL);
4836     }
4837 }
4838
4839 \f
4840 /*
4841  * Forth tags
4842  * Ignore anything after \ followed by space or in ( )
4843  * Look for words defined by :
4844  * Look for constant, code, create, defer, value, and variable
4845  * OBP extensions:  Look for buffer:, field,
4846  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
4847  */
4848 static void
4849 Forth_words (FILE *inf)
4850 {
4851   register char *bp;
4852
4853   LOOP_ON_INPUT_LINES (inf, lb, bp)
4854     while ((bp = skip_spaces (bp))[0] != '\0')
4855       if (bp[0] == '\\' && iswhite (bp[1]))
4856         break;                  /* read next line */
4857       else if (bp[0] == '(' && iswhite (bp[1]))
4858         do                      /* skip to ) or eol */
4859           bp++;
4860         while (*bp != ')' && *bp != '\0');
4861       else if ((bp[0] == ':' && iswhite (bp[1]) && bp++)
4862                || LOOKING_AT_NOCASE (bp, "constant")
4863                || LOOKING_AT_NOCASE (bp, "code")
4864                || LOOKING_AT_NOCASE (bp, "create")
4865                || LOOKING_AT_NOCASE (bp, "defer")
4866                || LOOKING_AT_NOCASE (bp, "value")
4867                || LOOKING_AT_NOCASE (bp, "variable")
4868                || LOOKING_AT_NOCASE (bp, "buffer:")
4869                || LOOKING_AT_NOCASE (bp, "field"))
4870         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
4871       else
4872         bp = skip_non_spaces (bp);
4873 }
4874
4875 \f
4876 /*
4877  * Scheme tag functions
4878  * look for (def... xyzzy
4879  *          (def... (xyzzy
4880  *          (def ... ((...(xyzzy ....
4881  *          (set! xyzzy
4882  * Original code by Ken Haase (1985?)
4883  */
4884 static void
4885 Scheme_functions (FILE *inf)
4886 {
4887   register char *bp;
4888
4889   LOOP_ON_INPUT_LINES (inf, lb, bp)
4890     {
4891       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4892         {
4893           bp = skip_non_spaces (bp+4);
4894           /* Skip over open parens and white space.  Don't continue past
4895              '\0'. */
4896           while (*bp && notinname (*bp))
4897             bp++;
4898           get_tag (bp, NULL);
4899         }
4900       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4901         get_tag (bp, NULL);
4902     }
4903 }
4904
4905 \f
4906 /* Find tags in TeX and LaTeX input files.  */
4907
4908 /* TEX_toktab is a table of TeX control sequences that define tags.
4909  * Each entry records one such control sequence.
4910  *
4911  * Original code from who knows whom.
4912  * Ideas by:
4913  *   Stefan Monnier (2002)
4914  */
4915
4916 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
4917
4918 /* Default set of control sequences to put into TEX_toktab.
4919    The value of environment var TEXTAGS is prepended to this.  */
4920 static const char *TEX_defenv = "\
4921 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4922 :part:appendix:entry:index:def\
4923 :newcommand:renewcommand:newenvironment:renewenvironment";
4924
4925 static void TEX_mode (FILE *);
4926 static void TEX_decode_env (const char *, const char *);
4927
4928 static char TEX_esc = '\\';
4929 static char TEX_opgrp = '{';
4930 static char TEX_clgrp = '}';
4931
4932 /*
4933  * TeX/LaTeX scanning loop.
4934  */
4935 static void
4936 TeX_commands (FILE *inf)
4937 {
4938   char *cp;
4939   linebuffer *key;
4940
4941   /* Select either \ or ! as escape character.  */
4942   TEX_mode (inf);
4943
4944   /* Initialize token table once from environment. */
4945   if (TEX_toktab == NULL)
4946     TEX_decode_env ("TEXTAGS", TEX_defenv);
4947
4948   LOOP_ON_INPUT_LINES (inf, lb, cp)
4949     {
4950       /* Look at each TEX keyword in line. */
4951       for (;;)
4952         {
4953           /* Look for a TEX escape. */
4954           while (*cp++ != TEX_esc)
4955             if (cp[-1] == '\0' || cp[-1] == '%')
4956               goto tex_next_line;
4957
4958           for (key = TEX_toktab; key->buffer != NULL; key++)
4959             if (strneq (cp, key->buffer, key->len))
4960               {
4961                 register char *p;
4962                 int namelen, linelen;
4963                 bool opgrp = FALSE;
4964
4965                 cp = skip_spaces (cp + key->len);
4966                 if (*cp == TEX_opgrp)
4967                   {
4968                     opgrp = TRUE;
4969                     cp++;
4970                   }
4971                 for (p = cp;
4972                      (!iswhite (*p) && *p != '#' &&
4973                       *p != TEX_opgrp && *p != TEX_clgrp);
4974                      p++)
4975                   continue;
4976                 namelen = p - cp;
4977                 linelen = lb.len;
4978                 if (!opgrp || *p == TEX_clgrp)
4979                   {
4980                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4981                       p++;
4982                     linelen = p - lb.buffer + 1;
4983                   }
4984                 make_tag (cp, namelen, TRUE,
4985                           lb.buffer, linelen, lineno, linecharno);
4986                 goto tex_next_line; /* We only tag a line once */
4987               }
4988         }
4989     tex_next_line:
4990       ;
4991     }
4992 }
4993
4994 #define TEX_LESC '\\'
4995 #define TEX_SESC '!'
4996
4997 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4998    chars accordingly. */
4999 static void
5000 TEX_mode (FILE *inf)
5001 {
5002   int c;
5003
5004   while ((c = getc (inf)) != EOF)
5005     {
5006       /* Skip to next line if we hit the TeX comment char. */
5007       if (c == '%')
5008         while (c != '\n' && c != EOF)
5009           c = getc (inf);
5010       else if (c == TEX_LESC || c == TEX_SESC )
5011         break;
5012     }
5013
5014   if (c == TEX_LESC)
5015     {
5016       TEX_esc = TEX_LESC;
5017       TEX_opgrp = '{';
5018       TEX_clgrp = '}';
5019     }
5020   else
5021     {
5022       TEX_esc = TEX_SESC;
5023       TEX_opgrp = '<';
5024       TEX_clgrp = '>';
5025     }
5026   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5027      No attempt is made to correct the situation. */
5028   rewind (inf);
5029 }
5030
5031 /* Read environment and prepend it to the default string.
5032    Build token table. */
5033 static void
5034 TEX_decode_env (const char *evarname, const char *defenv)
5035 {
5036   register const char *env, *p;
5037   int i, len;
5038
5039   /* Append default string to environment. */
5040   env = getenv (evarname);
5041   if (!env)
5042     env = defenv;
5043   else
5044     env = concat (env, defenv, "");
5045
5046   /* Allocate a token table */
5047   for (len = 1, p = env; p;)
5048     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5049       len++;
5050   TEX_toktab = xnew (len, linebuffer);
5051
5052   /* Unpack environment string into token table. Be careful about */
5053   /* zero-length strings (leading ':', "::" and trailing ':') */
5054   for (i = 0; *env != '\0';)
5055     {
5056       p = etags_strchr (env, ':');
5057       if (!p)                   /* End of environment string. */
5058         p = env + strlen (env);
5059       if (p - env > 0)
5060         {                       /* Only non-zero strings. */
5061           TEX_toktab[i].buffer = savenstr (env, p - env);
5062           TEX_toktab[i].len = p - env;
5063           i++;
5064         }
5065       if (*p)
5066         env = p + 1;
5067       else
5068         {
5069           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5070           TEX_toktab[i].len = 0;
5071           break;
5072         }
5073     }
5074 }
5075
5076 \f
5077 /* Texinfo support.  Dave Love, Mar. 2000.  */
5078 static void
5079 Texinfo_nodes (FILE *inf)
5080 {
5081   char *cp, *start;
5082   LOOP_ON_INPUT_LINES (inf, lb, cp)
5083     if (LOOKING_AT (cp, "@node"))
5084       {
5085         start = cp;
5086         while (*cp != '\0' && *cp != ',')
5087           cp++;
5088         make_tag (start, cp - start, TRUE,
5089                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5090       }
5091 }
5092
5093 \f
5094 /*
5095  * HTML support.
5096  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5097  * Contents of <a name=xxx> are tags with name xxx.
5098  *
5099  * Francesco Potortì, 2002.
5100  */
5101 static void
5102 HTML_labels (FILE *inf)
5103 {
5104   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5105   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5106   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5107   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5108   char *end;
5109
5110
5111   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5112
5113   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5114     for (;;)                    /* loop on the same line */
5115       {
5116         if (skiptag)            /* skip HTML tag */
5117           {
5118             while (*dbp != '\0' && *dbp != '>')
5119               dbp++;
5120             if (*dbp == '>')
5121               {
5122                 dbp += 1;
5123                 skiptag = FALSE;
5124                 continue;       /* look on the same line */
5125               }
5126             break;              /* go to next line */
5127           }
5128
5129         else if (intag) /* look for "name=" or "id=" */
5130           {
5131             while (*dbp != '\0' && *dbp != '>'
5132                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5133               dbp++;
5134             if (*dbp == '\0')
5135               break;            /* go to next line */
5136             if (*dbp == '>')
5137               {
5138                 dbp += 1;
5139                 intag = FALSE;
5140                 continue;       /* look on the same line */
5141               }
5142             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5143                 || LOOKING_AT_NOCASE (dbp, "id="))
5144               {
5145                 bool quoted = (dbp[0] == '"');
5146
5147                 if (quoted)
5148                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5149                     continue;
5150                 else
5151                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5152                     continue;
5153                 linebuffer_setlen (&token_name, end - dbp);
5154                 memcpy (token_name.buffer, dbp, end - dbp);
5155                 token_name.buffer[end - dbp] = '\0';
5156
5157                 dbp = end;
5158                 intag = FALSE;  /* we found what we looked for */
5159                 skiptag = TRUE; /* skip to the end of the tag */
5160                 getnext = TRUE; /* then grab the text */
5161                 continue;       /* look on the same line */
5162               }
5163             dbp += 1;
5164           }
5165
5166         else if (getnext)       /* grab next tokens and tag them */
5167           {
5168             dbp = skip_spaces (dbp);
5169             if (*dbp == '\0')
5170               break;            /* go to next line */
5171             if (*dbp == '<')
5172               {
5173                 intag = TRUE;
5174                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5175                 continue;       /* look on the same line */
5176               }
5177
5178             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5179               continue;
5180             make_tag (token_name.buffer, token_name.len, TRUE,
5181                       dbp, end - dbp, lineno, linecharno);
5182             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5183             getnext = FALSE;
5184             break;              /* go to next line */
5185           }
5186
5187         else                    /* look for an interesting HTML tag */
5188           {
5189             while (*dbp != '\0' && *dbp != '<')
5190               dbp++;
5191             if (*dbp == '\0')
5192               break;            /* go to next line */
5193             intag = TRUE;
5194             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5195               {
5196                 inanchor = TRUE;
5197                 continue;       /* look on the same line */
5198               }
5199             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5200                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5201                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5202                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5203               {
5204                 intag = FALSE;
5205                 getnext = TRUE;
5206                 continue;       /* look on the same line */
5207               }
5208             dbp += 1;
5209           }
5210       }
5211 }
5212
5213 \f
5214 /*
5215  * Prolog support
5216  *
5217  * Assumes that the predicate or rule starts at column 0.
5218  * Only the first clause of a predicate or rule is added.
5219  * Original code by Sunichirou Sugou (1989)
5220  * Rewritten by Anders Lindgren (1996)
5221  */
5222 static size_t prolog_pr (char *, char *);
5223 static void prolog_skip_comment (linebuffer *, FILE *);
5224 static size_t prolog_atom (char *, size_t);
5225
5226 static void
5227 Prolog_functions (FILE *inf)
5228 {
5229   char *cp, *last;
5230   size_t len;
5231   size_t allocated;
5232
5233   allocated = 0;
5234   len = 0;
5235   last = NULL;
5236
5237   LOOP_ON_INPUT_LINES (inf, lb, cp)
5238     {
5239       if (cp[0] == '\0')        /* Empty line */
5240         continue;
5241       else if (iswhite (cp[0])) /* Not a predicate */
5242         continue;
5243       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5244         prolog_skip_comment (&lb, inf);
5245       else if ((len = prolog_pr (cp, last)) > 0)
5246         {
5247           /* Predicate or rule.  Store the function name so that we
5248              only generate a tag for the first clause.  */
5249           if (last == NULL)
5250             last = xnew (len + 1, char);
5251           else if (len + 1 > allocated)
5252             xrnew (last, len + 1, char);
5253           allocated = len + 1;
5254           memcpy (last, cp, len);
5255           last[len] = '\0';
5256         }
5257     }
5258   free (last);
5259 }
5260
5261
5262 static void
5263 prolog_skip_comment (linebuffer *plb, FILE *inf)
5264 {
5265   char *cp;
5266
5267   do
5268     {
5269       for (cp = plb->buffer; *cp != '\0'; cp++)
5270         if (cp[0] == '*' && cp[1] == '/')
5271           return;
5272       readline (plb, inf);
5273     }
5274   while (!feof (inf));
5275 }
5276
5277 /*
5278  * A predicate or rule definition is added if it matches:
5279  *     <beginning of line><Prolog Atom><whitespace>(
5280  * or  <beginning of line><Prolog Atom><whitespace>:-
5281  *
5282  * It is added to the tags database if it doesn't match the
5283  * name of the previous clause header.
5284  *
5285  * Return the size of the name of the predicate or rule, or 0 if no
5286  * header was found.
5287  */
5288 static size_t
5289 prolog_pr (char *s, char *last)
5290
5291                                 /* Name of last clause. */
5292 {
5293   size_t pos;
5294   size_t len;
5295
5296   pos = prolog_atom (s, 0);
5297   if (! pos)
5298     return 0;
5299
5300   len = pos;
5301   pos = skip_spaces (s + pos) - s;
5302
5303   if ((s[pos] == '.'
5304        || (s[pos] == '(' && (pos += 1))
5305        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5306       && (last == NULL          /* save only the first clause */
5307           || len != strlen (last)
5308           || !strneq (s, last, len)))
5309         {
5310           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5311           return len;
5312         }
5313   else
5314     return 0;
5315 }
5316
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or 0 if no atom was found.
 *
 * An atom, in this context, is one of:
 * - A lower case letter or underscore followed by any number of
 *   alphanumeric characters or underscores.
 * - A single-quoted string.  Two consecutive single quotes stand for
 *   one quote inside the string, and a backslash quotes the character
 *   after it.  A quoted atom that is not closed before the end of S
 *   is rejected.
 */
static size_t
prolog_atom (char *s, size_t pos)
{
  const char *start = s + pos;
  const char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* Unquoted atom: swallow the identifier characters.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (size_t) (p - start);
    }

  if (*p != '\'')
    return 0;

  /* Quoted atom: P steps over the opening quote first.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return 0;

      case '\'':
        if (p[1] != '\'')
          return (size_t) (p + 1 - start);      /* closing quote */
        p += 2;                 /* doubled quote: literal quote */
        break;

      case '\\':
        if (p[1] == '\0')
          return 0;
        p += 2;                 /* backslash and the char it quotes */
        break;

      default:
        p++;
        break;
      }
}
5373
5374 \f
5375 /*
5376  * Support for Erlang
5377  *
5378  * Generates tags for functions, defines, and records.
5379  * Assumes that Erlang functions start at column 0.
5380  * Original code by Anders Lindgren (1996)
5381  */
5382 static int erlang_func (char *, char *);
5383 static void erlang_attribute (char *);
5384 static int erlang_atom (char *);
5385
5386 static void
5387 Erlang_functions (FILE *inf)
5388 {
5389   char *cp, *last;
5390   int len;
5391   int allocated;
5392
5393   allocated = 0;
5394   len = 0;
5395   last = NULL;
5396
5397   LOOP_ON_INPUT_LINES (inf, lb, cp)
5398     {
5399       if (cp[0] == '\0')        /* Empty line */
5400         continue;
5401       else if (iswhite (cp[0])) /* Not function nor attribute */
5402         continue;
5403       else if (cp[0] == '%')    /* comment */
5404         continue;
5405       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5406         continue;
5407       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5408         {
5409           erlang_attribute (cp);
5410           if (last != NULL)
5411             {
5412               free (last);
5413               last = NULL;
5414             }
5415         }
5416       else if ((len = erlang_func (cp, last)) > 0)
5417         {
5418           /*
5419            * Function.  Store the function name so that we only
5420            * generates a tag for the first clause.
5421            */
5422           if (last == NULL)
5423             last = xnew (len + 1, char);
5424           else if (len + 1 > allocated)
5425             xrnew (last, len + 1, char);
5426           allocated = len + 1;
5427           memcpy (last, cp, len);
5428           last[len] = '\0';
5429         }
5430     }
5431   free (last);
5432 }
5433
5434
5435 /*
5436  * A function definition is added if it matches:
5437  *     <beginning of line><Erlang Atom><whitespace>(
5438  *
5439  * It is added to the tags database if it doesn't match the
5440  * name of the previous clause header.
5441  *
5442  * Return the size of the name of the function, or 0 if no function
5443  * was found.
5444  */
5445 static int
5446 erlang_func (char *s, char *last)
5447
5448                                 /* Name of last clause. */
5449 {
5450   int pos;
5451   int len;
5452
5453   pos = erlang_atom (s);
5454   if (pos < 1)
5455     return 0;
5456
5457   len = pos;
5458   pos = skip_spaces (s + pos) - s;
5459
5460   /* Save only the first clause. */
5461   if (s[pos++] == '('
5462       && (last == NULL
5463           || len != (int)strlen (last)
5464           || !strneq (s, last, len)))
5465         {
5466           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5467           return len;
5468         }
5469
5470   return 0;
5471 }
5472
5473
5474 /*
5475  * Handle attributes.  Currently, tags are generated for defines
5476  * and records.
5477  *
5478  * They are on the form:
5479  * -define(foo, bar).
5480  * -define(Foo(M, N), M+N).
5481  * -record(graph, {vtab = notable, cyclic = true}).
5482  */
5483 static void
5484 erlang_attribute (char *s)
5485 {
5486   char *cp = s;
5487
5488   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5489       && *cp++ == '(')
5490     {
5491       int len = erlang_atom (skip_spaces (cp));
5492       if (len > 0)
5493         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5494     }
5495   return;
5496 }
5497
5498
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * one.
 *
 * Accepted forms:
 * - A letter or underscore followed by any number of alphanumeric
 *   characters or underscores.
 * - A single-quoted string in which a backslash quotes the character
 *   after it.  A quoted atom that is not closed before the end of S
 *   is rejected.
 */
static int
erlang_atom (char *s)
{
  const char *p = s;

  if (*p == '\'')
    {
      /* Quoted atom: look for the closing quote.  */
      for (p++; *p != '\''; p++)
        {
          if (*p == '\\')
            p++;                /* examine the quoted character instead */
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
        }
      return (int) (p - s) + 1; /* include the closing quote */
    }

  if (ISALPHA (*p) || *p == '_')
    {
      /* Unquoted atom: swallow the identifier characters.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - s);
    }

  return 0;
}
5526
5527 \f
5528 static char *scan_separators (char *);
5529 static void add_regex (char *, language *);
5530 static char *substitute (char *, char *, struct re_registers *);
5531
/*
 * Take a string like "/blah/" and turn it into "blah", working in
 * place.  NAME[0] is the separator; scanning stops at the first
 * separator that is not preceded by a backslash.
 *
 * Backslash sequences are translated while copying:
 *   \a BEL, \b BS, \d DEL, \e ESC, \f FF, \n NL, \r CR, \t TAB, \v VT;
 *   a backslash followed by the separator yields the separator;
 *   any other backslash sequence is copied unchanged.
 * A backslash that is the last character of the string is dropped.
 *
 * The result is stored from NAME[0] onwards and is null terminated.
 * Returns a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (char *name)
{
  const char sep = name[0];
  char *dst = name;             /* where the next output char goes */
  char *src;                    /* input char being examined */
  char *result;

  for (src = name + 1; *src != '\0'; src++)
    {
      char c = *src;

      if (c == '\\')
        {
          /* Look at the character the backslash quotes.  */
          c = *++src;
          if (c == '\0')
            break;              /* dangling backslash at end of string */

          switch (c)
            {
            case 'a': *dst++ = '\007'; break;   /* BEL (bell)            */
            case 'b': *dst++ = '\b'; break;     /* BS (back space)       */
            case 'd': *dst++ = 0177; break;     /* DEL (delete)          */
            case 'e': *dst++ = 033; break;      /* ESC (escape)          */
            case 'f': *dst++ = '\f'; break;     /* FF (form feed)        */
            case 'n': *dst++ = '\n'; break;     /* NL (new line)         */
            case 'r': *dst++ = '\r'; break;     /* CR (carriage return)  */
            case 't': *dst++ = '\t'; break;     /* TAB (horizontal tab)  */
            case 'v': *dst++ = '\v'; break;     /* VT (vertical tab)     */
            default:
              if (c != sep)
                *dst++ = '\\';  /* not ours to interpret: keep the quote */
              *dst++ = c;
              break;
            }
        }
      else if (c == sep)
        break;                  /* unquoted separator: end of the field */
      else
        *dst++ = c;
    }

  /* Decide on the result before terminating the copied string.  */
  result = (*src == sep) ? src : NULL;
  *dst = '\0';
  return result;
}
5590
5591 /* Look at the argument of --regex or --no-regex and do the right
5592    thing.  Same for each line of a regexp file. */
5593 static void
5594 analyse_regex (char *regex_arg)
5595 {
5596   if (regex_arg == NULL)
5597     {
5598       free_regexps ();          /* --no-regex: remove existing regexps */
5599       return;
5600     }
5601
5602   /* A real --regexp option or a line in a regexp file. */
5603   switch (regex_arg[0])
5604     {
5605       /* Comments in regexp file or null arg to --regex. */
5606     case '\0':
5607     case ' ':
5608     case '\t':
5609       break;
5610
5611       /* Read a regex file.  This is recursive and may result in a
5612          loop, which will stop when the file descriptors are exhausted. */
5613     case '@':
5614       {
5615         FILE *regexfp;
5616         linebuffer regexbuf;
5617         char *regexfile = regex_arg + 1;
5618
5619         /* regexfile is a file containing regexps, one per line. */
5620         regexfp = fopen (regexfile, "r");
5621         if (regexfp == NULL)
5622           pfatal (regexfile);
5623         linebuffer_init (&regexbuf);
5624         while (readline_internal (&regexbuf, regexfp) > 0)
5625           analyse_regex (regexbuf.buffer);
5626         free (regexbuf.buffer);
5627         fclose (regexfp);
5628       }
5629       break;
5630
5631       /* Regexp to be used for a specific language only. */
5632     case '{':
5633       {
5634         language *lang;
5635         char *lang_name = regex_arg + 1;
5636         char *cp;
5637
5638         for (cp = lang_name; *cp != '}'; cp++)
5639           if (*cp == '\0')
5640             {
5641               error ("unterminated language name in regex: %s", regex_arg);
5642               return;
5643             }
5644         *cp++ = '\0';
5645         lang = get_language_from_langname (lang_name);
5646         if (lang == NULL)
5647           return;
5648         add_regex (cp, lang);
5649       }
5650       break;
5651
5652       /* Regexp to be used for any language. */
5653     default:
5654       add_regex (regex_arg, NULL);
5655       break;
5656     }
5657 }
5658
5659 /* Separate the regexp pattern, compile it,
5660    and care for optional name and modifiers. */
5661 static void
5662 add_regex (char *regexp_pattern, language *lang)
5663 {
5664   static struct re_pattern_buffer zeropattern;
5665   char sep, *pat, *name, *modifiers;
5666   char empty = '\0';
5667   const char *err;
5668   struct re_pattern_buffer *patbuf;
5669   regexp *rp;
5670   bool
5671     force_explicit_name = TRUE, /* do not use implicit tag names */
5672     ignore_case = FALSE,        /* case is significant */
5673     multi_line = FALSE,         /* matches are done one line at a time */
5674     single_line = FALSE;        /* dot does not match newline */
5675
5676
5677   if (strlen (regexp_pattern) < 3)
5678     {
5679       error ("null regexp");
5680       return;
5681     }
5682   sep = regexp_pattern[0];
5683   name = scan_separators (regexp_pattern);
5684   if (name == NULL)
5685     {
5686       error ("%s: unterminated regexp", regexp_pattern);
5687       return;
5688     }
5689   if (name[1] == sep)
5690     {
5691       error ("null name for regexp \"%s\"", regexp_pattern);
5692       return;
5693     }
5694   modifiers = scan_separators (name);
5695   if (modifiers == NULL)        /* no terminating separator --> no name */
5696     {
5697       modifiers = name;
5698       name = &empty;
5699     }
5700   else
5701     modifiers += 1;             /* skip separator */
5702
5703   /* Parse regex modifiers. */
5704   for (; modifiers[0] != '\0'; modifiers++)
5705     switch (modifiers[0])
5706       {
5707       case 'N':
5708         if (modifiers == name)
5709           error ("forcing explicit tag name but no name, ignoring");
5710         force_explicit_name = TRUE;
5711         break;
5712       case 'i':
5713         ignore_case = TRUE;
5714         break;
5715       case 's':
5716         single_line = TRUE;
5717         /* FALLTHRU */
5718       case 'm':
5719         multi_line = TRUE;
5720         need_filebuf = TRUE;
5721         break;
5722       default:
5723         error ("invalid regexp modifier `%c', ignoring", modifiers[0]);
5724         break;
5725       }
5726
5727   patbuf = xnew (1, struct re_pattern_buffer);
5728   *patbuf = zeropattern;
5729   if (ignore_case)
5730     {
5731       static char lc_trans[CHARS];
5732       int i;
5733       for (i = 0; i < CHARS; i++)
5734         lc_trans[i] = lowcase (i);
5735       patbuf->translate = lc_trans;     /* translation table to fold case  */
5736     }
5737
5738   if (multi_line)
5739     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5740   else
5741     pat = regexp_pattern;
5742
5743   if (single_line)
5744     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5745   else
5746     re_set_syntax (RE_SYNTAX_EMACS);
5747
5748   err = re_compile_pattern (pat, strlen (pat), patbuf);
5749   if (multi_line)
5750     free (pat);
5751   if (err != NULL)
5752     {
5753       error ("%s while compiling pattern", err);
5754       return;
5755     }
5756
5757   rp = p_head;
5758   p_head = xnew (1, regexp);
5759   p_head->pattern = savestr (regexp_pattern);
5760   p_head->p_next = rp;
5761   p_head->lang = lang;
5762   p_head->pat = patbuf;
5763   p_head->name = savestr (name);
5764   p_head->error_signaled = FALSE;
5765   p_head->force_explicit_name = force_explicit_name;
5766   p_head->ignore_case = ignore_case;
5767   p_head->multi_line = multi_line;
5768 }
5769
5770 /*
5771  * Do the substitutions indicated by the regular expression and
5772  * arguments.
5773  */
5774 static char *
5775 substitute (char *in, char *out, struct re_registers *regs)
5776 {
5777   char *result, *t;
5778   int size, dig, diglen;
5779
5780   result = NULL;
5781   size = strlen (out);
5782
5783   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5784   if (out[size - 1] == '\\')
5785     fatal ("pattern error in \"%s\"", out);
5786   for (t = etags_strchr (out, '\\');
5787        t != NULL;
5788        t = etags_strchr (t + 2, '\\'))
5789     if (ISDIGIT (t[1]))
5790       {
5791         dig = t[1] - '0';
5792         diglen = regs->end[dig] - regs->start[dig];
5793         size += diglen - 2;
5794       }
5795     else
5796       size -= 1;
5797
5798   /* Allocate space and do the substitutions. */
5799   assert (size >= 0);
5800   result = xnew (size + 1, char);
5801
5802   for (t = result; *out != '\0'; out++)
5803     if (*out == '\\' && ISDIGIT (*++out))
5804       {
5805         dig = *out - '0';
5806         diglen = regs->end[dig] - regs->start[dig];
5807         memcpy (t, in + regs->start[dig], diglen);
5808         t += diglen;
5809       }
5810     else
5811       *t++ = *out;
5812   *t = '\0';
5813
5814   assert (t <= result + size);
5815   assert (t - result == (int)strlen (result));
5816
5817   return result;
5818 }
5819
5820 /* Deallocate all regexps. */
5821 static void
5822 free_regexps (void)
5823 {
5824   regexp *rp;
5825   while (p_head != NULL)
5826     {
5827       rp = p_head->p_next;
5828       free (p_head->pattern);
5829       free (p_head->name);
5830       free (p_head);
5831       p_head = rp;
5832     }
5833   return;
5834 }
5835
5836 /*
5837  * Reads the whole file as a single string from `filebuf' and looks for
5838  * multi-line regular expressions, creating tags on matches.
5839  * readline already dealt with normal regexps.
5840  *
5841  * Idea by Ben Wing <ben@666.com> (2002).
5842  */
5843 static void
5844 regex_tag_multiline (void)
5845 {
5846   char *buffer = filebuf.buffer;
5847   regexp *rp;
5848   char *name;
5849
5850   for (rp = p_head; rp != NULL; rp = rp->p_next)
5851     {
5852       int match = 0;
5853
5854       if (!rp->multi_line)
5855         continue;               /* skip normal regexps */
5856
5857       /* Generic initializations before parsing file from memory. */
5858       lineno = 1;               /* reset global line number */
5859       charno = 0;               /* reset global char number */
5860       linecharno = 0;           /* reset global char number of line start */
5861
5862       /* Only use generic regexps or those for the current language. */
5863       if (rp->lang != NULL && rp->lang != curfdp->lang)
5864         continue;
5865
5866       while (match >= 0 && match < filebuf.len)
5867         {
5868           match = re_search (rp->pat, buffer, filebuf.len, charno,
5869                              filebuf.len - match, &rp->regs);
5870           switch (match)
5871             {
5872             case -2:
5873               /* Some error. */
5874               if (!rp->error_signaled)
5875                 {
5876                   error ("regexp stack overflow while matching \"%s\"",
5877                          rp->pattern);
5878                   rp->error_signaled = TRUE;
5879                 }
5880               break;
5881             case -1:
5882               /* No match. */
5883               break;
5884             default:
5885               if (match == rp->regs.end[0])
5886                 {
5887                   if (!rp->error_signaled)
5888                     {
5889                       error ("regexp matches the empty string: \"%s\"",
5890                              rp->pattern);
5891                       rp->error_signaled = TRUE;
5892                     }
5893                   match = -3;   /* exit from while loop */
5894                   break;
5895                 }
5896
5897               /* Match occurred.  Construct a tag. */
5898               while (charno < rp->regs.end[0])
5899                 if (buffer[charno++] == '\n')
5900                   lineno++, linecharno = charno;
5901               name = rp->name;
5902               if (name[0] == '\0')
5903                 name = NULL;
5904               else /* make a named tag */
5905                 name = substitute (buffer, rp->name, &rp->regs);
5906               if (rp->force_explicit_name)
5907                 /* Force explicit tag name, if a name is there. */
5908                 pfnote (name, TRUE, buffer + linecharno,
5909                         charno - linecharno + 1, lineno, linecharno);
5910               else
5911                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
5912                           charno - linecharno + 1, lineno, linecharno);
5913               break;
5914             }
5915         }
5916     }
5917 }
5918
5919 \f
5920 static bool
5921 nocase_tail (const char *cp)
5922 {
5923   register int len = 0;
5924
5925   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5926     cp++, len++;
5927   if (*cp == '\0' && !intoken (dbp[len]))
5928     {
5929       dbp += len;
5930       return TRUE;
5931     }
5932   return FALSE;
5933 }
5934
5935 static void
5936 get_tag (register char *bp, char **namepp)
5937 {
5938   register char *cp = bp;
5939
5940   if (*bp != '\0')
5941     {
5942       /* Go till you get to white space or a syntactic break */
5943       for (cp = bp + 1; !notinname (*cp); cp++)
5944         continue;
5945       make_tag (bp, cp - bp, TRUE,
5946                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5947     }
5948
5949   if (namepp != NULL)
5950     *namepp = savenstr (bp, cp - bp);
5951 }
5952
5953 /*
5954  * Read a line of text from `stream' into `lbp', excluding the
5955  * newline or CR-NL, if any.  Return the number of characters read from
5956  * `stream', which is the length of the line including the newline.
5957  *
5958  * On DOS or Windows we do not count the CR character, if any before the
5959  * NL, in the returned length; this mirrors the behavior of Emacs on those
5960  * platforms (for text files, it translates CR-NL to NL as it reads in the
5961  * file).
5962  *
5963  * If multi-line regular expressions are requested, each line read is
5964  * appended to `filebuf'.
5965  */
5966 static long
5967 readline_internal (linebuffer *lbp, register FILE *stream)
5968 {
5969   char *buffer = lbp->buffer;
5970   register char *p = lbp->buffer;
5971   register char *pend;
5972   int chars_deleted;
5973
5974   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5975
5976   for (;;)
5977     {
5978       register int c = getc (stream);
5979       if (p == pend)
5980         {
5981           /* We're at the end of linebuffer: expand it. */
5982           lbp->size *= 2;
5983           xrnew (buffer, lbp->size, char);
5984           p += buffer - lbp->buffer;
5985           pend = buffer + lbp->size;
5986           lbp->buffer = buffer;
5987         }
5988       if (c == EOF)
5989         {
5990           *p = '\0';
5991           chars_deleted = 0;
5992           break;
5993         }
5994       if (c == '\n')
5995         {
5996           if (p > buffer && p[-1] == '\r')
5997             {
5998               p -= 1;
5999 #ifdef DOS_NT
6000              /* Assume CRLF->LF translation will be performed by Emacs
6001                 when loading this file, so CRs won't appear in the buffer.
6002                 It would be cleaner to compensate within Emacs;
6003                 however, Emacs does not know how many CRs were deleted
6004                 before any given point in the file.  */
6005               chars_deleted = 1;
6006 #else
6007               chars_deleted = 2;
6008 #endif
6009             }
6010           else
6011             {
6012               chars_deleted = 1;
6013             }
6014           *p = '\0';
6015           break;
6016         }
6017       *p++ = c;
6018     }
6019   lbp->len = p - buffer;
6020
6021   if (need_filebuf              /* we need filebuf for multi-line regexps */
6022       && chars_deleted > 0)     /* not at EOF */
6023     {
6024       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6025         {
6026           /* Expand filebuf. */
6027           filebuf.size *= 2;
6028           xrnew (filebuf.buffer, filebuf.size, char);
6029         }
6030       memcpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6031       filebuf.len += lbp->len;
6032       filebuf.buffer[filebuf.len++] = '\n';
6033       filebuf.buffer[filebuf.len] = '\0';
6034     }
6035
6036   return lbp->len + chars_deleted;
6037 }
6038
6039 /*
6040  * Like readline_internal, above, but in addition try to match the
6041  * input line against relevant regular expressions and manage #line
6042  * directives.
6043  */
6044 static void
6045 readline (linebuffer *lbp, FILE *stream)
6046 {
6047   long result;
6048
6049   linecharno = charno;          /* update global char number of line start */
6050   result = readline_internal (lbp, stream); /* read line */
6051   lineno += 1;                  /* increment global line number */
6052   charno += result;             /* increment global char number */
6053
6054   /* Honor #line directives. */
6055   if (!no_line_directive)
6056     {
6057       static bool discard_until_line_directive;
6058
6059       /* Check whether this is a #line directive. */
6060       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6061         {
6062           unsigned int lno;
6063           int start = 0;
6064
6065           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6066               && start > 0)     /* double quote character found */
6067             {
6068               char *endp = lbp->buffer + start;
6069
6070               while ((endp = etags_strchr (endp, '"')) != NULL
6071                      && endp[-1] == '\\')
6072                 endp++;
6073               if (endp != NULL)
6074                 /* Ok, this is a real #line directive.  Let's deal with it. */
6075                 {
6076                   char *taggedabsname;  /* absolute name of original file */
6077                   char *taggedfname;    /* name of original file as given */
6078                   char *name;           /* temp var */
6079
6080                   discard_until_line_directive = FALSE; /* found it */
6081                   name = lbp->buffer + start;
6082                   *endp = '\0';
6083                   canonicalize_filename (name);
6084                   taggedabsname = absolute_filename (name, tagfiledir);
6085                   if (filename_is_absolute (name)
6086                       || filename_is_absolute (curfdp->infname))
6087                     taggedfname = savestr (taggedabsname);
6088                   else
6089                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6090
6091                   if (streq (curfdp->taggedfname, taggedfname))
6092                     /* The #line directive is only a line number change.  We
6093                        deal with this afterwards. */
6094                     free (taggedfname);
6095                   else
6096                     /* The tags following this #line directive should be
6097                        attributed to taggedfname.  In order to do this, set
6098                        curfdp accordingly. */
6099                     {
6100                       fdesc *fdp; /* file description pointer */
6101
6102                       /* Go look for a file description already set up for the
6103                          file indicated in the #line directive.  If there is
6104                          one, use it from now until the next #line
6105                          directive. */
6106                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6107                         if (streq (fdp->infname, curfdp->infname)
6108                             && streq (fdp->taggedfname, taggedfname))
6109                           /* If we remove the second test above (after the &&)
6110                              then all entries pertaining to the same file are
6111                              coalesced in the tags file.  If we use it, then
6112                              entries pertaining to the same file but generated
6113                              from different files (via #line directives) will
6114                              go into separate sections in the tags file.  These
6115                              alternatives look equivalent.  The first one
6116                              destroys some apparently useless information. */
6117                           {
6118                             curfdp = fdp;
6119                             free (taggedfname);
6120                             break;
6121                           }
6122                       /* Else, if we already tagged the real file, skip all
6123                          input lines until the next #line directive. */
6124                       if (fdp == NULL) /* not found */
6125                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6126                           if (streq (fdp->infabsname, taggedabsname))
6127                             {
6128                               discard_until_line_directive = TRUE;
6129                               free (taggedfname);
6130                               break;
6131                             }
6132                       /* Else create a new file description and use that from
6133                          now on, until the next #line directive. */
6134                       if (fdp == NULL) /* not found */
6135                         {
6136                           fdp = fdhead;
6137                           fdhead = xnew (1, fdesc);
6138                           *fdhead = *curfdp; /* copy curr. file description */
6139                           fdhead->next = fdp;
6140                           fdhead->infname = savestr (curfdp->infname);
6141                           fdhead->infabsname = savestr (curfdp->infabsname);
6142                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6143                           fdhead->taggedfname = taggedfname;
6144                           fdhead->usecharno = FALSE;
6145                           fdhead->prop = NULL;
6146                           fdhead->written = FALSE;
6147                           curfdp = fdhead;
6148                         }
6149                     }
6150                   free (taggedabsname);
6151                   lineno = lno - 1;
6152                   readline (lbp, stream);
6153                   return;
6154                 } /* if a real #line directive */
6155             } /* if #line is followed by a number */
6156         } /* if line begins with "#line " */
6157
6158       /* If we are here, no #line directive was found. */
6159       if (discard_until_line_directive)
6160         {
6161           if (result > 0)
6162             {
6163               /* Do a tail recursion on ourselves, thus discarding the contents
6164                  of the line buffer. */
6165               readline (lbp, stream);
6166               return;
6167             }
6168           /* End of file. */
6169           discard_until_line_directive = FALSE;
6170           return;
6171         }
6172     } /* if #line directives should be considered */
6173
6174   {
6175     int match;
6176     regexp *rp;
6177     char *name;
6178
6179     /* Match against relevant regexps. */
6180     if (lbp->len > 0)
6181       for (rp = p_head; rp != NULL; rp = rp->p_next)
6182         {
6183           /* Only use generic regexps or those for the current language.
6184              Also do not use multiline regexps, which is the job of
6185              regex_tag_multiline. */
6186           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6187               || rp->multi_line)
6188             continue;
6189
6190           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6191           switch (match)
6192             {
6193             case -2:
6194               /* Some error. */
6195               if (!rp->error_signaled)
6196                 {
6197                   error ("regexp stack overflow while matching \"%s\"",
6198                          rp->pattern);
6199                   rp->error_signaled = TRUE;
6200                 }
6201               break;
6202             case -1:
6203               /* No match. */
6204               break;
6205             case 0:
6206               /* Empty string matched. */
6207               if (!rp->error_signaled)
6208                 {
6209                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6210                   rp->error_signaled = TRUE;
6211                 }
6212               break;
6213             default:
6214               /* Match occurred.  Construct a tag. */
6215               name = rp->name;
6216               if (name[0] == '\0')
6217                 name = NULL;
6218               else /* make a named tag */
6219                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6220               if (rp->force_explicit_name)
6221                 /* Force explicit tag name, if a name is there. */
6222                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6223               else
6224                 make_tag (name, strlen (name), TRUE,
6225                           lbp->buffer, match, lineno, linecharno);
6226               break;
6227             }
6228         }
6229   }
6230 }
6231
6232 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is made by
 * savenstr with a length of strlen (CP), so the caller receives a
 * freshly allocated, NUL-terminated string.
 */
static char *
savestr (const char *cp)
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6242
6243 /*
6244  * Return a pointer to a space of size LEN+1 allocated with xnew where
6245  * the string CP has been copied for at most the first LEN characters.
6246  */
6247 static char *
6248 savenstr (const char *cp, int len)
6249 {
6250   register char *dp;
6251
6252   dp = xnew (len + 1, char);
6253   memcpy (dp, cp, len);
6254   dp[len] = '\0';
6255   return dp;
6256 }
6257
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The
 * terminating NUL takes part in the search, so asking for '\0'
 * yields a pointer to the end of the string.
 *
 * Stand-in for POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (register const char *sp, register int c)
{
  const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;		/* remember the most recent hit */
      if (*sp == '\0')
        break;			/* terminator has been examined too */
    }
  return (char *) last;
}
6277
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The
 * terminating NUL takes part in the search, so asking for '\0'
 * yields a pointer to the end of the string.
 *
 * Stand-in for POSIX strchr, included for portability.
 */
static char *
etags_strchr (register const char *sp, register int c)
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;		/* reached the end without a match */
    }
}
6294
/* Advance CP past every character for which iswhite is true and
   return the new position.  As the original note states, the end of
   the string is not white, so the scan stops there at the latest.  */
static char *
skip_spaces (char *cp)
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
6303
/* Advance CP up to the first character that is either white (per
   iswhite) or the terminating NUL, and return that position.  */
static char *
skip_non_spaces (char *cp)
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
6312
/* Advance CP past every character of the "name" class, i.e. until
   notinname reports true, and return the new position.  */
static char *
skip_name (char *cp)
{
  /* Per the original note, '\0' counts as notinname, so the scan
     also stops at the end of the string.  */
  for (; !notinname (*cp); cp++)
    continue;
  return cp;
}
6322
/* Report a fatal condition and terminate.  S1 is handed to error as
   the printf-style format and S2 as its single argument; the process
   then exits with status EXIT_FAILURE, so this never returns.  */
void
fatal (const char *s1, const char *s2)
{
  error (s1, s2);
  exit (EXIT_FAILURE);
}
6330
/* Report the failure described by the current errno, using S1 as
   the prefix passed to perror, then exit with status EXIT_FAILURE.
   Never returns.  */
static void
pfatal (const char *s1)
{
  perror (s1);
  exit (EXIT_FAILURE);
}
6337
6338 static void
6339 suggest_asking_for_help (void)
6340 {
6341   fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
6342            progname);
6343   exit (EXIT_FAILURE);
6344 }
6345
6346 /* Output a diagnostic with printf-style FORMAT and args.  */
6347 static void
6348 error (const char *format, ...)
6349 {
6350   va_list ap;
6351   va_start (ap, format);
6352   fprintf (stderr, "%s: ", progname);
6353   vfprintf (stderr, format, ap);
6354   fprintf (stderr, "\n");
6355   va_end (ap);
6356 }
6357
6358 /* Return a newly-allocated string whose contents
6359    concatenate those of s1, s2, s3.  */
6360 static char *
6361 concat (const char *s1, const char *s2, const char *s3)
6362 {
6363   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6364   char *result = xnew (len1 + len2 + len3 + 1, char);
6365
6366   strcpy (result, s1);
6367   strcpy (result + len1, s2);
6368   strcpy (result + len1 + len2, s3);
6369   result[len1 + len2 + len3] = '\0';
6370
6371   return result;
6372 }
6373
6374 \f
6375 /* Does the same work as the system V getcwd, but does not need to
6376    guess the buffer size in advance. */
6377 static char *
6378 etags_getcwd (void)
6379 {
6380   int bufsize = 200;
6381   char *path = xnew (bufsize, char);
6382
6383   while (getcwd (path, bufsize) == NULL)
6384     {
6385       if (errno != ERANGE)
6386         pfatal ("getcwd");
6387       bufsize *= 2;
6388       free (path);
6389       path = xnew (bufsize, char);
6390     }
6391
6392   canonicalize_filename (path);
6393   return path;
6394 }
6395
6396 /* Return a newly allocated string containing the file name of FILE
6397    relative to the absolute directory DIR (which should end with a slash). */
6398 static char *
6399 relative_filename (char *file, char *dir)
6400 {
6401   char *fp, *dp, *afn, *res;
6402   int i;
6403
6404   /* Find the common root of file and dir (with a trailing slash). */
6405   afn = absolute_filename (file, cwd);
6406   fp = afn;
6407   dp = dir;
6408   while (*fp++ == *dp++)
6409     continue;
6410   fp--, dp--;                   /* back to the first differing char */
6411 #ifdef DOS_NT
6412   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6413     return afn;
6414 #endif
6415   do                            /* look at the equal chars until '/' */
6416     fp--, dp--;
6417   while (*fp != '/');
6418
6419   /* Build a sequence of "../" strings for the resulting relative file name. */
6420   i = 0;
6421   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6422     i += 1;
6423   res = xnew (3*i + strlen (fp + 1) + 1, char);
6424   res[0] = '\0';
6425   while (i-- > 0)
6426     strcat (res, "../");
6427
6428   /* Add the file name relative to the common root of file and dir. */
6429   strcat (res, fp + 1);
6430   free (afn);
6431
6432   return res;
6433 }
6434
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is
   already absolute it is merely copied, otherwise it is appended to
   DIR.  "/dirname/.." and "/." components are then removed from the
   result in place.  Should the result end up empty, "/" is returned
   instead.  */
static char *
absolute_filename (char *file, char *dir)
{
  char *res, *slash;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Visit the slashes one by one.  After a deletion the same position
     is examined again, since new text has moved under it.  */
  slash = etags_strchr (res, '/');
  while (slash != NULL && slash[0] != '\0')
    {
      if (slash[1] == '.' && slash[2] == '.'
          && (slash[3] == '/' || slash[3] == '\0'))
        {
          /* "/.." component: find where the preceding component
             starts, so that it can be dropped together with it.  */
          char *start;

          for (start = slash - 1;
               start >= res && !filename_is_absolute (start);
               start--)
            continue;
          if (start < res)
            start = slash;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
          /* Under MSDOS and NT we get `d:/NAME' as absolute
             file name, so the luser could say `d:/../NAME'.
             We silently treat this as `d:/NAME'.  */
          else if (start[0] != '/')
            start = slash;
#endif
          /* Shift the tail, terminating NUL included, over the
             removed text.  */
          memmove (start, slash + 3, strlen (slash + 2));
          slash = start;
        }
      else if (slash[1] == '.' && (slash[2] == '/' || slash[2] == '\0'))
        {
          /* "/." component: simply close the gap.  */
          memmove (slash, slash + 2, strlen (slash + 1));
        }
      else
        slash = etags_strchr (slash + 1, '/');
    }

  if (res[0] != '\0')
    return res;

  /* Just a safety net: should never happen.  */
  free (res);
  return savestr ("/");
}
6497
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end with
   a slash).  When FILE contains no slash the answer is a copy of
   DIR.  FILE is modified temporarily but restored before returning.  */
static char *
absolute_dirname (char *file, char *dir)
{
  char *last_slash = etags_strrchr (file, '/');
  char *res;
  char saved;

  if (last_slash == NULL)
    return savestr (dir);

  /* Cut FILE right after its last slash, resolve what is left, then
     put the overwritten character back.  */
  saved = last_slash[1];
  last_slash[1] = '\0';
  res = absolute_filename (file, dir);
  last_slash[1] = saved;

  return res;
}
6517
/* Tell whether FN is an absolute file name: one that starts with a
   slash or, under DOS_NT, with a drive letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename.  */
static bool
filename_is_absolute (char *fn)
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA (fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6529
/* Canonicalize the file name FN in place: every run of directory
   separators is replaced by a single '/'.  Under DOS_NT an upper-case
   drive letter is downcased as well, and both '/' and '\' count as
   separators, so that a mixed name like `D:\foo//bar' comes out as
   `d:/foo/bar'.  The result is never longer than the input.  */
static void
canonicalize_filename (register char *fn)
{
  register char *cp;
  /* Second separator character; differs from '/' only under DOS_NT.  */
  char altsep = '/';

#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
# define ISUPPER(c)     isupper (CHAR (c))
  if (fn[0] != '\0' && fn[1] == ':' && ISUPPER (fn[0]))
    fn[0] = lowcase (fn[0]);

  altsep = '\\';
#endif

  /* CP reads ahead while FN writes behind it.  A separator, of either
     kind, is emitted as '/', and the separators directly following it
     are skipped.  */
  for (cp = fn; *cp != '\0'; cp++, fn++)
    if (*cp == '/' || *cp == altsep)
      {
        *fn = '/';
        while (cp[1] == '/' || cp[1] == altsep)
          cp++;
      }
    else
      *fn = *cp;
  *fn = '\0';
}
6559
6560 \f
6561 /* Initialize a linebuffer for use. */
6562 static void
6563 linebuffer_init (linebuffer *lbp)
6564 {
6565   lbp->size = (DEBUG) ? 3 : 200;
6566   lbp->buffer = xnew (lbp->size, char);
6567   lbp->buffer[0] = '\0';
6568   lbp->len = 0;
6569 }
6570
6571 /* Set the minimum size of a string contained in a linebuffer. */
6572 static void
6573 linebuffer_setlen (linebuffer *lbp, int toksize)
6574 {
6575   while (lbp->size <= toksize)
6576     {
6577       lbp->size *= 2;
6578       xrnew (lbp->buffer, lbp->size, char);
6579     }
6580   lbp->len = toksize;
6581 }
6582
/* Allocate SIZE bytes with malloc and return the block.  A NULL
   result from malloc is treated as memory exhaustion and reported
   through fatal.  */
static void *
xmalloc (size_t size)
{
  void *mem = malloc (size);

  if (!mem)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
6592
/* Resize the block PTR to SIZE bytes with realloc and return the
   possibly moved block.  A NULL result from realloc is treated as
   memory exhaustion and reported through fatal.  */
static void *
xrealloc (char *ptr, size_t size)
{
  void *mem = realloc (ptr, size);

  if (!mem)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
6601
6602 /*
6603  * Local Variables:
6604  * indent-tabs-mode: t
6605  * tab-width: 8
6606  * fill-column: 79
6607  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6608  * c-file-style: "gnu"
6609  * End:
6610  */
6611
6612 /* etags.c ends here */