lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005, 2006 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.26";
  45
  46 #define TRUE    1
  47 #define FALSE   0
  48
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # ifndef PTR                    /* for XEmacs */
  63 #   define PTR void *
  64 # endif
  65 # ifndef __P                    /* for XEmacs */
  66 #   define __P(args) args
  67 # endif
  68 #else  /* no config.h */
  69 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  70 #   define __P(args) args       /* use prototypes */
  71 #   define PTR void *           /* for generic pointers */
  72 # else /* not standard C */
  73 #   define __P(args) ()         /* no prototypes */
  74 #   define const                /* remove const for old compilers' sake */
  75 #   define PTR long *           /* don't use void* */
  76 # endif
  77 #endif /* !HAVE_CONFIG_H */
  78
  79 #ifndef _GNU_SOURCE
  80 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  81 #endif
  82
  83 /* WIN32_NATIVE is for XEmacs.
  84    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  85 #ifdef WIN32_NATIVE
  86 # undef MSDOS
  87 # undef  WINDOWSNT
  88 # define WINDOWSNT
  89 #endif /* WIN32_NATIVE */
  90
  91 #ifdef MSDOS
  92 # undef MSDOS
  93 # define MSDOS TRUE
  94 # include <fcntl.h>
  95 # include <sys/param.h>
  96 # include <io.h>
  97 # ifndef HAVE_CONFIG_H
  98 #   define DOS_NT
  99 #   include <sys/config.h>
 100 # endif
 101 #else
 102 # define MSDOS FALSE
 103 #endif /* MSDOS */
 104
 105 #ifdef WINDOWSNT
 106 # include <stdlib.h>
 107 # include <fcntl.h>
 108 # include <string.h>
 109 # include <direct.h>
 110 # include <io.h>
 111 # define MAXPATHLEN _MAX_PATH
 112 # undef HAVE_NTGUI
 113 # undef  DOS_NT
 114 # define DOS_NT
 115 # ifndef HAVE_GETCWD
 116 #   define HAVE_GETCWD
 117 # endif /* undef HAVE_GETCWD */
 118 #else /* not WINDOWSNT */
 119 # ifdef STDC_HEADERS
 120 #  include <stdlib.h>
 121 #  include <string.h>
 122 # else /* no standard C headers */
 123     extern char *getenv ();
 124 #  ifdef VMS
 125 #   define EXIT_SUCCESS 1
 126 #   define EXIT_FAILURE 0
 127 #  else /* no VMS */
 128 #   define EXIT_SUCCESS 0
 129 #   define EXIT_FAILURE 1
 130 #  endif
 131 # endif
 132 #endif /* !WINDOWSNT */
 133
 134 #ifdef HAVE_UNISTD_H
 135 # include <unistd.h>
 136 #else
 137 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 138     extern char *getcwd (char *buf, size_t size);
 139 # endif
 140 #endif /* HAVE_UNISTD_H */
 141
 142 #include <stdio.h>
 143 #include <ctype.h>
 144 #include <errno.h>
 145 #ifndef errno
 146   extern int errno;
 147 #endif
 148 #include <sys/types.h>
 149 #include <sys/stat.h>
 150
 151 #include <assert.h>
 152 #ifdef NDEBUG
 153 # undef  assert                 /* some systems have a buggy assert.h */
 154 # define assert(x) ((void) 0)
 155 #endif
 156
 157 #if !defined (S_ISREG) && defined (S_IFREG)
 158 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 159 #endif
 160
 161 #ifdef NO_LONG_OPTIONS          /* define this if you don't have GNU getopt */
 162 # define NO_LONG_OPTIONS TRUE
 163 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 164   extern char *optarg;
 165   extern int optind, opterr;
 166 #else
 167 # define NO_LONG_OPTIONS FALSE
 168 # include <getopt.h>
 169 #endif /* NO_LONG_OPTIONS */
 170
 171 #ifndef HAVE_CONFIG_H           /* this is a standalone compilation */
 172 # ifdef __CYGWIN__              /* compiling on Cygwin */
 173                              !!! NOTICE !!!
 174  the regex.h distributed with Cygwin is not compatible with etags, alas!
 175 If you want regular expression support, you should delete this notice and
 176               arrange to use the GNU regex.h and regex.c.
 177 # endif
 178 #endif
 179 #include <regex.h>
 180
 181 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 182  Leave it undefined to make the program "etags", which makes emacs-style
 183  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 184 #ifdef CTAGS
 185 # undef  CTAGS
 186 # define CTAGS TRUE
 187 #else
 188 # define CTAGS FALSE
 189 #endif
 190
 191 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 192 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 193 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 194 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 195
  196 #define CHARS 256               /* 2^8: one table entry per 8-bit char value */
  197 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* index in 0..CHARS-1 for any char */
  198 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
  199 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
  200 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
  201 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
  202 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
  203 /* <ctype.h> wrappers: the argument is mapped through CHAR first, so it is never negative. */
  204 #define ISALNUM(c)      isalnum (CHAR(c))
  205 #define ISALPHA(c)      isalpha (CHAR(c))
  206 #define ISDIGIT(c)      isdigit (CHAR(c))
  207 #define ISLOWER(c)      islower (CHAR(c))
  208 /* Case conversion, argument likewise mapped through CHAR. */
  209 #define lowcase(c)      tolower (CHAR(c))
  210 #define upcase(c)       toupper (CHAR(c))
 211
 212
  213 /*
  214  *      xnew, xrnew -- allocate, reallocate storage for N objects of Type
  215  *      (the byte count N * sizeof (Type) is not checked for overflow)
  216  * SYNOPSIS:    Type *xnew (int n, Type);               yields the new storage
  217  *              void xrnew (OldPointer, int n, Type);   OldPointer is reassigned
  218  */
  219 #if DEBUG                       /* debug build: use the tracing allocators, */
  220 # include "chkmalloc.h"         /* which are passed the caller's file and line */
  221 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
  222                                                   (n) * sizeof (Type)))
  223 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
  224                                         (char *) (op), (n) * sizeof (Type)))
  225 #else                           /* normal build: go through xmalloc / xrealloc */
  226 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
  227 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
  228                                         (char *) (op), (n) * sizeof (Type)))
  229 #endif
 230
 231 #define bool int
 232
 233 typedef void Lang_function __P((FILE *));
 234
 235 typedef struct
 236 {
 237   char *suffix;                 /* file name suffix for this compressor */
 238   char *command;                /* takes one arg and decompresses to stdout */
 239 } compressor;
 240
 241 typedef struct
 242 {
 243   char *name;                   /* language name */
 244   char *help;                   /* detailed help for the language */
 245   Lang_function *function;      /* parse function */
 246   char **suffixes;              /* name suffixes of this language's files */
 247   char **filenames;             /* names of this language's files */
 248   char **interpreters;          /* interpreters for this language */
 249   bool metasource;              /* source used to generate other sources */
 250 } language;
 251
 252 typedef struct fdesc
 253 {
 254   struct fdesc *next;           /* for the linked list */
 255   char *infname;                /* uncompressed input file name */
 256   char *infabsname;             /* absolute uncompressed input file name */
 257   char *infabsdir;              /* absolute dir of input file */
 258   char *taggedfname;            /* file name to write in tagfile */
 259   language *lang;               /* language of file */
 260   char *prop;                   /* file properties to write in tagfile */
 261   bool usecharno;               /* etags tags shall contain char number */
 262   bool written;                 /* entry written in the tags file */
 263 } fdesc;
 264
  265 typedef struct node_st
  266 {                               /* one tag; node of the binary tree of tags */
  267   struct node_st *left, *right; /* left and right children */
  268   fdesc *fdp;                   /* description of the file the tag belongs to */
  269   char *name;                   /* tag name */
  270   char *regex;                  /* search regexp */
  271   bool valid;                   /* write this tag on the tag file */
  272   bool is_func;                 /* function tag: use regexp in CTAGS mode */
  273   bool been_warned;             /* warning already given for duplicated tag */
  274   int lno;                      /* line number tag is on */
  275   long cno;                     /* character number line starts on */
  276 } node;
 277
 278 /*
 279  * A `linebuffer' is a structure which holds a line of text.
 280  * `readline_internal' reads a line from a stream into a linebuffer
 281  * and works regardless of the length of the line.
 282  * SIZE is the size of BUFFER, LEN is the length of the string in
 283  * BUFFER after readline reads it.
 284  */
  285 typedef struct
  286 {
  287   long size;                    /* size of BUFFER */
  288   int len;                      /* length of the string currently in BUFFER */
  289   char *buffer;                 /* the stored text */
  290 } linebuffer;
 291
 292 /* Used to support mixing of --lang and file names. */
 293 typedef struct
 294 {
 295   enum {
 296     at_language,                /* a language specification */
 297     at_regexp,                  /* a regular expression */
 298     at_filename,                /* a file name */
 299     at_stdin,                   /* read from stdin here */
 300     at_end                      /* stop parsing the list */
 301   } arg_type;                   /* argument type */
 302   language *lang;               /* language associated with the argument */
 303   char *what;                   /* the argument itself */
 304 } argument;
 305
 306 /* Structure defining a regular expression. */
  307 typedef struct regexp
  308 {
  309   struct regexp *p_next;        /* pointer to next in list */
  310   language *lang;               /* if set, use only for this language */
  311   char *pattern;                /* the regexp pattern */
  312   char *name;                   /* tag name */
  313   struct re_pattern_buffer *pat; /* the compiled pattern */
  314   struct re_registers regs;     /* re registers */
  315   bool error_signaled;          /* an error was already signaled for this regexp */
  316   bool force_explicit_name;     /* do not allow implicit tag name */
  317   bool ignore_case;             /* ignore case when matching */
  318   bool multi_line;              /* do a multi-line match on the whole file */
  319 } regexp;
 320
 321
 322 /* Many compilers barf on this:
 323         Lang_function Ada_funcs;
 324    so let's write it this way */
 325 static void Ada_funcs __P((FILE *));
 326 static void Asm_labels __P((FILE *));
 327 static void C_entries __P((int c_ext, FILE *));
 328 static void default_C_entries __P((FILE *));
 329 static void plain_C_entries __P((FILE *));
 330 static void Cjava_entries __P((FILE *));
 331 static void Cobol_paragraphs __P((FILE *));
 332 static void Cplusplus_entries __P((FILE *));
 333 static void Cstar_entries __P((FILE *));
 334 static void Erlang_functions __P((FILE *));
 335 static void Forth_words __P((FILE *));
 336 static void Fortran_functions __P((FILE *));
 337 static void HTML_labels __P((FILE *));
 338 static void Lisp_functions __P((FILE *));
 339 static void Lua_functions __P((FILE *));
 340 static void Makefile_targets __P((FILE *));
 341 static void Pascal_functions __P((FILE *));
 342 static void Perl_functions __P((FILE *));
 343 static void PHP_functions __P((FILE *));
 344 static void PS_functions __P((FILE *));
 345 static void Prolog_functions __P((FILE *));
 346 static void Python_functions __P((FILE *));
 347 static void Scheme_functions __P((FILE *));
 348 static void TeX_commands __P((FILE *));
 349 static void Texinfo_nodes __P((FILE *));
 350 static void Yacc_entries __P((FILE *));
 351 static void just_read_file __P((FILE *));
 352
 353 static void print_language_names __P((void));
 354 static void print_version __P((void));
 355 static void print_help __P((argument *));
 356 int main __P((int, char **));
 357
 358 static compressor *get_compressor_from_suffix __P((char *, char **));
 359 static language *get_language_from_langname __P((const char *));
 360 static language *get_language_from_interpreter __P((char *));
 361 static language *get_language_from_filename __P((char *, bool));
 362 static void readline __P((linebuffer *, FILE *));
 363 static long readline_internal __P((linebuffer *, FILE *));
 364 static bool nocase_tail __P((char *));
 365 static void get_tag __P((char *, char **));
 366
 367 static void analyse_regex __P((char *));
 368 static void free_regexps __P((void));
 369 static void regex_tag_multiline __P((void));
 370 static void error __P((const char *, const char *));
 371 static void suggest_asking_for_help __P((void));
 372 void fatal __P((char *, char *));
 373 static void pfatal __P((char *));
 374 static void add_node __P((node *, node **));
 375
 376 static void init __P((void));
 377 static void process_file_name __P((char *, language *));
 378 static void process_file __P((FILE *, char *, language *));
 379 static void find_entries __P((FILE *));
 380 static void free_tree __P((node *));
 381 static void free_fdesc __P((fdesc *));
 382 static void pfnote __P((char *, bool, char *, int, int, long));
 383 static void make_tag __P((char *, int, bool, char *, int, int, long));
 384 static void invalidate_nodes __P((fdesc *, node **));
 385 static void put_entries __P((node *));
 386
 387 static char *concat __P((char *, char *, char *));
 388 static char *skip_spaces __P((char *));
 389 static char *skip_non_spaces __P((char *));
 390 static char *savenstr __P((char *, int));
 391 static char *savestr __P((char *));
 392 static char *etags_strchr __P((const char *, int));
 393 static char *etags_strrchr __P((const char *, int));
 394 static int etags_strcasecmp __P((const char *, const char *));
 395 static int etags_strncasecmp __P((const char *, const char *, int));
 396 static char *etags_getcwd __P((void));
 397 static char *relative_filename __P((char *, char *));
 398 static char *absolute_filename __P((char *, char *));
 399 static char *absolute_dirname __P((char *, char *));
 400 static bool filename_is_absolute __P((char *f));
 401 static void canonicalize_filename __P((char *));
 402 static void linebuffer_init __P((linebuffer *));
 403 static void linebuffer_setlen __P((linebuffer *, int));
 404 static PTR xmalloc __P((unsigned int));
 405 static PTR xrealloc __P((char *, unsigned int));
 406
 407 \f
 408 static char searchar = '/';     /* use /.../ searches */
 409
 410 static char *tagfile;           /* output file */
 411 static char *progname;          /* name this program was invoked with */
 412 static char *cwd;               /* current working directory */
 413 static char *tagfiledir;        /* directory of tagfile */
 414 static FILE *tagf;              /* ioptr for tags file */
 415
 416 static fdesc *fdhead;           /* head of file description list */
 417 static fdesc *curfdp;           /* current file description */
 418 static int lineno;              /* line number of current line */
 419 static long charno;             /* current character number */
 420 static long linecharno;         /* charno of start of current line */
 421 static char *dbp;               /* pointer to start of current tag */
 422
 423 static const int invalidcharno = -1;
 424
 425 static node *nodehead;          /* the head of the binary tree of tags */
 426 static node *last_node;         /* the last node created */
 427
 428 static linebuffer lb;           /* the current line */
 429 static linebuffer filebuf;      /* a buffer containing the whole file */
 430 static linebuffer token_name;   /* a buffer containing a tag name */
 431
 432 /* boolean "functions" (see init)       */
 433 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 434 static char
 435   /* white chars */
 436   *white = " \f\t\n\r\v",
 437   /* not in a name */
 438   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 439   /* token ending chars */
 440   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 441   /* token starting chars */
 442   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 443   /* valid in-token chars */
 444   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 445
 446 static bool append_to_tagfile;  /* -a: append to tags */
 447 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 448 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 449 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 450                                 /* 0 struct/enum/union decls, and C++ */
 451                                 /* member functions. */
 452 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 453                                 /* constants and variables. */
 454                                 /* -D: opposite of -d.  Default under ctags. */
 455 static bool globals;            /* create tags for global variables */
 456 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 457 static bool members;            /* create tags for C member variables */
 458 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 459 static bool no_duplicates;      /* no duplicate tags for ctags (undocumented) */
 460 static bool update;             /* -u: update tags */
 461 static bool vgrind_style;       /* -v: create vgrind style index output */
 462 static bool no_warnings;        /* -w: suppress warnings (undocumented) */
 463 static bool cxref_style;        /* -x: create cxref style output */
 464 static bool cplusplus;          /* .[hc] means C++, not C */
 465 static bool ignoreindent;       /* -I: ignore indentation in C */
 466 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 467
 468 /* STDIN is defined in LynxOS system headers */
 469 #ifdef STDIN
 470 # undef STDIN
 471 #endif
 472
 473 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 474 static bool parsing_stdin;      /* --parse-stdin used */
 475
 476 static regexp *p_head;          /* list of all regexps */
 477 static bool need_filebuf;       /* some regexes are multi-line */
 478
  479 static struct option longopts[] = /* long option table handed to getopt_long */
  480 {
  481   { "append",             no_argument,       NULL,               'a'   },
  482   { "packages-only",      no_argument,       &packages_only,     TRUE  }, /* flag entry: getopt_long stores the value in the variable */
  483   { "c++",                no_argument,       NULL,               'C'   },
  484   { "declarations",       no_argument,       &declarations,      TRUE  },
  485   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  486   { "no-duplicates",      no_argument,       &no_duplicates,     TRUE  },
  487   { "help",               no_argument,       NULL,               'h'   },
  488   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same name as the entry above; presumably never matched by name -- confirm */
  489   { "ignore-indentation", no_argument,       NULL,               'I'   },
  490   { "language",           required_argument, NULL,               'l'   },
  491   { "members",            no_argument,       &members,           TRUE  },
  492   { "no-members",         no_argument,       &members,           FALSE },
  493   { "output",             required_argument, NULL,               'o'   },
  494   { "regex",              required_argument, NULL,               'r'   },
  495   { "no-regex",           no_argument,       NULL,               'R'   },
  496   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  497   { "parse-stdin",        required_argument, NULL,               STDIN },
  498   { "version",            no_argument,       NULL,               'V'   },
  499
  500 #if CTAGS /* Ctags options */
  501   { "backward-search",    no_argument,       NULL,               'B'   },
  502   { "cxref",              no_argument,       NULL,               'x'   },
  503   { "defines",            no_argument,       NULL,               'd'   },
  504   { "globals",            no_argument,       &globals,           TRUE  },
  505   { "typedefs",           no_argument,       NULL,               't'   },
  506   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  507   { "update",             no_argument,       NULL,               'u'   },
  508   { "vgrind",             no_argument,       NULL,               'v'   },
  509   { "no-warn",            no_argument,       NULL,               'w'   },
  510
  511 #else /* Etags options */
  512   { "no-defines",         no_argument,       NULL,               'D'   },
  513   { "no-globals",         no_argument,       &globals,           FALSE },
  514   { "include",            required_argument, NULL,               'i'   },
  515 #endif
  516   { NULL }                      /* all-zero entry terminates the table */
  517 };
 518
 519 static compressor compressors[] =
 520 {
 521   { "z", "gzip -d -c"},
 522   { "Z", "gzip -d -c"},
 523   { "gz", "gzip -d -c"},
 524   { "GZ", "gzip -d -c"},
 525   { "bz2", "bzip2 -d -c" },
 526   { NULL }
 527 };
 528
 529 /*
 530  * Language stuff.
 531  */
 532
 533 /* Ada code */
 534 static char *Ada_suffixes [] =
 535   { "ads", "adb", "ada", NULL };
 536 static char Ada_help [] =
 537 "In Ada code, functions, procedures, packages, tasks and types are\n\
 538 tags.  Use the `--packages-only' option to create tags for\n\
 539 packages only.\n\
 540 Ada tag names have suffixes indicating the type of entity:\n\
 541         Entity type:    Qualifier:\n\
 542         ------------    ----------\n\
 543         function        /f\n\
 544         procedure       /p\n\
 545         package spec    /s\n\
 546         package body    /b\n\
 547         type            /t\n\
 548         task            /k\n\
 549 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 550 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 551 will just search for any tag `bidule'.";
 552
 553 /* Assembly code */
 554 static char *Asm_suffixes [] =
 555   { "a",        /* Unix assembler */
 556     "asm", /* Microcontroller assembly */
 557     "def", /* BSO/Tasking definition includes  */
 558     "inc", /* Microcontroller include files */
 559     "ins", /* Microcontroller include files */
 560     "s", "sa", /* Unix assembler */
 561     "S",   /* cpp-processed Unix assembler */
 562     "src", /* BSO/Tasking C compiler output */
 563     NULL
 564   };
 565 static char Asm_help [] =
 566 "In assembler code, labels appearing at the beginning of a line,\n\
 567 followed by a colon, are tags.";
 568
 569
 570 /* Note that .c and .h can be considered C++, if the --c++ flag was
  571    given, or if the `class' or `template' keywords are met inside the file.
 572    That is why default_C_entries is called for these. */
 573 static char *default_C_suffixes [] =
 574   { "c", "h", NULL };
 575 static char default_C_help [] =
 576 "In C code, any C function or typedef is a tag, and so are\n\
 577 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 578 definitions and `enum' constants are tags unless you specify\n\
 579 `--no-defines'.  Global variables are tags unless you specify\n\
 580 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 581 can make the tags table file much smaller.\n\
 582 You can tag function declarations and external variables by\n\
 583 using `--declarations', and struct members by using `--members'.";
 584
 585 static char *Cplusplus_suffixes [] =
 586   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 587     "M",                        /* Objective C++ */
 588     "pdb",                      /* Postscript with C syntax */
 589     NULL };
 590 static char Cplusplus_help [] =
 591 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 592 --help --lang=c --lang=c++ for full help.)\n\
 593 In addition to C tags, member functions are also recognized, and\n\
 594 optionally member variables if you use the `--members' option.\n\
 595 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 596 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 597 `operator+'.";
 598
 599 static char *Cjava_suffixes [] =
 600   { "java", NULL };
 601 static char Cjava_help [] =
 602 "In Java code, all the tags constructs of C and C++ code are\n\
 603 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 604
 605
 606 static char *Cobol_suffixes [] =
 607   { "COB", "cob", NULL };
 608 static char Cobol_help [] =
 609 "In Cobol code, tags are paragraph names; that is, any word\n\
 610 starting in column 8 and followed by a period.";
 611
 612 static char *Cstar_suffixes [] =
 613   { "cs", "hs", NULL };
 614
 615 static char *Erlang_suffixes [] =
 616   { "erl", "hrl", NULL };
 617 static char Erlang_help [] =
 618 "In Erlang code, the tags are the functions, records and macros\n\
 619 defined in the file.";
 620
 621 char *Forth_suffixes [] =
 622   { "fth", "tok", NULL };
 623 static char Forth_help [] =
 624 "In Forth code, tags are words defined by `:',\n\
 625 constant, code, create, defer, value, variable, buffer:, field.";
 626
 627 static char *Fortran_suffixes [] =
 628   { "F", "f", "f90", "for", NULL };
 629 static char Fortran_help [] =
 630 "In Fortran code, functions, subroutines and block data are tags.";
 631
 632 static char *HTML_suffixes [] =
 633   { "htm", "html", "shtml", NULL };
 634 static char HTML_help [] =
 635 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 636 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 637 occurrences of `id='.";
 638
 639 static char *Lisp_suffixes [] =
 640   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 641 static char Lisp_help [] =
 642 "In Lisp code, any function defined with `defun', any variable\n\
 643 defined with `defvar' or `defconst', and in general the first\n\
 644 argument of any expression that starts with `(def' in column zero\n\
 645 is a tag.";
 646
 647 static char *Lua_suffixes [] =
 648   { "lua", "LUA", NULL };
 649 static char Lua_help [] =
 650 "In Lua scripts, all functions are tags.";
 651
 652 static char *Makefile_filenames [] =
 653   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 654 static char Makefile_help [] =
 655 "In makefiles, targets are tags; additionally, variables are tags\n\
 656 unless you specify `--no-globals'.";
 657
 658 static char *Objc_suffixes [] =
 659   { "lm",                       /* Objective lex file */
 660     "m",                        /* Objective C file */
 661      NULL };
 662 static char Objc_help [] =
 663 "In Objective C code, tags include Objective C definitions for classes,\n\
 664 class categories, methods and protocols.  Tags for variables and\n\
 665 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.\n\
 666 (Use --help --lang=c --lang=objc --lang=java for full help.)";
 667
 668 static char *Pascal_suffixes [] =
 669   { "p", "pas", NULL };
 670 static char Pascal_help [] =
 671 "In Pascal code, the tags are the functions and procedures defined\n\
 672 in the file.";
 673 /* " // this is for working around an Emacs highlighting bug... */
 674
 675 static char *Perl_suffixes [] =
 676   { "pl", "pm", NULL };
 677 static char *Perl_interpreters [] =
 678   { "perl", "@PERL@", NULL };
 679 static char Perl_help [] =
 680 "In Perl code, the tags are the packages, subroutines and variables\n\
 681 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 682 `--globals' if you want to tag global variables.  Tags for\n\
 683 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 684 defined in the default package is `main::SUB'.";
 685
 686 static char *PHP_suffixes [] =
 687   { "php", "php3", "php4", NULL };
 688 static char PHP_help [] =
 689 "In PHP code, tags are functions, classes and defines.  When using\n\
 690 the `--members' option, vars are tags too.";
 691
 692 static char *plain_C_suffixes [] =
 693   { "pc",                       /* Pro*C file */
 694      NULL };
 695
 696 static char *PS_suffixes [] =
 697   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 698 static char PS_help [] =
 699 "In PostScript code, the tags are the functions.";
 700
 701 static char *Prolog_suffixes [] =
 702   { "prolog", NULL };
 703 static char Prolog_help [] =
 704 "In Prolog code, tags are predicates and rules at the beginning of\n\
 705 line.";
 706
 707 static char *Python_suffixes [] =
 708   { "py", NULL };
 709 static char Python_help [] =
 710 "In Python code, `def' or `class' at the beginning of a line\n\
 711 generate a tag.";
 712
 713 /* Can't do the `SCM' or `scm' prefix with a version number. */
 714 static char *Scheme_suffixes [] =
 715   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 716 static char Scheme_help [] =
 717 "In Scheme code, tags include anything defined with `def' or with a\n\
 718 construct whose name starts with `def'.  They also include\n\
 719 variables set with `set!' at top level in the file.";
 720
 721 static char *TeX_suffixes [] =
 722   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 723 static char TeX_help [] =
 724 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 725 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 726 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 727 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 728 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 729 \n\
 730 Other commands can be specified by setting the environment variable\n\
 731 `TEXTAGS' to a colon-separated list like, for example,\n\
 732      TEXTAGS=\"mycommand:myothercommand\".";
 733
 734
 735 static char *Texinfo_suffixes [] =
 736   { "texi", "texinfo", "txi", NULL };
 737 static char Texinfo_help [] =
 738 "for texinfo files, lines starting with @node are tagged.";
 739
 740 static char *Yacc_suffixes [] =
 741   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 742 static char Yacc_help [] =
 743 "In Bison or Yacc input files, each rule defines as a tag the\n\
 744 nonterminal it constructs.  The portions of the file that contain\n\
 745 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 746 for full help).";
 747
 748 static char auto_help [] =
 749 "`auto' is not a real language, it indicates to use\n\
 750 a default language for files base on file name suffix and file contents.";
 751
 752 static char none_help [] =
 753 "`none' is not a real language, it indicates to only do\n\
 754 regexp processing on files.";
 755
 756 static char no_lang_help [] =
 757 "No detailed help available for this language.";
 758
 759
/*
 * Table of languages.
 *
 * Each row is a positional initializer for a `language' record.  As
 * used by the rows below, the columns are, in order: the language
 * name, its help text, the function that tags files of that language,
 * the NULL-terminated list of file name suffixes, then (optionally)
 * a list of whole file names and a list of interpreter names.  Only
 * the yacc row supplies a seventh value (TRUE); its meaning is
 * defined with the `language' type, which is declared elsewhere.
 * Trailing columns that a row omits are zero-initialized.
 *
 * The table is terminated by an entry whose name is NULL; every
 * lookup loop in this file relies on that sentinel.
 *
 * It is ok for a given function to be listed under more than one
 * name.  I just didn't.
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 799
 800 \f
 801 static void
 802 print_language_names ()
 803 {
 804   language *lang;
 805   char **name, **ext;
 806
 807   puts ("\nThese are the currently supported languages, along with the\n\
 808 default file names and dot suffixes:");
 809   for (lang = lang_names; lang->name != NULL; lang++)
 810     {
 811       printf ("  %-*s", 10, lang->name);
 812       if (lang->filenames != NULL)
 813         for (name = lang->filenames; *name != NULL; name++)
 814           printf (" %s", *name);
 815       if (lang->suffixes != NULL)
 816         for (ext = lang->suffixes; *ext != NULL; ext++)
 817           printf (" .%s", *ext);
 818       puts ("");
 819     }
 820   puts ("where `auto' means use default language for files based on file\n\
 821 name suffix, and `none' means only do regexp processing on files.\n\
 822 If no language is specified and no matching suffix is found,\n\
 823 the first line of the file is read for a sharp-bang (#!) sequence\n\
 824 followed by the name of an interpreter.  If no such sequence is found,\n\
 825 Fortran is tried first; if no tags are found, C is tried next.\n\
 826 When parsing any C file, a \"class\" or \"template\" keyword\n\
 827 switches to C++.");
 828   puts ("Compressed files are supported using gzip and bzip2.\n\
 829 \n\
 830 For detailed help on a given language use, for example,\n\
 831 etags --help --lang=ada.");
 832 }
 833
 834 #ifndef EMACS_NAME
 835 # define EMACS_NAME "standalone"
 836 #endif
 837 #ifndef VERSION
 838 # define VERSION "version"
 839 #endif
 840 static void
 841 print_version ()
 842 {
 843   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 844   puts ("Copyright (C) 2006 Free Software Foundation, Inc. and Ken Arnold");
 845   puts ("This program is distributed under the same terms as Emacs");
 846
 847   exit (EXIT_SUCCESS);
 848 }
 849
/*
 * Print help on standard output and exit successfully; this function
 * does not return.
 *
 * ARGBUFFER is the list of parsed command line arguments, terminated
 * by an entry of type at_end.  If it contains any at_language entry,
 * only the detailed help text of each such language is printed, with
 * an empty line between consecutive texts.  Otherwise the general
 * usage message is printed: the option summary (which differs between
 * ctags and etags according to CTAGS), the list of supported
 * languages and the bug report address.
 */
static void
print_help (argbuffer)
     argument *argbuffer;
{
  bool help_for_lang = FALSE;

  /* Per-language help: one text for each --language option given.  */
  for (; argbuffer->arg_type != at_end; argbuffer++)
    if (argbuffer->arg_type == at_language)
      {
        if (help_for_lang)
          puts ("");
        puts (argbuffer->lang->help);
        help_for_lang = TRUE;
      }

  /* Language help was requested and printed: skip the general help.  */
  if (help_for_lang)
    exit (EXIT_SUCCESS);

  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
\n\
These are the options accepted by %s.\n", progname, progname);
  if (NO_LONG_OPTIONS)
    puts ("WARNING: long option names do not work with this executable,\n\
as it is not linked with GNU getopt.");
  else
    puts ("You may use unambiguous abbreviations for the long option names.");
  puts ("  A - as file name means read names from stdin (one per line).\n\
Absolute names are stored in the output file as they are.\n\
Relative ones are stored relative to the output file's directory.\n");

  puts ("-a, --append\n\
        Append tag entries to existing tags file.");

  puts ("--packages-only\n\
        For Ada files, only generate tags for packages.");

  if (CTAGS)
    puts ("-B, --backward-search\n\
        Write the search commands for the tag entries using '?', the\n\
        backward-search command instead of '/', the forward-search command.");

  /* This option is mostly obsolete, because etags can now automatically
     detect C++.  Retained for backward compatibility and for debugging and
     experimentation.  In principle, we could want to tag as C++ even
     before any "class" or "template" keyword.
  puts ("-C, --c++\n\
        Treat files whose name suffix defaults to C language as C++ files.");
  */

  puts ("--declarations\n\
        In C and derived languages, create tags for function declarations,");
  if (CTAGS)
    puts ("\tand create tags for extern variables if --globals is used.");
  else
    puts
      ("\tand create tags for extern variables unless --no-globals is used.");

  /* The defaults are opposite in ctags and etags, so each program
     documents only the option that changes its own default.  */
  if (CTAGS)
    puts ("-d, --defines\n\
        Create tag entries for C #define constants and enum constants, too.");
  else
    puts ("-D, --no-defines\n\
        Don't create tag entries for C #define constants and enum constants.\n\
        This makes the tags file smaller.");

  if (!CTAGS)
    puts ("-i FILE, --include=FILE\n\
        Include a note in tag file indicating that, when searching for\n\
        a tag, one should also consult the tags file FILE after\n\
        checking the current file.");

  puts ("-l LANG, --language=LANG\n\
        Force the following files to be considered as written in the\n\
        named language up to the next --language=LANG option.");

  if (CTAGS)
    puts ("--globals\n\
        Create tag entries for global variables in some languages.");
  else
    puts ("--no-globals\n\
        Do not create tag entries for global variables in some\n\
        languages.  This makes the tags file smaller.");
  puts ("--members\n\
        Create tag entries for members of structures in some languages.");

  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
        Make a tag for each line matching a regular expression pattern\n\
        in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
        files only.  REGEXFILE is a file containing one REGEXP per line.\n\
        REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
        optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
  puts ("       If TAGNAME/ is present, the tags created are named.\n\
        For example Tcl named tags can be created with:\n\
          --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
        MODS are optional one-letter modifiers: `i' means to ignore case,\n\
        `m' means to allow multi-line matches, `s' implies `m' and\n\
        causes dot to match any character, including newline.");
  puts ("-R, --no-regex\n\
        Don't create tags from regexps for the following files.");
  puts ("-I, --ignore-indentation\n\
        In C and C++ do not assume that a closing brace in the first\n\
        column is the final brace of a function or structure definition.");
  puts ("-o FILE, --output=FILE\n\
        Write the tags to FILE.");
  puts ("--parse-stdin=NAME\n\
        Read from standard input and record tags as belonging to file NAME.");

  /* The remaining option descriptions, up to --version, are printed
     by ctags only.  */
  if (CTAGS)
    {
      puts ("-t, --typedefs\n\
        Generate tag entries for C and Ada typedefs.");
      puts ("-T, --typedefs-and-c++\n\
        Generate tag entries for C typedefs, C struct/enum/union tags,\n\
        and C++ member functions.");
    }

  if (CTAGS)
    puts ("-u, --update\n\
        Update the tag entries for the given files, leaving tag\n\
        entries for other files in place.  Currently, this is\n\
        implemented by deleting the existing entries for the given\n\
        files and then rewriting the new entries at the end of the\n\
        tags file.  It is often faster to simply rebuild the entire\n\
        tag file than to use this.");

  if (CTAGS)
    {
      puts ("-v, --vgrind\n\
        Print on the standard output an index of items intended for\n\
        human consumption, similar to the output of vgrind.  The index\n\
        is sorted, and gives the page number of each item.");
# if PRINT_UNDOCUMENTED_OPTIONS_HELP
      puts ("-w, --no-duplicates\n\
        Do not create duplicate tag entries, for compatibility with\n\
        traditional ctags.");
      puts ("-w, --no-warn\n\
        Suppress warning messages about duplicate tag entries.");
# endif /* PRINT_UNDOCUMENTED_OPTIONS_HELP */
      puts ("-x, --cxref\n\
        Like --vgrind, but in the style of cxref, rather than vgrind.\n\
        The output uses line numbers instead of page numbers, but\n\
        beyond that the differences are cosmetic; try both to see\n\
        which you like.");
    }

  puts ("-V, --version\n\
        Print the version of the program.\n\
-h, --help\n\
        Print this help message.\n\
        Followed by one or more `--language' options prints detailed\n\
        help about tag generation for the specified languages.");

  print_language_names ();

  puts ("");
  puts ("Report bugs to bug-gnu-emacs@gnu.org");

  exit (EXIT_SUCCESS);
}
1009
1010 \f
1011 #ifdef VMS                      /* VMS specific functions */
1012
1013 #define EOS     '\0'
1014
1015 /* This is a BUG!  ANY arbitrary limit is a BUG!
1016    Won't someone please fix this?  */
1017 #define MAX_FILE_SPEC_LEN       255
1018 typedef struct  {
1019   short   curlen;
1020   char    body[MAX_FILE_SPEC_LEN + 1];
1021 } vspec;
1022
1023 /*
1024  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1025  returning in each successive call the next file name matching the input
1026  spec. The function expects that each in_spec passed
1027  to it will be processed to completion; in particular, up to and
1028  including the call following that in which the last matching name
1029  is returned, the function ignores the value of in_spec, and will
1030  only start processing a new spec with the following call.
1031  If an error occurs, on return out_spec contains the value
1032  of in_spec when the error occurred.
1033
1034  With each successive file name returned in out_spec, the
1035  function's return value is one. When there are no more matching
1036  names the function returns zero. If on the first call no file
1037  matches in_spec, or there is any other error, -1 is returned.
1038 */
1039
1040 #include        <rmsdef.h>
1041 #include        <descrip.h>
1042 #define         OUTSIZE MAX_FILE_SPEC_LEN
1043 static short
1044 fn_exp (out, in)
1045      vspec *out;
1046      char *in;
1047 {
1048   static long context = 0;
1049   static struct dsc$descriptor_s o;
1050   static struct dsc$descriptor_s i;
1051   static bool pass1 = TRUE;
1052   long status;
1053   short retval;
1054
1055   if (pass1)
1056     {
1057       pass1 = FALSE;
1058       o.dsc$a_pointer = (char *) out;
1059       o.dsc$w_length = (short)OUTSIZE;
1060       i.dsc$a_pointer = in;
1061       i.dsc$w_length = (short)strlen(in);
1062       i.dsc$b_dtype = DSC$K_DTYPE_T;
1063       i.dsc$b_class = DSC$K_CLASS_S;
1064       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1065       o.dsc$b_class = DSC$K_CLASS_VS;
1066     }
1067   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1068     {
1069       out->body[out->curlen] = EOS;
1070       return 1;
1071     }
1072   else if (status == RMS$_NMF)
1073     retval = 0;
1074   else
1075     {
1076       strcpy(out->body, in);
1077       retval = -1;
1078     }
1079   lib$find_file_end(&context);
1080   pass1 = TRUE;
1081   return retval;
1082 }
1083
1084 /*
1085   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1086   name of each file specified by the provided arg expanding wildcards.
1087 */
1088 static char *
1089 gfnames (arg, p_error)
1090      char *arg;
1091      bool *p_error;
1092 {
1093   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1094
1095   switch (fn_exp (&filename, arg))
1096     {
1097     case 1:
1098       *p_error = FALSE;
1099       return filename.body;
1100     case 0:
1101       *p_error = FALSE;
1102       return NULL;
1103     default:
1104       *p_error = TRUE;
1105       return filename.body;
1106     }
1107 }
1108
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/* NOTE(review): the comment above suggests this stub is meant for
   systems lacking `system', yet it is compiled when OLD is NOT
   defined -- confirm that the condition is the intended one.  */

/*
 * Replacement for the C library `system': report that running shell
 * commands is not supported and return -1, so that callers comparing
 * the result against EXIT_SUCCESS see a failure.  CMD is ignored.
 */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1116
1117 #define VERSION_DELIM   ';'
1118 char *massage_name (s)
1119      char *s;
1120 {
1121   char *start = s;
1122
1123   for ( ; *s; s++)
1124     if (*s == VERSION_DELIM)
1125       {
1126         *s = EOS;
1127         break;
1128       }
1129     else
1130       *s = lowcase (*s);
1131   return start;
1132 }
1133 #endif /* VMS */
1134
1135 \f
1136 int
1137 main (argc, argv)
1138      int argc;
1139      char *argv[];
1140 {
1141   int i;
1142   unsigned int nincluded_files;
1143   char **included_files;
1144   argument *argbuffer;
1145   int current_arg, file_count;
1146   linebuffer filename_lb;
1147   bool help_asked = FALSE;
1148 #ifdef VMS
1149   bool got_err;
1150 #endif
1151  char *optstring;
1152  int opt;
1153
1154
1155 #ifdef DOS_NT
1156   _fmode = O_BINARY;   /* all of files are treated as binary files */
1157 #endif /* DOS_NT */
1158
1159   progname = argv[0];
1160   nincluded_files = 0;
1161   included_files = xnew (argc, char *);
1162   current_arg = 0;
1163   file_count = 0;
1164
1165   /* Allocate enough no matter what happens.  Overkill, but each one
1166      is small. */
1167   argbuffer = xnew (argc, argument);
1168
1169   /*
1170    * If etags, always find typedefs and structure tags.  Why not?
1171    * Also default to find macro constants, enum constants and
1172    * global variables.
1173    */
1174   if (!CTAGS)
1175     {
1176       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1177       globals = TRUE;
1178     }
1179
1180   /* When the optstring begins with a '-' getopt_long does not rearrange the
1181      non-options arguments to be at the end, but leaves them alone. */
1182   optstring = concat (NO_LONG_OPTIONS ? "" : "-",
1183                       "ac:Cf:Il:o:r:RSVhH",
1184                       (CTAGS) ? "BxdtTuvw" : "Di:");
1185
1186   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1187     switch (opt)
1188       {
1189       case 0:
1190         /* If getopt returns 0, then it has already processed a
1191            long-named option.  We should do nothing.  */
1192         break;
1193
1194       case 1:
1195         /* This means that a file name has been seen.  Record it. */
1196         argbuffer[current_arg].arg_type = at_filename;
1197         argbuffer[current_arg].what     = optarg;
1198         ++current_arg;
1199         ++file_count;
1200         break;
1201
1202       case STDIN:
1203         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1204         argbuffer[current_arg].arg_type = at_stdin;
1205         argbuffer[current_arg].what     = optarg;
1206         ++current_arg;
1207         ++file_count;
1208         if (parsing_stdin)
1209           fatal ("cannot parse standard input more than once", (char *)NULL);
1210         parsing_stdin = TRUE;
1211         break;
1212
1213         /* Common options. */
1214       case 'a': append_to_tagfile = TRUE;       break;
1215       case 'C': cplusplus = TRUE;               break;
1216       case 'f':         /* for compatibility with old makefiles */
1217       case 'o':
1218         if (tagfile)
1219           {
1220             error ("-o option may only be given once.", (char *)NULL);
1221             suggest_asking_for_help ();
1222             /* NOTREACHED */
1223           }
1224         tagfile = optarg;
1225         break;
1226       case 'I':
1227       case 'S':         /* for backward compatibility */
1228         ignoreindent = TRUE;
1229         break;
1230       case 'l':
1231         {
1232           language *lang = get_language_from_langname (optarg);
1233           if (lang != NULL)
1234             {
1235               argbuffer[current_arg].lang = lang;
1236               argbuffer[current_arg].arg_type = at_language;
1237               ++current_arg;
1238             }
1239         }
1240         break;
1241       case 'c':
1242         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1243         optarg = concat (optarg, "i", ""); /* memory leak here */
1244         /* FALLTHRU */
1245       case 'r':
1246         argbuffer[current_arg].arg_type = at_regexp;
1247         argbuffer[current_arg].what = optarg;
1248         ++current_arg;
1249         break;
1250       case 'R':
1251         argbuffer[current_arg].arg_type = at_regexp;
1252         argbuffer[current_arg].what = NULL;
1253         ++current_arg;
1254         break;
1255       case 'V':
1256         print_version ();
1257         break;
1258       case 'h':
1259       case 'H':
1260         help_asked = TRUE;
1261         break;
1262
1263         /* Etags options */
1264       case 'D': constantypedefs = FALSE;                        break;
1265       case 'i': included_files[nincluded_files++] = optarg;     break;
1266
1267         /* Ctags options. */
1268       case 'B': searchar = '?';                                 break;
1269       case 'd': constantypedefs = TRUE;                         break;
1270       case 't': typedefs = TRUE;                                break;
1271       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1272       case 'u': update = TRUE;                                  break;
1273       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1274       case 'x': cxref_style = TRUE;                             break;
1275       case 'w': no_warnings = TRUE;                             break;
1276       default:
1277         suggest_asking_for_help ();
1278         /* NOTREACHED */
1279       }
1280
1281   /* No more options.  Store the rest of arguments. */
1282   for (; optind < argc; optind++)
1283     {
1284       argbuffer[current_arg].arg_type = at_filename;
1285       argbuffer[current_arg].what = argv[optind];
1286       ++current_arg;
1287       ++file_count;
1288     }
1289
1290   argbuffer[current_arg].arg_type = at_end;
1291
1292   if (help_asked)
1293     print_help (argbuffer);
1294     /* NOTREACHED */
1295
1296   if (nincluded_files == 0 && file_count == 0)
1297     {
1298       error ("no input files specified.", (char *)NULL);
1299       suggest_asking_for_help ();
1300       /* NOTREACHED */
1301     }
1302
1303   if (tagfile == NULL)
1304     tagfile = CTAGS ? "tags" : "TAGS";
1305   cwd = etags_getcwd ();        /* the current working directory */
1306   if (cwd[strlen (cwd) - 1] != '/')
1307     {
1308       char *oldcwd = cwd;
1309       cwd = concat (oldcwd, "/", "");
1310       free (oldcwd);
1311     }
1312   /* Relative file names are made relative to the current directory. */
1313   if (streq (tagfile, "-")
1314       || strneq (tagfile, "/dev/", 5))
1315     tagfiledir = cwd;
1316   else
1317     tagfiledir = absolute_dirname (tagfile, cwd);
1318
1319   init ();                      /* set up boolean "functions" */
1320
1321   linebuffer_init (&lb);
1322   linebuffer_init (&filename_lb);
1323   linebuffer_init (&filebuf);
1324   linebuffer_init (&token_name);
1325
1326   if (!CTAGS)
1327     {
1328       if (streq (tagfile, "-"))
1329         {
1330           tagf = stdout;
1331 #ifdef DOS_NT
1332           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1333              doesn't take effect until after `stdout' is already open). */
1334           if (!isatty (fileno (stdout)))
1335             setmode (fileno (stdout), O_BINARY);
1336 #endif /* DOS_NT */
1337         }
1338       else
1339         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1340       if (tagf == NULL)
1341         pfatal (tagfile);
1342     }
1343
1344   /*
1345    * Loop through files finding functions.
1346    */
1347   for (i = 0; i < current_arg; i++)
1348     {
1349       static language *lang;    /* non-NULL if language is forced */
1350       char *this_file;
1351
1352       switch (argbuffer[i].arg_type)
1353         {
1354         case at_language:
1355           lang = argbuffer[i].lang;
1356           break;
1357         case at_regexp:
1358           analyse_regex (argbuffer[i].what);
1359           break;
1360         case at_filename:
1361 #ifdef VMS
1362           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1363             {
1364               if (got_err)
1365                 {
1366                   error ("can't find file %s\n", this_file);
1367                   argc--, argv++;
1368                 }
1369               else
1370                 {
1371                   this_file = massage_name (this_file);
1372                 }
1373 #else
1374               this_file = argbuffer[i].what;
1375 #endif
1376               /* Input file named "-" means read file names from stdin
1377                  (one per line) and use them. */
1378               if (streq (this_file, "-"))
1379                 {
1380                   if (parsing_stdin)
1381                     fatal ("cannot parse standard input AND read file names from it",
1382                            (char *)NULL);
1383                   while (readline_internal (&filename_lb, stdin) > 0)
1384                     process_file_name (filename_lb.buffer, lang);
1385                 }
1386               else
1387                 process_file_name (this_file, lang);
1388 #ifdef VMS
1389             }
1390 #endif
1391           break;
1392         case at_stdin:
1393           this_file = argbuffer[i].what;
1394           process_file (stdin, this_file, lang);
1395           break;
1396         }
1397     }
1398
1399   free_regexps ();
1400   free (lb.buffer);
1401   free (filebuf.buffer);
1402   free (token_name.buffer);
1403
1404   if (!CTAGS || cxref_style)
1405     {
1406       /* Write the remaining tags to tagf (ETAGS) or stdout (CXREF). */
1407       put_entries (nodehead);
1408       free_tree (nodehead);
1409       nodehead = NULL;
1410       if (!CTAGS)
1411         {
1412           fdesc *fdp;
1413
1414           /* Output file entries that have no tags. */
1415           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1416             if (!fdp->written)
1417               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1418
1419           while (nincluded_files-- > 0)
1420             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1421
1422           if (fclose (tagf) == EOF)
1423             pfatal (tagfile);
1424         }
1425
1426       exit (EXIT_SUCCESS);
1427     }
1428
1429   if (update)
1430     {
1431       char cmd[BUFSIZ];
1432       for (i = 0; i < current_arg; ++i)
1433         {
1434           switch (argbuffer[i].arg_type)
1435             {
1436             case at_filename:
1437             case at_stdin:
1438               break;
1439             default:
1440               continue;         /* the for loop */
1441             }
1442           sprintf (cmd,
1443                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1444                    tagfile, argbuffer[i].what, tagfile);
1445           if (system (cmd) != EXIT_SUCCESS)
1446             fatal ("failed to execute shell command", (char *)NULL);
1447         }
1448       append_to_tagfile = TRUE;
1449     }
1450
1451   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1452   if (tagf == NULL)
1453     pfatal (tagfile);
1454   put_entries (nodehead);       /* write all the tags (CTAGS) */
1455   free_tree (nodehead);
1456   nodehead = NULL;
1457   if (fclose (tagf) == EOF)
1458     pfatal (tagfile);
1459
1460   if (CTAGS)
1461     if (append_to_tagfile || update)
1462       {
1463         char cmd[2*BUFSIZ+10];
1464         sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1465         exit (system (cmd));
1466       }
1467   return EXIT_SUCCESS;
1468 }
1469
1470
1471 /*
1472  * Return a compressor given the file name.  If EXTPTR is non-zero,
1473  * return a pointer into FILE where the compressor-specific
1474  * extension begins.  If no compressor is found, NULL is returned
1475  * and EXTPTR is not significant.
1476  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1477  */
1478 static compressor *
1479 get_compressor_from_suffix (file, extptr)
1480      char *file;
1481      char **extptr;
1482 {
1483   compressor *compr;
1484   char *slash, *suffix;
1485
1486   /* This relies on FN to be after canonicalize_filename,
1487      so we don't need to consider backslashes on DOS_NT.  */
1488   slash = etags_strrchr (file, '/');
1489   suffix = etags_strrchr (file, '.');
1490   if (suffix == NULL || suffix < slash)
1491     return NULL;
1492   if (extptr != NULL)
1493     *extptr = suffix;
1494   suffix += 1;
1495   /* Let those poor souls who live with DOS 8+3 file name limits get
1496      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1497      Only the first do loop is run if not MSDOS */
1498   do
1499     {
1500       for (compr = compressors; compr->suffix != NULL; compr++)
1501         if (streq (compr->suffix, suffix))
1502           return compr;
1503       if (!MSDOS)
1504         break;                  /* do it only once: not really a loop */
1505       if (extptr != NULL)
1506         *extptr = ++suffix;
1507     } while (*suffix != '\0');
1508   return NULL;
1509 }
1510
1511
1512
1513 /*
1514  * Return a language given the name.
1515  */
1516 static language *
1517 get_language_from_langname (name)
1518      const char *name;
1519 {
1520   language *lang;
1521
1522   if (name == NULL)
1523     error ("empty language name", (char *)NULL);
1524   else
1525     {
1526       for (lang = lang_names; lang->name != NULL; lang++)
1527         if (streq (name, lang->name))
1528           return lang;
1529       error ("unknown language \"%s\"", name);
1530     }
1531
1532   return NULL;
1533 }
1534
1535
1536 /*
1537  * Return a language given the interpreter name.
1538  */
1539 static language *
1540 get_language_from_interpreter (interpreter)
1541      char *interpreter;
1542 {
1543   language *lang;
1544   char **iname;
1545
1546   if (interpreter == NULL)
1547     return NULL;
1548   for (lang = lang_names; lang->name != NULL; lang++)
1549     if (lang->interpreters != NULL)
1550       for (iname = lang->interpreters; *iname != NULL; iname++)
1551         if (streq (*iname, interpreter))
1552             return lang;
1553
1554   return NULL;
1555 }
1556
1557
1558
1559 /*
1560  * Return a language given the file name.
1561  */
1562 static language *
1563 get_language_from_filename (file, case_sensitive)
1564      char *file;
1565      bool case_sensitive;
1566 {
1567   language *lang;
1568   char **name, **ext, *suffix;
1569
1570   /* Try whole file name first. */
1571   for (lang = lang_names; lang->name != NULL; lang++)
1572     if (lang->filenames != NULL)
1573       for (name = lang->filenames; *name != NULL; name++)
1574         if ((case_sensitive)
1575             ? streq (*name, file)
1576             : strcaseeq (*name, file))
1577           return lang;
1578
1579   /* If not found, try suffix after last dot. */
1580   suffix = etags_strrchr (file, '.');
1581   if (suffix == NULL)
1582     return NULL;
1583   suffix += 1;
1584   for (lang = lang_names; lang->name != NULL; lang++)
1585     if (lang->suffixes != NULL)
1586       for (ext = lang->suffixes; *ext != NULL; ext++)
1587         if ((case_sensitive)
1588             ? streq (*ext, suffix)
1589             : strcaseeq (*ext, suffix))
1590           return lang;
1591   return NULL;
1592 }
1593
1594 \f
1595 /*
1596  * This routine is called on each file argument.
1597  */
1598 static void
1599 process_file_name (file, lang)
1600      char *file;
1601      language *lang;
1602 {
1603   struct stat stat_buf;
1604   FILE *inf;
1605   fdesc *fdp;
1606   compressor *compr;
1607   char *compressed_name, *uncompressed_name;
1608   char *ext, *real_name;
1609   int retval;
1610
1611   canonicalize_filename (file);
1612   if (streq (file, tagfile) && !streq (tagfile, "-"))
1613     {
1614       error ("skipping inclusion of %s in self.", file);
1615       return;
1616     }
1617   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1618     {
1619       compressed_name = NULL;
1620       real_name = uncompressed_name = savestr (file);
1621     }
1622   else
1623     {
1624       real_name = compressed_name = savestr (file);
1625       uncompressed_name = savenstr (file, ext - file);
1626     }
1627
1628   /* If the canonicalized uncompressed name
1629      has already been dealt with, skip it silently. */
1630   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1631     {
1632       assert (fdp->infname != NULL);
1633       if (streq (uncompressed_name, fdp->infname))
1634         goto cleanup;
1635     }
1636
1637   if (stat (real_name, &stat_buf) != 0)
1638     {
1639       /* Reset real_name and try with a different name. */
1640       real_name = NULL;
1641       if (compressed_name != NULL) /* try with the given suffix */
1642         {
1643           if (stat (uncompressed_name, &stat_buf) == 0)
1644             real_name = uncompressed_name;
1645         }
1646       else                      /* try all possible suffixes */
1647         {
1648           for (compr = compressors; compr->suffix != NULL; compr++)
1649             {
1650               compressed_name = concat (file, ".", compr->suffix);
1651               if (stat (compressed_name, &stat_buf) != 0)
1652                 {
1653                   if (MSDOS)
1654                     {
1655                       char *suf = compressed_name + strlen (file);
1656                       size_t suflen = strlen (compr->suffix) + 1;
1657                       for ( ; suf[1]; suf++, suflen--)
1658                         {
1659                           memmove (suf, suf + 1, suflen);
1660                           if (stat (compressed_name, &stat_buf) == 0)
1661                             {
1662                               real_name = compressed_name;
1663                               break;
1664                             }
1665                         }
1666                       if (real_name != NULL)
1667                         break;
1668                     } /* MSDOS */
1669                   free (compressed_name);
1670                   compressed_name = NULL;
1671                 }
1672               else
1673                 {
1674                   real_name = compressed_name;
1675                   break;
1676                 }
1677             }
1678         }
1679       if (real_name == NULL)
1680         {
1681           perror (file);
1682           goto cleanup;
1683         }
1684     } /* try with a different name */
1685
1686   if (!S_ISREG (stat_buf.st_mode))
1687     {
1688       error ("skipping %s: it is not a regular file.", real_name);
1689       goto cleanup;
1690     }
1691   if (real_name == compressed_name)
1692     {
1693       char *cmd = concat (compr->command, " ", real_name);
1694       inf = (FILE *) popen (cmd, "r");
1695       free (cmd);
1696     }
1697   else
1698     inf = fopen (real_name, "r");
1699   if (inf == NULL)
1700     {
1701       perror (real_name);
1702       goto cleanup;
1703     }
1704
1705   process_file (inf, uncompressed_name, lang);
1706
1707   if (real_name == compressed_name)
1708     retval = pclose (inf);
1709   else
1710     retval = fclose (inf);
1711   if (retval < 0)
1712     pfatal (file);
1713
1714  cleanup:
1715   if (compressed_name) free (compressed_name);
1716   if (uncompressed_name) free (uncompressed_name);
1717   last_node = NULL;
1718   curfdp = NULL;
1719   return;
1720 }
1721
1722 static void
1723 process_file (fh, fn, lang)
1724      FILE *fh;
1725      char *fn;
1726      language *lang;
1727 {
1728   static const fdesc emptyfdesc;
1729   fdesc *fdp;
1730
1731   /* Create a new input file description entry. */
1732   fdp = xnew (1, fdesc);
1733   *fdp = emptyfdesc;
1734   fdp->next = fdhead;
1735   fdp->infname = savestr (fn);
1736   fdp->lang = lang;
1737   fdp->infabsname = absolute_filename (fn, cwd);
1738   fdp->infabsdir = absolute_dirname (fn, cwd);
1739   if (filename_is_absolute (fn))
1740     {
1741       /* An absolute file name.  Canonicalize it. */
1742       fdp->taggedfname = absolute_filename (fn, NULL);
1743     }
1744   else
1745     {
1746       /* A file name relative to cwd.  Make it relative
1747          to the directory of the tags file. */
1748       fdp->taggedfname = relative_filename (fn, tagfiledir);
1749     }
1750   fdp->usecharno = TRUE;        /* use char position when making tags */
1751   fdp->prop = NULL;
1752   fdp->written = FALSE;         /* not written on tags file yet */
1753
1754   fdhead = fdp;
1755   curfdp = fdhead;              /* the current file description */
1756
1757   find_entries (fh);
1758
1759   /* If not Ctags, and if this is not metasource and if it contained no #line
1760      directives, we can write the tags and free all nodes pointing to
1761      curfdp. */
1762   if (!CTAGS
1763       && curfdp->usecharno      /* no #line directives in this file */
1764       && !curfdp->lang->metasource)
1765     {
1766       node *np, *prev;
1767
1768       /* Look for the head of the sublist relative to this file.  See add_node
1769          for the structure of the node tree. */
1770       prev = NULL;
1771       for (np = nodehead; np != NULL; prev = np, np = np->left)
1772         if (np->fdp == curfdp)
1773           break;
1774
1775       /* If we generated tags for this file, write and delete them. */
1776       if (np != NULL)
1777         {
1778           /* This is the head of the last sublist, if any.  The following
1779              instructions depend on this being true. */
1780           assert (np->left == NULL);
1781
1782           assert (fdhead == curfdp);
1783           assert (last_node->fdp == curfdp);
1784           put_entries (np);     /* write tags for file curfdp->taggedfname */
1785           free_tree (np);       /* remove the written nodes */
1786           if (prev == NULL)
1787             nodehead = NULL;    /* no nodes left */
1788           else
1789             prev->left = NULL;  /* delete the pointer to the sublist */
1790         }
1791     }
1792 }
1793
1794 /*
1795  * This routine sets up the boolean pseudo-functions which work
1796  * by setting boolean flags dependent upon the corresponding character.
1797  * Every char which is NOT in that string is not a white char.  Therefore,
1798  * all of the array "_wht" is set to FALSE, and then the elements
1799  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1800  * of a char is TRUE if it is the string "white", else FALSE.
1801  */
1802 static void
1803 init ()
1804 {
1805   register char *sp;
1806   register int i;
1807
1808   for (i = 0; i < CHARS; i++)
1809     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1810   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1811   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1812   notinname('\0') = notinname('\n');
1813   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1814   begtoken('\0') = begtoken('\n');
1815   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1816   intoken('\0') = intoken('\n');
1817   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1818   endtoken('\0') = endtoken('\n');
1819 }
1820
1821 /*
1822  * This routine opens the specified file and calls the function
1823  * which finds the function and type definitions.
1824  */
1825 static void
1826 find_entries (inf)
1827      FILE *inf;
1828 {
1829   char *cp;
1830   language *lang = curfdp->lang;
1831   Lang_function *parser = NULL;
1832
1833   /* If user specified a language, use it. */
1834   if (lang != NULL && lang->function != NULL)
1835     {
1836       parser = lang->function;
1837     }
1838
1839   /* Else try to guess the language given the file name. */
1840   if (parser == NULL)
1841     {
1842       lang = get_language_from_filename (curfdp->infname, TRUE);
1843       if (lang != NULL && lang->function != NULL)
1844         {
1845           curfdp->lang = lang;
1846           parser = lang->function;
1847         }
1848     }
1849
1850   /* Else look for sharp-bang as the first two characters. */
1851   if (parser == NULL
1852       && readline_internal (&lb, inf) > 0
1853       && lb.len >= 2
1854       && lb.buffer[0] == '#'
1855       && lb.buffer[1] == '!')
1856     {
1857       char *lp;
1858
1859       /* Set lp to point at the first char after the last slash in the
1860          line or, if no slashes, at the first nonblank.  Then set cp to
1861          the first successive blank and terminate the string. */
1862       lp = etags_strrchr (lb.buffer+2, '/');
1863       if (lp != NULL)
1864         lp += 1;
1865       else
1866         lp = skip_spaces (lb.buffer + 2);
1867       cp = skip_non_spaces (lp);
1868       *cp = '\0';
1869
1870       if (strlen (lp) > 0)
1871         {
1872           lang = get_language_from_interpreter (lp);
1873           if (lang != NULL && lang->function != NULL)
1874             {
1875               curfdp->lang = lang;
1876               parser = lang->function;
1877             }
1878         }
1879     }
1880
1881   /* We rewind here, even if inf may be a pipe.  We fail if the
1882      length of the first line is longer than the pipe block size,
1883      which is unlikely. */
1884   rewind (inf);
1885
1886   /* Else try to guess the language given the case insensitive file name. */
1887   if (parser == NULL)
1888     {
1889       lang = get_language_from_filename (curfdp->infname, FALSE);
1890       if (lang != NULL && lang->function != NULL)
1891         {
1892           curfdp->lang = lang;
1893           parser = lang->function;
1894         }
1895     }
1896
1897   /* Else try Fortran or C. */
1898   if (parser == NULL)
1899     {
1900       node *old_last_node = last_node;
1901
1902       curfdp->lang = get_language_from_langname ("fortran");
1903       find_entries (inf);
1904
1905       if (old_last_node == last_node)
1906         /* No Fortran entries found.  Try C. */
1907         {
1908           /* We do not tag if rewind fails.
1909              Only the file name will be recorded in the tags file. */
1910           rewind (inf);
1911           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1912           find_entries (inf);
1913         }
1914       return;
1915     }
1916
1917   if (!no_line_directive
1918       && curfdp->lang != NULL && curfdp->lang->metasource)
1919     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1920        file, or anyway we parsed a file that is automatically generated from
1921        this one.  If this is the case, the bingo.c file contained #line
1922        directives that generated tags pointing to this file.  Let's delete
1923        them all before parsing this file, which is the real source. */
1924     {
1925       fdesc **fdpp = &fdhead;
1926       while (*fdpp != NULL)
1927         if (*fdpp != curfdp
1928             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1929           /* We found one of those!  We must delete both the file description
1930              and all tags referring to it. */
1931           {
1932             fdesc *badfdp = *fdpp;
1933
1934             /* Delete the tags referring to badfdp->taggedfname
1935                that were obtained from badfdp->infname. */
1936             invalidate_nodes (badfdp, &nodehead);
1937
1938             *fdpp = badfdp->next; /* remove the bad description from the list */
1939             free_fdesc (badfdp);
1940           }
1941         else
1942           fdpp = &(*fdpp)->next; /* advance the list pointer */
1943     }
1944
1945   assert (parser != NULL);
1946
1947   /* Generic initialisations before reading from file. */
1948   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1949
1950   /* Generic initialisations before parsing file with readline. */
1951   lineno = 0;                  /* reset global line number */
1952   charno = 0;                  /* reset global char number */
1953   linecharno = 0;              /* reset global char number of line start */
1954
1955   parser (inf);
1956
1957   regex_tag_multiline ();
1958 }
1959
1960 \f
1961 /*
1962  * Check whether an implicitly named tag should be created,
1963  * then call `pfnote'.
1964  * NAME is a string that is internally copied by this function.
1965  *
1966  * TAGS format specification
1967  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1968  * The following is explained in some more detail in etc/ETAGS.EBNF.
1969  *
1970  * make_tag creates tags with "implicit tag names" (unnamed tags)
1971  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1972  *  1. NAME does not contain any of the characters in NONAM;
1973  *  2. LINESTART contains name as either a rightmost, or rightmost but
1974  *     one character, substring;
1975  *  3. the character, if any, immediately before NAME in LINESTART must
1976  *     be a character in NONAM;
1977  *  4. the character, if any, immediately after NAME in LINESTART must
1978  *     also be a character in NONAM.
1979  *
1980  * The implementation uses the notinname() macro, which recognises the
1981  * characters stored in the string `nonam'.
1982  * etags.el needs to use the same characters that are in NONAM.
1983  */
1984 static void
1985 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
1986      char *name;                /* tag name, or NULL if unnamed */
1987      int namelen;               /* tag length */
1988      bool is_func;              /* tag is a function */
1989      char *linestart;           /* start of the line where tag is */
1990      int linelen;               /* length of the line where tag is */
1991      int lno;                   /* line number */
1992      long cno;                  /* character number */
1993 {
1994   bool named = (name != NULL && namelen > 0);
1995
1996   if (!CTAGS && named)          /* maybe set named to false */
1997     /* Let's try to make an implicit tag name, that is, create an unnamed tag
1998        such that etags.el can guess a name from it. */
1999     {
2000       int i;
2001       register char *cp = name;
2002
2003       for (i = 0; i < namelen; i++)
2004         if (notinname (*cp++))
2005           break;
2006       if (i == namelen)                         /* rule #1 */
2007         {
2008           cp = linestart + linelen - namelen;
2009           if (notinname (linestart[linelen-1]))
2010             cp -= 1;                            /* rule #4 */
2011           if (cp >= linestart                   /* rule #2 */
2012               && (cp == linestart
2013                   || notinname (cp[-1]))        /* rule #3 */
2014               && strneq (name, cp, namelen))    /* rule #2 */
2015             named = FALSE;      /* use implicit tag name */
2016         }
2017     }
2018
2019   if (named)
2020     name = savenstr (name, namelen);
2021   else
2022     name = NULL;
2023   pfnote (name, is_func, linestart, linelen, lno, cno);
2024 }
2025
2026 /* Record a tag. */
2027 static void
2028 pfnote (name, is_func, linestart, linelen, lno, cno)
2029      char *name;                /* tag name, or NULL if unnamed */
2030      bool is_func;              /* tag is a function */
2031      char *linestart;           /* start of the line where tag is */
2032      int linelen;               /* length of the line where tag is */
2033      int lno;                   /* line number */
2034      long cno;                  /* character number */
2035 {
2036   register node *np;
2037
2038   assert (name == NULL || name[0] != '\0');
2039   if (CTAGS && name == NULL)
2040     return;
2041
2042   np = xnew (1, node);
2043
2044   /* If ctags mode, change name "main" to M<thisfilename>. */
2045   if (CTAGS && !cxref_style && streq (name, "main"))
2046     {
2047       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2048       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2049       fp = etags_strrchr (np->name, '.');
2050       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2051         fp[0] = '\0';
2052     }
2053   else
2054     np->name = name;
2055   np->valid = TRUE;
2056   np->been_warned = FALSE;
2057   np->fdp = curfdp;
2058   np->is_func = is_func;
2059   np->lno = lno;
2060   if (np->fdp->usecharno)
2061     /* Our char numbers are 0-base, because of C language tradition?
2062        ctags compatibility?  old versions compatibility?   I don't know.
2063        Anyway, since emacs's are 1-base we expect etags.el to take care
2064        of the difference.  If we wanted to have 1-based numbers, we would
2065        uncomment the +1 below. */
2066     np->cno = cno /* + 1 */ ;
2067   else
2068     np->cno = invalidcharno;
2069   np->left = np->right = NULL;
2070   if (CTAGS && !cxref_style)
2071     {
2072       if (strlen (linestart) < 50)
2073         np->regex = concat (linestart, "$", "");
2074       else
2075         np->regex = savenstr (linestart, 50);
2076     }
2077   else
2078     np->regex = savenstr (linestart, linelen);
2079
2080   add_node (np, &nodehead);
2081 }
2082
2083 /*
2084  * free_tree ()
2085  *      recurse on left children, iterate on right children.
2086  */
2087 static void
2088 free_tree (np)
2089      register node *np;
2090 {
2091   while (np)
2092     {
2093       register node *node_right = np->right;
2094       free_tree (np->left);
2095       if (np->name != NULL)
2096         free (np->name);
2097       free (np->regex);
2098       free (np);
2099       np = node_right;
2100     }
2101 }
2102
2103 /*
2104  * free_fdesc ()
2105  *      delete a file description
2106  */
2107 static void
2108 free_fdesc (fdp)
2109      register fdesc *fdp;
2110 {
2111   if (fdp->infname != NULL) free (fdp->infname);
2112   if (fdp->infabsname != NULL) free (fdp->infabsname);
2113   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2114   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2115   if (fdp->prop != NULL) free (fdp->prop);
2116   free (fdp);
2117 }
2118
2119 /*
2120  * add_node ()
2121  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2122  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2123  *      balancing.
2124  *
2125  *      add_node is the only function allowed to add nodes, so it can
2126  *      maintain state.
2127  */
2128 static void
2129 add_node (np, cur_node_p)
2130      node *np, **cur_node_p;
2131 {
2132   register int dif;
2133   register node *cur_node = *cur_node_p;
2134
2135   if (cur_node == NULL)
2136     {
2137       *cur_node_p = np;
2138       last_node = np;
2139       return;
2140     }
2141
2142   if (!CTAGS)
2143     /* Etags Mode */
2144     {
2145       /* For each file name, tags are in a linked sublist on the right
2146          pointer.  The first tags of different files are a linked list
2147          on the left pointer.  last_node points to the end of the last
2148          used sublist. */
2149       if (last_node != NULL && last_node->fdp == np->fdp)
2150         {
2151           /* Let's use the same sublist as the last added node. */
2152           assert (last_node->right == NULL);
2153           last_node->right = np;
2154           last_node = np;
2155         }
2156       else if (cur_node->fdp == np->fdp)
2157         {
2158           /* Scanning the list we found the head of a sublist which is
2159              good for us.  Let's scan this sublist. */
2160           add_node (np, &cur_node->right);
2161         }
2162       else
2163         /* The head of this sublist is not good for us.  Let's try the
2164            next one. */
2165         add_node (np, &cur_node->left);
2166     } /* if ETAGS mode */
2167
2168   else
2169     {
2170       /* Ctags Mode */
2171       dif = strcmp (np->name, cur_node->name);
2172
2173       /*
2174        * If this tag name matches an existing one, then
2175        * do not add the node, but maybe print a warning.
2176        */
2177       if (no_duplicates && !dif)
2178         {
2179           if (np->fdp == cur_node->fdp)
2180             {
2181               if (!no_warnings)
2182                 {
2183                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2184                            np->fdp->infname, lineno, np->name);
2185                   fprintf (stderr, "Second entry ignored\n");
2186                 }
2187             }
2188           else if (!cur_node->been_warned && !no_warnings)
2189             {
2190               fprintf
2191                 (stderr,
2192                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2193                  np->fdp->infname, cur_node->fdp->infname, np->name);
2194               cur_node->been_warned = TRUE;
2195             }
2196           return;
2197         }
2198
2199       /* Actually add the node */
2200       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2201     } /* if CTAGS mode */
2202 }
2203
2204 /*
2205  * invalidate_nodes ()
2206  *      Scan the node tree and invalidate all nodes pointing to the
2207  *      given file description (CTAGS case) or free them (ETAGS case).
2208  */
2209 static void
2210 invalidate_nodes (badfdp, npp)
2211      fdesc *badfdp;
2212      node **npp;
2213 {
2214   node *np = *npp;
2215
2216   if (np == NULL)
2217     return;
2218
2219   if (CTAGS)
2220     {
2221       if (np->left != NULL)
2222         invalidate_nodes (badfdp, &np->left);
2223       if (np->fdp == badfdp)
2224         np->valid = FALSE;
2225       if (np->right != NULL)
2226         invalidate_nodes (badfdp, &np->right);
2227     }
2228   else
2229     {
2230       assert (np->fdp != NULL);
2231       if (np->fdp == badfdp)
2232         {
2233           *npp = np->left;      /* detach the sublist from the list */
2234           np->left = NULL;      /* isolate it */
2235           free_tree (np);       /* free it */
2236           invalidate_nodes (badfdp, npp);
2237         }
2238       else
2239         invalidate_nodes (badfdp, &np->left);
2240     }
2241 }
2242
2243 \f
2244 static int total_size_of_entries __P((node *));
2245 static int number_len __P((long));
2246
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to begin with, one more for each factor of ten. */
  for (digits = 1; num >= 10; num /= 10)
    digits++;
  return digits;
}
2257
2258 /*
2259  * Return total number of characters that put_entries will output for
2260  * the nodes in the linked list at the right of the specified node.
2261  * This count is irrelevant with etags.el since emacs 19.34 at least,
2262  * but is still supplied for backward compatibility.
2263  */
2264 static int
2265 total_size_of_entries (np)
2266      register node *np;
2267 {
2268   register int total = 0;
2269
2270   for (; np != NULL; np = np->right)
2271     if (np->valid)
2272       {
2273         total += strlen (np->regex) + 1;                /* pat\177 */
2274         if (np->name != NULL)
2275           total += strlen (np->name) + 1;               /* name\001 */
2276         total += number_len ((long) np->lno) + 1;       /* lno, */
2277         if (np->cno != invalidcharno)                   /* cno */
2278           total += number_len (np->cno);
2279         total += 1;                                     /* newline */
2280       }
2281
2282   return total;
2283 }
2284
2285 static void
2286 put_entries (np)
2287      register node *np;
2288 {
2289   register char *sp;
2290   static fdesc *fdp = NULL;
2291
2292   if (np == NULL)
2293     return;
2294
2295   /* Output subentries that precede this one */
2296   if (CTAGS)
2297     put_entries (np->left);
2298
2299   /* Output this entry */
2300   if (np->valid)
2301     {
2302       if (!CTAGS)
2303         {
2304           /* Etags mode */
2305           if (fdp != np->fdp)
2306             {
2307               fdp = np->fdp;
2308               fprintf (tagf, "\f\n%s,%d\n",
2309                        fdp->taggedfname, total_size_of_entries (np));
2310               fdp->written = TRUE;
2311             }
2312           fputs (np->regex, tagf);
2313           fputc ('\177', tagf);
2314           if (np->name != NULL)
2315             {
2316               fputs (np->name, tagf);
2317               fputc ('\001', tagf);
2318             }
2319           fprintf (tagf, "%d,", np->lno);
2320           if (np->cno != invalidcharno)
2321             fprintf (tagf, "%ld", np->cno);
2322           fputs ("\n", tagf);
2323         }
2324       else
2325         {
2326           /* Ctags mode */
2327           if (np->name == NULL)
2328             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2329
2330           if (cxref_style)
2331             {
2332               if (vgrind_style)
2333                 fprintf (stdout, "%s %s %d\n",
2334                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2335               else
2336                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2337                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2338             }
2339           else
2340             {
2341               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2342
2343               if (np->is_func)
2344                 {               /* function or #define macro with args */
2345                   putc (searchar, tagf);
2346                   putc ('^', tagf);
2347
2348                   for (sp = np->regex; *sp; sp++)
2349                     {
2350                       if (*sp == '\\' || *sp == searchar)
2351                         putc ('\\', tagf);
2352                       putc (*sp, tagf);
2353                     }
2354                   putc (searchar, tagf);
2355                 }
2356               else
2357                 {               /* anything else; text pattern inadequate */
2358                   fprintf (tagf, "%d", np->lno);
2359                 }
2360               putc ('\n', tagf);
2361             }
2362         }
2363     } /* if this node contains a valid tag */
2364
2365   /* Output subentries that follow this one */
2366   put_entries (np->right);
2367   if (!CTAGS)
2368     put_entries (np->left);
2369 }
2370
2371 \f
/* Flags describing the dialect of a C-like source file.

   The bits selected by C_EXT name the C extension in use; note that
   the values of C_STAR and C_JAVA both include the C_PLPL bit.
   C_AUTO and YACC lie outside the C_EXT mask. */
#define C_EXT   0x00fff         /* mask of the C extension bits */

#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */

#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2380
/*
 * The C symbol tables.
 *
 * Kinds of keyword known to the C-like parsers.  The keywords noted
 * beside each kind are the ones that the gperf input found further
 * on in this file associates with it.
 */
enum sym_type
{
  st_none,
  st_C_objprot,                 /* @interface, @protocol */
  st_C_objimpl,                 /* @implementation */
  st_C_objend,                  /* @end */
  st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,                  /* if, for, while, switch, return, ... */
  st_C_attribute,               /* __attribute__ */
  st_C_javastruct,              /* extends, implements */
  st_C_operator,                /* operator */
  st_C_class,                   /* class */
  st_C_template,                /* template */
  st_C_struct,                  /* struct, union, namespace, ... */
  st_C_extern,                  /* extern */
  st_C_enum,                    /* enum */
  st_C_define,                  /* define, undef */
  st_C_typedef                  /* typedef */
};
2395
2396 static unsigned int hash __P((const char *, unsigned int));
2397 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2398 static enum sym_type C_symtype __P((char *, int, int));
2399
2400 /* Feed stuff between (but not including) %[ and %] lines to:
2401      gperf -m 5
2402 %[
2403 %compare-strncmp
2404 %enum
2405 %struct-type
2406 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2407 %%
2408 if,             0,                      st_C_ignore
2409 for,            0,                      st_C_ignore
2410 while,          0,                      st_C_ignore
2411 switch,         0,                      st_C_ignore
2412 return,         0,                      st_C_ignore
2413 __attribute__,  0,                      st_C_attribute
2414 @interface,     0,                      st_C_objprot
2415 @protocol,      0,                      st_C_objprot
2416 @implementation,0,                      st_C_objimpl
2417 @end,           0,                      st_C_objend
2418 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2419 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2420 friend,         C_PLPL,                 st_C_ignore
2421 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2422 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2423 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2424 class,          0,                      st_C_class
2425 namespace,      C_PLPL,                 st_C_struct
2426 domain,         C_STAR,                 st_C_struct
2427 union,          0,                      st_C_struct
2428 struct,         0,                      st_C_struct
2429 extern,         0,                      st_C_extern
2430 enum,           0,                      st_C_enum
2431 typedef,        0,                      st_C_typedef
2432 define,         0,                      st_C_define
2433 undef,          0,                      st_C_define
2434 operator,       C_PLPL,                 st_C_operator
2435 template,       0,                      st_C_template
2436 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2437 DEFUN,          0,                      st_C_gnumacro
2438 SYSCALL,        0,                      st_C_gnumacro
2439 ENTRY,          0,                      st_C_gnumacro
2440 PSEUDO,         0,                      st_C_gnumacro
2441 # These are defined inside C functions, so currently they are not met.
2442 # EXFUN used in glibc, DEFVAR_* in emacs.
2443 #EXFUN,         0,                      st_C_gnumacro
2444 #DEFVAR_,       0,                      st_C_gnumacro
2445 %]
2446 and replace lines between %< and %> with its output, then:
2447  - remove the #if characterset check
2448  - make in_word_set static and not inline. */
2449 /*%<*/
2450 /* C code produced by gperf version 3.0.1 */
2451 /* Command-line: gperf -m 5  */
2452 /* Computed positions: -k'2-3' */
2453
2454 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2455 /* maximum key range = 33, duplicates = 0 */
2456
/* Hash function for the keyword table, as computed by gperf from the
   second and third character of the word (positions -k'2-3') and
   from its length.  STR must be at least two characters long, and at
   least three when LEN is not 2. */
#if defined __GNUC__
__inline
#elif defined __cplusplus
inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Value associated with each character code; 35 is given to the
     characters that no keyword has in the positions looked at. */
  static const unsigned char asso_values[] =
    {
      /*   0 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  10 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  20 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  30 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  40 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  50 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /*  60 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 15,
      /*  70 */ 14, 35, 35, 35, 35, 35, 35, 35, 14, 35,
      /*  80 */ 35, 35, 35, 12, 13, 35, 35, 35, 35, 12,
      /*  90 */ 35, 35, 35, 35, 35,  1, 35, 16, 35,  6,
      /* 100 */ 23,  0,  0, 35, 22,  0, 35, 35,  5,  0,
      /* 110 */  0, 15,  1, 35,  6, 35,  8, 19, 35, 16,
      /* 120 */  4,  5, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 130 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 140 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 150 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 160 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 170 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 180 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 190 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 200 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 210 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 220 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 230 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 240 */ 35, 35, 35, 35, 35, 35, 35, 35, 35, 35,
      /* 250 */ 35, 35, 35, 35, 35, 35
    };
  register int key = len;

  /* The third character takes part for every length but 2 ... */
  if (key != 2)
    key += asso_values[(unsigned char)str[2]];
  /* ... the second character always does. */
  key += asso_values[(unsigned char)str[1]];

  return key;
}
2511
2512 static struct C_stab_entry *
2513 in_word_set (str, len)
2514      register const char *str;
2515      register unsigned int len;
2516 {
2517   enum
2518     {
2519       TOTAL_KEYWORDS = 32,
2520       MIN_WORD_LENGTH = 2,
2521       MAX_WORD_LENGTH = 15,
2522       MIN_HASH_VALUE = 2,
2523       MAX_HASH_VALUE = 34
2524     };
2525
2526   static struct C_stab_entry wordlist[] =
2527     {
2528       {""}, {""},
2529       {"if",            0,                      st_C_ignore},
2530       {""},
2531       {"@end",          0,                      st_C_objend},
2532       {"union",         0,                      st_C_struct},
2533       {"define",                0,                      st_C_define},
2534       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2535       {"template",      0,                      st_C_template},
2536       {"operator",      C_PLPL,                 st_C_operator},
2537       {"@interface",    0,                      st_C_objprot},
2538       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2539       {"friend",                C_PLPL,                 st_C_ignore},
2540       {"typedef",       0,                      st_C_typedef},
2541       {"return",                0,                      st_C_ignore},
2542       {"@implementation",0,                     st_C_objimpl},
2543       {"@protocol",     0,                      st_C_objprot},
2544       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2545       {"extern",                0,                      st_C_extern},
2546       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2547       {"struct",                0,                      st_C_struct},
2548       {"domain",                C_STAR,                 st_C_struct},
2549       {"switch",                0,                      st_C_ignore},
2550       {"enum",          0,                      st_C_enum},
2551       {"for",           0,                      st_C_ignore},
2552       {"namespace",     C_PLPL,                 st_C_struct},
2553       {"class",         0,                      st_C_class},
2554       {"while",         0,                      st_C_ignore},
2555       {"undef",         0,                      st_C_define},
2556       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2557       {"__attribute__", 0,                      st_C_attribute},
2558       {"SYSCALL",       0,                      st_C_gnumacro},
2559       {"ENTRY",         0,                      st_C_gnumacro},
2560       {"PSEUDO",                0,                      st_C_gnumacro},
2561       {"DEFUN",         0,                      st_C_gnumacro}
2562     };
2563
2564   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2565     {
2566       register int key = hash (str, len);
2567
2568       if (key <= MAX_HASH_VALUE && key >= 0)
2569         {
2570           register const char *s = wordlist[key].name;
2571
2572           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2573             return &wordlist[key];
2574         }
2575     }
2576   return 0;
2577 }
2578 /*%>*/
2579
2580 static enum sym_type
2581 C_symtype (str, len, c_ext)
2582      char *str;
2583      int len;
2584      int c_ext;
2585 {
2586   register struct C_stab_entry *se = in_word_set (str, len);
2587
2588   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2589     return st_none;
2590   return se->type;
2591 }
2592
2593 \f
/*
 * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it meets the __attribute__ keyword;
 * C_entries does not consider any token while the flag is set.
 */
static bool inattribute;        /* looking at an __attribute__ construct */
2598
/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* GNU macro keyword seen (DEFUN, ENTRY,
                                   PSEUDO or SYSCALL in the keyword table) */
  fdefunname,                   /* name after a GNU macro keyword seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */
2618
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen (consider_token
                                   enters this state only when `typedefs'
                                   is true) */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2632
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.  consider_token runs the `class' keyword, and every
 * keyword the table maps to st_C_struct, through the same automaton.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen                    /* colon seen after struct-like tag; also
                                   entered from stagseen on the Java
                                   keywords typed st_C_javastruct */
} structdef;
2646
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token replaces it with a copy made by savenstr each time a
 * class name is seen; the previous string is not freed (see the comment
 * about memory leakage in consider_token).
 */
static char *objtag = "<uninited>";
2651
/*
 * Yet another little state machine to deal with preprocessor lines.
 * consider_token moves from dsharpseen to ddefineseen when the token
 * after '#' is typed st_C_define (`define' and `undef' in the keyword
 * table) and to dignorerest for any other token; the token seen in
 * ddefineseen is the macro name.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2662
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;

2682
2683
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* a tag may be created from this token;
                                   when FALSE, do not create a tag.  The
                                   token should be invalidated whenever a
                                   state machine is reset prematurely;
                                   make_C_tag also clears the flag after
                                   making a tag */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2706
2707 /*
2708  * Variables and functions for dealing with nested structures.
2709  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2710  */
2711 static void pushclass_above __P((int, char *, int));
2712 static void popclass_above __P((int));
2713 static void write_classname __P((linebuffer *, char *qualifier));
2714
/* cname and bracelev are parallel arrays: entry I describes the I-th
   enclosing struct-like declaration, outermost first.  pushclass_above
   keeps the brace levels strictly increasing and stores a NULL name when
   it is given no name. */
static struct {
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* length of the array */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   Expands to an expression that reads a variable named bracelev from the
   scope where the macro is used. */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2726
2727 static void
2728 pushclass_above (bracelev, str, len)
2729      int bracelev;
2730      char *str;
2731      int len;
2732 {
2733   int nl;
2734
2735   popclass_above (bracelev);
2736   nl = cstack.nl;
2737   if (nl >= cstack.size)
2738     {
2739       int size = cstack.size *= 2;
2740       xrnew (cstack.cname, size, char *);
2741       xrnew (cstack.bracelev, size, int);
2742     }
2743   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2744   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2745   cstack.bracelev[nl] = bracelev;
2746   cstack.nl = nl + 1;
2747 }
2748
2749 static void
2750 popclass_above (bracelev)
2751      int bracelev;
2752 {
2753   int nl;
2754
2755   for (nl = cstack.nl - 1;
2756        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2757        nl--)
2758     {
2759       if (cstack.cname[nl] != NULL)
2760         free (cstack.cname[nl]);
2761       cstack.nl = nl;
2762     }
2763 }
2764
2765 static void
2766 write_classname (cn, qualifier)
2767      linebuffer *cn;
2768      char *qualifier;
2769 {
2770   int i, len;
2771   int qlen = strlen (qualifier);
2772
2773   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2774     {
2775       len = 0;
2776       cn->len = 0;
2777       cn->buffer[0] = '\0';
2778     }
2779   else
2780     {
2781       len = strlen (cstack.cname[0]);
2782       linebuffer_setlen (cn, len);
2783       strcpy (cn->buffer, cstack.cname[0]);
2784     }
2785   for (i = 1; i < cstack.nl; i++)
2786     {
2787       char *s;
2788       int slen;
2789
2790       s = cstack.cname[i];
2791       if (s == NULL)
2792         continue;
2793       slen = strlen (s);
2794       len += slen + qlen;
2795       linebuffer_setlen (cn, len);
2796       strncat (cn->buffer, qualifier, qlen);
2797       strncat (cn->buffer, s, slen);
2798     }
2799 }
2800
2801 \f
2802 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2803 static void make_C_tag __P((bool));
2804
/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function or variable, or corresponds to a typedef, or
 *      is a struct/union/enum tag, or #define, or an enum constant.
 *
 *      Returns TRUE when the token is one of the above, FALSE when
 *      the token is to be ignored.
 *
 *      *IS_FUNC_OR_VAR gets TRUE when the token is a function or
 *      variable name, an operator, the name following a GNU macro
 *      keyword, an Objective C class or category name, or a #define
 *      macro with args; it gets FALSE for a #define macro without
 *      args, and is left untouched in all other cases.
 *      C_EXTP points to which language we are looking at; C_AUTO is
 *      replaced by C_PLPL in it when a C++ construct is detected.
 *
 * Globals
 *      fvdef                   IN OUT
 *      fvextern                OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      objtag                  OUT
 *      inattribute             OUT
 *      token_name              OUT (Objective C method names only)
 */

static bool
consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
     register char *str;        /* IN: token pointer */
     register int len;          /* IN: token length */
     register int c;            /* IN: first char after the token */
     int *c_extp;               /* IN, OUT: C extensions mask */
     int bracelev;              /* IN: brace level */
     int parlev;                /* IN: parenthesis level */
     bool *is_func_or_var;      /* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  static enum sym_type toktype;


  /* Keyword class of the token, or st_none for a plain identifier. */
  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = TRUE;
      return FALSE;
     }

   /*
    * Advance the definedef state machine.
    */
   switch (definedef)
     {
     case dnone:
       /* We're not on a preprocessor line. */
       if (toktype == st_C_gnumacro)
         {
           fvdef = fdefunkey;
           return FALSE;
         }
       break;
     case dsharpseen:
       /* This is the first token after '#'.  Only the keywords typed
          st_C_define make the next token a macro name. */
       if (toktype == st_C_define)
         {
           definedef = ddefineseen;
         }
       else
         {
           definedef = dignorerest;
         }
       return FALSE;
     case ddefineseen:
       /*
        * Make a tag for any macro, unless it is a constant
        * and constantypedefs is FALSE.
        */
       definedef = dignorerest;
       *is_func_or_var = (c == '(');
       if (!*is_func_or_var && !constantypedefs)
         return FALSE;
       else
         return TRUE;
     case dignorerest:
       return FALSE;
     default:
       /* After reporting the error, go on with the checks below. */
       error ("internal error: definedef value.", (char *)NULL);
     }

   /*
    * Now typedefs
    */
   switch (typdef)
     {
     case tnone:
       if (toktype == st_C_typedef)
         {
           /* The typedef keyword always resets the function/variable
              automaton, but is tracked only when typedefs is set. */
           if (typedefs)
             typdef = tkeyseen;
           fvextern = FALSE;
           fvdef = fvnone;
           return FALSE;
         }
       break;
     case tkeyseen:
       /* Tokens of any other type leave the state unchanged. */
       switch (toktype)
         {
         case st_none:
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           typdef = ttypeseen;
         }
       break;
     case ttypeseen:
       if (structdef == snone && fvdef == fvnone)
         {
           fvdef = fvnameseen;
           return TRUE;
         }
       break;
     case tend:
       switch (toktype)
         {
         case st_C_class:
         case st_C_struct:
         case st_C_enum:
           return FALSE;
         }
       return TRUE;
     }

   /*
    * This structdef business is NOT invoked when we are ctags and the
    * file is plain C.  This is because a struct tag may have the same
    * name as another tag, and this loses with ctags.
    */
   switch (toktype)
     {
     case st_C_javastruct:
       if (structdef == stagseen)
         structdef = scolonseen;
       return FALSE;
     case st_C_template:
     case st_C_class:
       if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
           && bracelev == 0
           && definedef == dnone && structdef == snone
           && typdef == tnone && fvdef == fvnone)
         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
       /* `template' takes part in C++ detection only; `class' goes on
          as a struct-like keyword. */
       if (toktype == st_C_template)
         break;
       /* FALLTHRU */
     case st_C_struct:
     case st_C_enum:
       if (parlev == 0
           && fvdef != vignore
           && (typdef == tkeyseen
               || (typedefs_or_cplusplus && structdef == snone)))
         {
           structdef = skeyseen;
           structtype = toktype;
           structbracelev = bracelev;
           if (fvdef == fvnameseen)
             fvdef = fvnone;
         }
       return FALSE;
     }

   /* The token that follows a struct-like keyword is taken as its tag. */
   if (structdef == skeyseen)
     {
       structdef = stagseen;
       return TRUE;
     }

   if (typdef != tnone)
     definedef = dnone;

   /* Detect Objective C constructs. */
   switch (objdef)
     {
     case onone:
       switch (toktype)
         {
         case st_C_objprot:
           objdef = oprotocol;
           return FALSE;
         case st_C_objimpl:
           objdef = oimplementation;
           return FALSE;
         }
       break;
     case oimplementation:
       /* Save the class tag for functions or variables defined inside. */
       objtag = savenstr (str, len);
       objdef = oinbody;
       return FALSE;
     case oprotocol:
       /* Save the class tag for categories. */
       objtag = savenstr (str, len);
       objdef = otagseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oparenseen:
       objdef = ocatseen;
       *is_func_or_var = TRUE;
       return TRUE;
     case oinbody:
       break;
     case omethodsign:
       if (parlev == 0)
         {
           /* First part of a method name: start token_name with it. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, len);
           strncpy (token_name.buffer, str, len);
           token_name.buffer[len] = '\0';
           return TRUE;
         }
       return FALSE;
     case omethodcolon:
       if (parlev == 0)
         objdef = omethodparm;
       return FALSE;
     case omethodparm:
       if (parlev == 0)
         {
           /* Further part of a method name: append it to token_name. */
           fvdef = fvnone;
           objdef = omethodtag;
           linebuffer_setlen (&token_name, token_name.len + len);
           strncat (token_name.buffer, str, len);
           return TRUE;
         }
       return FALSE;
     case oignore:
       if (toktype == st_C_objend)
         {
           /* Memory leakage here: the string pointed by objtag is
              never released, because many tests would be needed to
              avoid breaking on incorrect input code.  The amount of
              memory leaked here is the sum of the lengths of the
              class tags.
           free (objtag); */
           objdef = onone;
         }
       return FALSE;
     }

   /* A function, variable or enum constant? */
   switch (toktype)
     {
     case st_C_extern:
       fvextern = TRUE;
       switch  (fvdef)
         {
         case finlist:
         case flistseen:
         case fignore:
         case vignore:
           break;
         default:
           fvdef = fvnone;
         }
       return FALSE;
     case st_C_ignore:
       fvextern = FALSE;
       fvdef = vignore;
       return FALSE;
     case st_C_operator:
       fvdef = foperator;
       *is_func_or_var = TRUE;
       return TRUE;
     case st_none:
       if (constantypedefs
           && structdef == snone
           && structtype == st_C_enum && bracelev > structbracelev)
         return TRUE;           /* enum constant */
       switch (fvdef)
         {
         case fdefunkey:
           /* A GNU macro keyword is honoured at brace level 0 only. */
           if (bracelev > 0)
             break;
           fvdef = fdefunname;  /* GNU macro */
           *is_func_or_var = TRUE;
           return TRUE;
         case fvnone:
           switch (typdef)
             {
             case ttypeseen:
               return FALSE;
             case tnone:
               if ((strneq (str, "asm", 3) && endtoken (str[3]))
                   || (strneq (str, "__asm__", 7) && endtoken (str[7])))
                 {
                   fvdef = vignore;
                   return FALSE;
                 }
               break;
             }
          /* FALLTHRU */
          case fvnameseen:
          /* A token ending in "::operator" starts a C++ operator. */
          if (len >= 10 && strneq (str+len-10, "::operator", 10))
            {
              if (*c_extp & C_AUTO) /* automatic detection of C++ */
                *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
              fvdef = foperator;
              *is_func_or_var = TRUE;
              return TRUE;
            }
          /* Inside braces, names count only directly in a struct body. */
          if (bracelev > 0 && !instruct)
            break;
          fvdef = fvnameseen;   /* function or variable */
          *is_func_or_var = TRUE;
          return TRUE;
        }
      break;
    }

  return FALSE;
}
3122
3123 \f
3124 /*
3125  * C_entries often keeps pointers to tokens or lines which are older than
3126  * the line currently read.  By keeping two line buffers, and switching
3127  * them at end of line, it is possible to use those pointers.
3128  */
static struct
{
  long linepos;                 /* value of charno saved when the line
                                   was read (see CNL_SAVE_DEFINEDEF) */
  linebuffer lb;                /* the line itself */
} lbs[2];

/* The macros below expand to expressions that use the variables curndx,
   newndx and c_ext of the function where they appear. */

#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* plainc: no C extension bit is set.  cplpl: some C_PLPL bit is set.
   cjava: all the C_JAVA bits are set. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)
3146
/* Go to a new line without touching definedef: save charno as the
   position of the current line buffer, call readline to fill that
   buffer from inf, restart scanning at its beginning, clear quotednl
   and mark the current buffer as the new one.  Uses charno, inf, lp,
   quotednl, curndx and newndx from the scope where it is expanded. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also end the preprocessor line: if
   savetoken holds a valid token, copy it back into token and invalidate
   savetoken; then reset definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3166
3167
3168 static void
3169 make_C_tag (isfun)
3170      bool isfun;
3171 {
3172   /* This function is never called when token.valid is FALSE, but
3173      we must protect against invalid input or internal errors. */
3174   if (!DEBUG && !token.valid)
3175     return;
3176
3177   if (token.valid)
3178     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3179               token.offset+token.length+1, token.lineno, token.linepos);
3180   else                          /* this case is optimised away if !DEBUG */
3181     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3182               token_name.len + 17, isfun, token.line,
3183               token.offset+token.length+1, token.lineno, token.linepos);
3184
3185   token.valid = FALSE;
3186 }
3187
3188
3189 /*
3190  * C_entries ()
3191  *      This routine finds functions, variables, typedefs,
3192  *      #define's, enum constants and struct/union/enum definitions in
3193  *      C syntax and adds them to the list.
3194  */
3195 static void
3196 C_entries (c_ext, inf)
3197      int c_ext;                 /* extension of C */
3198      FILE *inf;                 /* input file */
3199 {
3200   register char c;              /* latest char read; '\0' for end of line */
3201   register char *lp;            /* pointer one beyond the character `c' */
3202   int curndx, newndx;           /* indices for current and new lb */
3203   register int tokoff;          /* offset in line of start of current token */
3204   register int toklen;          /* length of current token */
3205   char *qualifier;              /* string used to qualify names */
3206   int qlen;                     /* length of qualifier */
3207   int bracelev;                 /* current brace level */
3208   int bracketlev;               /* current bracket level */
3209   int parlev;                   /* current parenthesis level */
3210   int attrparlev;               /* __attribute__ parenthesis level */
3211   int templatelev;              /* current template level */
3212   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3213   bool incomm, inquote, inchar, quotednl, midtoken;
3214   bool yacc_rules;              /* in the rules part of a yacc file */
3215   struct tok savetoken = {0};   /* token saved during preprocessor handling */
3216
3217
3218   linebuffer_init (&lbs[0].lb);
3219   linebuffer_init (&lbs[1].lb);
3220   if (cstack.size == 0)
3221     {
3222       cstack.size = (DEBUG) ? 1 : 4;
3223       cstack.nl = 0;
3224       cstack.cname = xnew (cstack.size, char *);
3225       cstack.bracelev = xnew (cstack.size, int);
3226     }
3227
3228   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3229   curndx = newndx = 0;
3230   lp = curlb.buffer;
3231   *lp = 0;
3232
3233   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3234   structdef = snone; definedef = dnone; objdef = onone;
3235   yacc_rules = FALSE;
3236   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3237   token.valid = savetoken.valid = FALSE;
3238   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3239   if (cjava)
3240     { qualifier = "."; qlen = 1; }
3241   else
3242     { qualifier = "::"; qlen = 2; }
3243
3244
3245   while (!feof (inf))
3246     {
3247       c = *lp++;
3248       if (c == '\\')
3249         {
3250           /* If we are at the end of the line, the next character is a
3251              '\0'; do not skip it, because it is what tells us
3252              to read the next line.  */
3253           if (*lp == '\0')
3254             {
3255               quotednl = TRUE;
3256               continue;
3257             }
3258           lp++;
3259           c = ' ';
3260         }
3261       else if (incomm)
3262         {
3263           switch (c)
3264             {
3265             case '*':
3266               if (*lp == '/')
3267                 {
3268                   c = *lp++;
3269                   incomm = FALSE;
3270                 }
3271               break;
3272             case '\0':
3273               /* Newlines inside comments do not end macro definitions in
3274                  traditional cpp. */
3275               CNL_SAVE_DEFINEDEF ();
3276               break;
3277             }
3278           continue;
3279         }
3280       else if (inquote)
3281         {
3282           switch (c)
3283             {
3284             case '"':
3285               inquote = FALSE;
3286               break;
3287             case '\0':
3288               /* Newlines inside strings do not end macro definitions
3289                  in traditional cpp, even though compilers don't
3290                  usually accept them. */
3291               CNL_SAVE_DEFINEDEF ();
3292               break;
3293             }
3294           continue;
3295         }
3296       else if (inchar)
3297         {
3298           switch (c)
3299             {
3300             case '\0':
3301               /* Hmmm, something went wrong. */
3302               CNL ();
3303               /* FALLTHRU */
3304             case '\'':
3305               inchar = FALSE;
3306               break;
3307             }
3308           continue;
3309         }
3310       else if (bracketlev > 0)
3311         {
3312           switch (c)
3313             {
3314             case ']':
3315               if (--bracketlev > 0)
3316                 continue;
3317               break;
3318             case '\0':
3319               CNL_SAVE_DEFINEDEF ();
3320               break;
3321             }
3322           continue;
3323         }
3324       else switch (c)
3325         {
3326         case '"':
3327           inquote = TRUE;
3328           if (inattribute)
3329             break;
3330           switch (fvdef)
3331             {
3332             case fdefunkey:
3333             case fstartlist:
3334             case finlist:
3335             case fignore:
3336             case vignore:
3337               break;
3338             default:
3339               fvextern = FALSE;
3340               fvdef = fvnone;
3341             }
3342           continue;
3343         case '\'':
3344           inchar = TRUE;
3345           if (inattribute)
3346             break;
3347           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3348             {
3349               fvextern = FALSE;
3350               fvdef = fvnone;
3351             }
3352           continue;
3353         case '/':
3354           if (*lp == '*')
3355             {
3356               lp++;
3357               incomm = TRUE;
3358               continue;
3359             }
3360           else if (/* cplpl && */ *lp == '/')
3361             {
3362               c = '\0';
3363               break;
3364             }
3365           else
3366             break;
3367         case '%':
3368           if ((c_ext & YACC) && *lp == '%')
3369             {
3370               /* Entering or exiting rules section in yacc file. */
3371               lp++;
3372               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3373               typdef = tnone; structdef = snone;
3374               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3375               bracelev = 0;
3376               yacc_rules = !yacc_rules;
3377               continue;
3378             }
3379           else
3380             break;
3381         case '#':
3382           if (definedef == dnone)
3383             {
3384               char *cp;
3385               bool cpptoken = TRUE;
3386
3387               /* Look back on this line.  If all blanks, or nonblanks
3388                  followed by an end of comment, this is a preprocessor
3389                  token. */
3390               for (cp = newlb.buffer; cp < lp-1; cp++)
3391                 if (!iswhite (*cp))
3392                   {
3393                     if (*cp == '*' && *(cp+1) == '/')
3394                       {
3395                         cp++;
3396                         cpptoken = TRUE;
3397                       }
3398                     else
3399                       cpptoken = FALSE;
3400                   }
3401               if (cpptoken)
3402                 definedef = dsharpseen;
3403             } /* if (definedef == dnone) */
3404           continue;
3405         case '[':
3406           bracketlev++;
3407             continue;
3408         } /* switch (c) */
3409
3410
3411       /* Consider token only if some involved conditions are satisfied. */
3412       if (typdef != tignore
3413           && definedef != dignorerest
3414           && fvdef != finlist
3415           && templatelev == 0
3416           && (definedef != dnone
3417               || structdef != scolonseen)
3418           && !inattribute)
3419         {
3420           if (midtoken)
3421             {
3422               if (endtoken (c))
3423                 {
3424                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3425                     /* This handles :: in the middle,
3426                        but not at the beginning of an identifier.
3427                        Also, space-separated :: is not recognised. */
3428                     {
3429                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3430                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3431                       lp += 2;
3432                       toklen += 2;
3433                       c = lp[-1];
3434                       goto still_in_token;
3435                     }
3436                   else
3437                     {
3438                       bool funorvar = FALSE;
3439
3440                       if (yacc_rules
3441                           || consider_token (newlb.buffer + tokoff, toklen, c,
3442                                              &c_ext, bracelev, parlev,
3443                                              &funorvar))
3444                         {
3445                           if (fvdef == foperator)
3446                             {
3447                               char *oldlp = lp;
3448                               lp = skip_spaces (lp-1);
3449                               if (*lp != '\0')
3450                                 lp += 1;
3451                               while (*lp != '\0'
3452                                      && !iswhite (*lp) && *lp != '(')
3453                                 lp += 1;
3454                               c = *lp++;
3455                               toklen += lp - oldlp;
3456                             }
3457                           token.named = FALSE;
3458                           if (!plainc
3459                               && nestlev > 0 && definedef == dnone)
3460                             /* in struct body */
3461                             {
3462                               write_classname (&token_name, qualifier);
3463                               linebuffer_setlen (&token_name,
3464                                                  token_name.len+qlen+toklen);
3465                               strcat (token_name.buffer, qualifier);
3466                               strncat (token_name.buffer,
3467                                        newlb.buffer + tokoff, toklen);
3468                               token.named = TRUE;
3469                             }
3470                           else if (objdef == ocatseen)
3471                             /* Objective C category */
3472                             {
3473                               int len = strlen (objtag) + 2 + toklen;
3474                               linebuffer_setlen (&token_name, len);
3475                               strcpy (token_name.buffer, objtag);
3476                               strcat (token_name.buffer, "(");
3477                               strncat (token_name.buffer,
3478                                        newlb.buffer + tokoff, toklen);
3479                               strcat (token_name.buffer, ")");
3480                               token.named = TRUE;
3481                             }
3482                           else if (objdef == omethodtag
3483                                    || objdef == omethodparm)
3484                             /* Objective C method */
3485                             {
3486                               token.named = TRUE;
3487                             }
3488                           else if (fvdef == fdefunname)
3489                             /* GNU DEFUN and similar macros */
3490                             {
3491                               bool defun = (newlb.buffer[tokoff] == 'F');
3492                               int off = tokoff;
3493                               int len = toklen;
3494
3495                               /* Rewrite the tag so that emacs lisp DEFUNs
3496                                  can be found by their elisp name */
3497                               if (defun)
3498                                 {
3499                                   off += 1;
3500                                   len -= 1;
3501                                 }
3502                               linebuffer_setlen (&token_name, len);
3503                               strncpy (token_name.buffer,
3504                                        newlb.buffer + off, len);
3505                               token_name.buffer[len] = '\0';
3506                               if (defun)
3507                                 while (--len >= 0)
3508                                   if (token_name.buffer[len] == '_')
3509                                     token_name.buffer[len] = '-';
3510                               token.named = defun;
3511                             }
3512                           else
3513                             {
3514                               linebuffer_setlen (&token_name, toklen);
3515                               strncpy (token_name.buffer,
3516                                        newlb.buffer + tokoff, toklen);
3517                               token_name.buffer[toklen] = '\0';
3518                               /* Name macros and members. */
3519                               token.named = (structdef == stagseen
3520                                              || typdef == ttypeseen
3521                                              || typdef == tend
3522                                              || (funorvar
3523                                                  && definedef == dignorerest)
3524                                              || (funorvar
3525                                                  && definedef == dnone
3526                                                  && structdef == snone
3527                                                  && bracelev > 0));
3528                             }
3529                           token.lineno = lineno;
3530                           token.offset = tokoff;
3531                           token.length = toklen;
3532                           token.line = newlb.buffer;
3533                           token.linepos = newlinepos;
3534                           token.valid = TRUE;
3535
3536                           if (definedef == dnone
3537                               && (fvdef == fvnameseen
3538                                   || fvdef == foperator
3539                                   || structdef == stagseen
3540                                   || typdef == tend
3541                                   || typdef == ttypeseen
3542                                   || objdef != onone))
3543                             {
3544                               if (current_lb_is_new)
3545                                 switch_line_buffers ();
3546                             }
3547                           else if (definedef != dnone
3548                                    || fvdef == fdefunname
3549                                    || instruct)
3550                             make_C_tag (funorvar);
3551                         }
3552                       else /* not yacc and consider_token failed */
3553                         {
3554                           if (inattribute && fvdef == fignore)
3555                             {
3556                               /* We have just met __attribute__ after a
3557                                  function parameter list: do not tag the
3558                                  function again. */
3559                               fvdef = fvnone;
3560                             }
3561                         }
3562                       midtoken = FALSE;
3563                     }
3564                 } /* if (endtoken (c)) */
3565               else if (intoken (c))
3566                 still_in_token:
3567                 {
3568                   toklen++;
3569                   continue;
3570                 }
3571             } /* if (midtoken) */
3572           else if (begtoken (c))
3573             {
3574               switch (definedef)
3575                 {
3576                 case dnone:
3577                   switch (fvdef)
3578                     {
3579                     case fstartlist:
3580                       /* This prevents tagging fb in
3581                          void (__attribute__((noreturn)) *fb) (void);
3582                          Fixing this is not easy and not very important. */
3583                       fvdef = finlist;
3584                       continue;
3585                     case flistseen:
3586                       if (plainc || declarations)
3587                         {
3588                           make_C_tag (TRUE); /* a function */
3589                           fvdef = fignore;
3590                         }
3591                       break;
3592                     }
3593                   if (structdef == stagseen && !cjava)
3594                     {
3595                       popclass_above (bracelev);
3596                       structdef = snone;
3597                     }
3598                   break;
3599                 case dsharpseen:
3600                   savetoken = token;
3601                   break;
3602                 }
3603               if (!yacc_rules || lp == newlb.buffer + 1)
3604                 {
3605                   tokoff = lp - 1 - newlb.buffer;
3606                   toklen = 1;
3607                   midtoken = TRUE;
3608                 }
3609               continue;
3610             } /* if (begtoken) */
3611         } /* if must look at token */
3612
3613
3614       /* Detect end of line, colon, comma, semicolon and various braces
3615          after having handled a token.*/
3616       switch (c)
3617         {
3618         case ':':
3619           if (inattribute)
3620             break;
3621           if (yacc_rules && token.offset == 0 && token.valid)
3622             {
3623               make_C_tag (FALSE); /* a yacc function */
3624               break;
3625             }
3626           if (definedef != dnone)
3627             break;
3628           switch (objdef)
3629             {
3630             case  otagseen:
3631               objdef = oignore;
3632               make_C_tag (TRUE); /* an Objective C class */
3633               break;
3634             case omethodtag:
3635             case omethodparm:
3636               objdef = omethodcolon;
3637               linebuffer_setlen (&token_name, token_name.len + 1);
3638               strcat (token_name.buffer, ":");
3639               break;
3640             }
3641           if (structdef == stagseen)
3642             {
3643               structdef = scolonseen;
3644               break;
3645             }
3646           /* Should be useless, but may be work as a safety net. */
3647           if (cplpl && fvdef == flistseen)
3648             {
3649               make_C_tag (TRUE); /* a function */
3650               fvdef = fignore;
3651               break;
3652             }
3653           break;
3654         case ';':
3655           if (definedef != dnone || inattribute)
3656             break;
3657           switch (typdef)
3658             {
3659             case tend:
3660             case ttypeseen:
3661               make_C_tag (FALSE); /* a typedef */
3662               typdef = tnone;
3663               fvdef = fvnone;
3664               break;
3665             case tnone:
3666             case tinbody:
3667             case tignore:
3668               switch (fvdef)
3669                 {
3670                 case fignore:
3671                   if (typdef == tignore || cplpl)
3672                     fvdef = fvnone;
3673                   break;
3674                 case fvnameseen:
3675                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3676                       || (members && instruct))
3677                     make_C_tag (FALSE); /* a variable */
3678                   fvextern = FALSE;
3679                   fvdef = fvnone;
3680                   token.valid = FALSE;
3681                   break;
3682                 case flistseen:
3683                   if ((declarations
3684                        && (cplpl || !instruct)
3685                        && (typdef == tnone || (typdef != tignore && instruct)))
3686                       || (members
3687                           && plainc && instruct))
3688                     make_C_tag (TRUE);  /* a function */
3689                   /* FALLTHRU */
3690                 default:
3691                   fvextern = FALSE;
3692                   fvdef = fvnone;
3693                   if (declarations
3694                        && cplpl && structdef == stagseen)
3695                     make_C_tag (FALSE); /* forward declaration */
3696                   else
3697                     token.valid = FALSE;
3698                 } /* switch (fvdef) */
3699               /* FALLTHRU */
3700             default:
3701               if (!instruct)
3702                 typdef = tnone;
3703             }
3704           if (structdef == stagseen)
3705             structdef = snone;
3706           break;
3707         case ',':
3708           if (definedef != dnone || inattribute)
3709             break;
3710           switch (objdef)
3711             {
3712             case omethodtag:
3713             case omethodparm:
3714               make_C_tag (TRUE); /* an Objective C method */
3715               objdef = oinbody;
3716               break;
3717             }
3718           switch (fvdef)
3719             {
3720             case fdefunkey:
3721             case foperator:
3722             case fstartlist:
3723             case finlist:
3724             case fignore:
3725             case vignore:
3726               break;
3727             case fdefunname:
3728               fvdef = fignore;
3729               break;
3730             case fvnameseen:
3731               if (parlev == 0
3732                   && ((globals
3733                        && bracelev == 0
3734                        && templatelev == 0
3735                        && (!fvextern || declarations))
3736                       || (members && instruct)))
3737                   make_C_tag (FALSE); /* a variable */
3738               break;
3739             case flistseen:
3740               if ((declarations && typdef == tnone && !instruct)
3741                   || (members && typdef != tignore && instruct))
3742                 {
3743                   make_C_tag (TRUE); /* a function */
3744                   fvdef = fvnameseen;
3745                 }
3746               else if (!declarations)
3747                 fvdef = fvnone;
3748               token.valid = FALSE;
3749               break;
3750             default:
3751               fvdef = fvnone;
3752             }
3753           if (structdef == stagseen)
3754             structdef = snone;
3755           break;
3756         case ']':
3757           if (definedef != dnone || inattribute)
3758             break;
3759           if (structdef == stagseen)
3760             structdef = snone;
3761           switch (typdef)
3762             {
3763             case ttypeseen:
3764             case tend:
3765               typdef = tignore;
3766               make_C_tag (FALSE);       /* a typedef */
3767               break;
3768             case tnone:
3769             case tinbody:
3770               switch (fvdef)
3771                 {
3772                 case foperator:
3773                 case finlist:
3774                 case fignore:
3775                 case vignore:
3776                   break;
3777                 case fvnameseen:
3778                   if ((members && bracelev == 1)
3779                       || (globals && bracelev == 0
3780                           && (!fvextern || declarations)))
3781                     make_C_tag (FALSE); /* a variable */
3782                   /* FALLTHRU */
3783                 default:
3784                   fvdef = fvnone;
3785                 }
3786               break;
3787             }
3788           break;
3789         case '(':
3790           if (inattribute)
3791             {
3792               attrparlev++;
3793               break;
3794             }
3795           if (definedef != dnone)
3796             break;
3797           if (objdef == otagseen && parlev == 0)
3798             objdef = oparenseen;
3799           switch (fvdef)
3800             {
3801             case fvnameseen:
3802               if (typdef == ttypeseen
3803                   && *lp != '*'
3804                   && !instruct)
3805                 {
3806                   /* This handles constructs like:
3807                      typedef void OperatorFun (int fun); */
3808                   make_C_tag (FALSE);
3809                   typdef = tignore;
3810                   fvdef = fignore;
3811                   break;
3812                 }
3813               /* FALLTHRU */
3814             case foperator:
3815               fvdef = fstartlist;
3816               break;
3817             case flistseen:
3818               fvdef = finlist;
3819               break;
3820             }
3821           parlev++;
3822           break;
3823         case ')':
3824           if (inattribute)
3825             {
3826               if (--attrparlev == 0)
3827                 inattribute = FALSE;
3828               break;
3829             }
3830           if (definedef != dnone)
3831             break;
3832           if (objdef == ocatseen && parlev == 1)
3833             {
3834               make_C_tag (TRUE); /* an Objective C category */
3835               objdef = oignore;
3836             }
3837           if (--parlev == 0)
3838             {
3839               switch (fvdef)
3840                 {
3841                 case fstartlist:
3842                 case finlist:
3843                   fvdef = flistseen;
3844                   break;
3845                 }
3846               if (!instruct
3847                   && (typdef == tend
3848                       || typdef == ttypeseen))
3849                 {
3850                   typdef = tignore;
3851                   make_C_tag (FALSE); /* a typedef */
3852                 }
3853             }
3854           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3855             parlev = 0;
3856           break;
3857         case '{':
3858           if (definedef != dnone)
3859             break;
3860           if (typdef == ttypeseen)
3861             {
3862               /* Whenever typdef is set to tinbody (currently only
3863                  here), typdefbracelev should be set to bracelev. */
3864               typdef = tinbody;
3865               typdefbracelev = bracelev;
3866             }
3867           switch (fvdef)
3868             {
3869             case flistseen:
3870               make_C_tag (TRUE);    /* a function */
3871               /* FALLTHRU */
3872             case fignore:
3873               fvdef = fvnone;
3874               break;
3875             case fvnone:
3876               switch (objdef)
3877                 {
3878                 case otagseen:
3879                   make_C_tag (TRUE); /* an Objective C class */
3880                   objdef = oignore;
3881                   break;
3882                 case omethodtag:
3883                 case omethodparm:
3884                   make_C_tag (TRUE); /* an Objective C method */
3885                   objdef = oinbody;
3886                   break;
3887                 default:
3888                   /* Neutralize `extern "C" {' grot. */
3889                   if (bracelev == 0 && structdef == snone && nestlev == 0
3890                       && typdef == tnone)
3891                     bracelev = -1;
3892                 }
3893               break;
3894             }
3895           switch (structdef)
3896             {
3897             case skeyseen:         /* unnamed struct */
3898               pushclass_above (bracelev, NULL, 0);
3899               structdef = snone;
3900               break;
3901             case stagseen:         /* named struct or enum */
3902             case scolonseen:       /* a class */
3903               pushclass_above (bracelev,token.line+token.offset, token.length);
3904               structdef = snone;
3905               make_C_tag (FALSE);  /* a struct or enum */
3906               break;
3907             }
3908           bracelev++;
3909           break;
3910         case '*':
3911           if (definedef != dnone)
3912             break;
3913           if (fvdef == fstartlist)
3914             {
3915               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3916               token.valid = FALSE;
3917             }
3918           break;
3919         case '}':
3920           if (definedef != dnone)
3921             break;
3922           if (!ignoreindent && lp == newlb.buffer + 1)
3923             {
3924               if (bracelev != 0)
3925                 token.valid = FALSE;
3926               bracelev = 0;     /* reset brace level if first column */
3927               parlev = 0;       /* also reset paren level, just in case... */
3928             }
3929           else if (bracelev > 0)
3930             bracelev--;
3931           else
3932             token.valid = FALSE; /* something gone amiss, token unreliable */
3933           popclass_above (bracelev);
3934           structdef = snone;
3935           /* Only if typdef == tinbody is typdefbracelev significant. */
3936           if (typdef == tinbody && bracelev <= typdefbracelev)
3937             {
3938               assert (bracelev == typdefbracelev);
3939               typdef = tend;
3940             }
3941           break;
3942         case '=':
3943           if (definedef != dnone)
3944             break;
3945           switch (fvdef)
3946             {
3947             case foperator:
3948             case finlist:
3949             case fignore:
3950             case vignore:
3951               break;
3952             case fvnameseen:
3953               if ((members && bracelev == 1)
3954                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3955                 make_C_tag (FALSE); /* a variable */
3956               /* FALLTHRU */
3957             default:
3958               fvdef = vignore;
3959             }
3960           break;
3961         case '<':
3962           if (cplpl
3963               && (structdef == stagseen || fvdef == fvnameseen))
3964             {
3965               templatelev++;
3966               break;
3967             }
3968           goto resetfvdef;
3969         case '>':
3970           if (templatelev > 0)
3971             {
3972               templatelev--;
3973               break;
3974             }
3975           goto resetfvdef;
3976         case '+':
3977         case '-':
3978           if (objdef == oinbody && bracelev == 0)
3979             {
3980               objdef = omethodsign;
3981               break;
3982             }
3983           /* FALLTHRU */
3984         resetfvdef:
3985         case '#': case '~': case '&': case '%': case '/':
3986         case '|': case '^': case '!': case '.': case '?':
3987           if (definedef != dnone)
3988             break;
3989           /* These surely cannot follow a function tag in C. */
3990           switch (fvdef)
3991             {
3992             case foperator:
3993             case finlist:
3994             case fignore:
3995             case vignore:
3996               break;
3997             default:
3998               fvdef = fvnone;
3999             }
4000           break;
4001         case '\0':
4002           if (objdef == otagseen)
4003             {
4004               make_C_tag (TRUE); /* an Objective C class */
4005               objdef = oignore;
4006             }
4007           /* If a macro spans multiple lines don't reset its state. */
4008           if (quotednl)
4009             CNL_SAVE_DEFINEDEF ();
4010           else
4011             CNL ();
4012           break;
4013         } /* switch (c) */
4014
4015     } /* while not eof */
4016
4017   free (lbs[0].lb.buffer);
4018   free (lbs[1].lb.buffer);
4019 }
4020
4021 /*
4022  * Process either a C++ file or a C file depending on the setting
4023  * of a global flag.
4024  */
4025 static void
4026 default_C_entries (inf)
4027      FILE *inf;
4028 {
4029   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
4030 }
4031
/* Parse INF as plain C: no language-extension flag is passed on.  */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int dialect = 0;              /* no extension bits set */

  C_entries (dialect, inf);
}
4039
4040 /* Always do C++. */
4041 static void
4042 Cplusplus_entries (inf)
4043      FILE *inf;
4044 {
4045   C_entries (C_PLPL, inf);
4046 }
4047
4048 /* Always do Java. */
4049 static void
4050 Cjava_entries (inf)
4051      FILE *inf;
4052 {
4053   C_entries (C_JAVA, inf);
4054 }
4055
4056 /* Always do C*. */
4057 static void
4058 Cstar_entries (inf)
4059      FILE *inf;
4060 {
4061   C_entries (C_STAR, inf);
4062 }
4063
4064 /* Always do Yacc. */
4065 static void
4066 Yacc_entries (inf)
4067      FILE *inf;
4068 {
4069   C_entries (YACC, inf);
4070 }
4071
4072 \f
/* Useful macros. */

/* Loop header that runs the following statement once per input line.
   Before each iteration, as long as feof (file_pointer) is false, a line
   is read into line_buffer with readline and char_pointer is set to the
   start of line_buffer.buffer.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  while (!feof (file_pointer)          /* stop at end of file */        \
         && (readline (&line_buffer, file_pointer), /* fetch line */    \
             char_pointer = line_buffer.buffer,     /* point at it */   \
             TRUE))                    /* comma expr is always true */
4082
/* Test whether CP points at the keyword KW and, if so, move CP past it.
   The test succeeds when the text at CP matches KW (strneq over the
   length of KW) and notinname holds for the character right after it.
   On success CP is reassigned to skip_spaces of the position following
   KW.  CP is expanded several times and assigned to, so it must be an
   lvalue without side effects.  */
#define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
  ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
   && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
   && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
   && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4088
/* Similar to LOOKING_AT but compares with strncaseeq, does not use
   notinname (so no check on the character following KW), and does not
   skip spaces: on success CP is left immediately after KW.  */
#define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
  ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
   && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
   && ((cp) += sizeof(kw)-1))                   /* step past kw */
4094
4095 /*
4096  * Read a file, but do no processing.  This is used to do regexp
4097  * matching on files that have no language defined.
4098  */
4099 static void
4100 just_read_file (inf)
4101      FILE *inf;
4102 {
4103   register char *dummy;
4104
4105   LOOP_ON_INPUT_LINES (inf, lb, dummy)
4106     continue;
4107 }
4108
4109 \f
4110 /* Fortran parsing */
4111
4112 static void F_takeprec __P((void));
4113 static void F_getit __P((FILE *));
4114
4115 static void
4116 F_takeprec ()
4117 {
4118   dbp = skip_spaces (dbp);
4119   if (*dbp != '*')
4120     return;
4121   dbp++;
4122   dbp = skip_spaces (dbp);
4123   if (strneq (dbp, "(*)", 3))
4124     {
4125       dbp += 3;
4126       return;
4127     }
4128   if (!ISDIGIT (*dbp))
4129     {
4130       --dbp;                    /* force failure */
4131       return;
4132     }
4133   do
4134     dbp++;
4135   while (ISDIGIT (*dbp));
4136 }
4137
4138 static void
4139 F_getit (inf)
4140      FILE *inf;
4141 {
4142   register char *cp;
4143
4144   dbp = skip_spaces (dbp);
4145   if (*dbp == '\0')
4146     {
4147       readline (&lb, inf);
4148       dbp = lb.buffer;
4149       if (dbp[5] != '&')
4150         return;
4151       dbp += 6;
4152       dbp = skip_spaces (dbp);
4153     }
4154   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4155     return;
4156   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4157     continue;
4158   make_tag (dbp, cp-dbp, TRUE,
4159             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4160 }
4161
4162
4163 static void
4164 Fortran_functions (inf)
4165      FILE *inf;
4166 {
4167   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4168     {
4169       if (*dbp == '%')
4170         dbp++;                  /* Ratfor escape to fortran */
4171       dbp = skip_spaces (dbp);
4172       if (*dbp == '\0')
4173         continue;
4174       switch (lowcase (*dbp))
4175         {
4176         case 'i':
4177           if (nocase_tail ("integer"))
4178             F_takeprec ();
4179           break;
4180         case 'r':
4181           if (nocase_tail ("real"))
4182             F_takeprec ();
4183           break;
4184         case 'l':
4185           if (nocase_tail ("logical"))
4186             F_takeprec ();
4187           break;
4188         case 'c':
4189           if (nocase_tail ("complex") || nocase_tail ("character"))
4190             F_takeprec ();
4191           break;
4192         case 'd':
4193           if (nocase_tail ("double"))
4194             {
4195               dbp = skip_spaces (dbp);
4196               if (*dbp == '\0')
4197                 continue;
4198               if (nocase_tail ("precision"))
4199                 break;
4200               continue;
4201             }
4202           break;
4203         }
4204       dbp = skip_spaces (dbp);
4205       if (*dbp == '\0')
4206         continue;
4207       switch (lowcase (*dbp))
4208         {
4209         case 'f':
4210           if (nocase_tail ("function"))
4211             F_getit (inf);
4212           continue;
4213         case 's':
4214           if (nocase_tail ("subroutine"))
4215             F_getit (inf);
4216           continue;
4217         case 'e':
4218           if (nocase_tail ("entry"))
4219             F_getit (inf);
4220           continue;
4221         case 'b':
4222           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
4223             {
4224               dbp = skip_spaces (dbp);
4225               if (*dbp == '\0') /* assume un-named */
4226                 make_tag ("blockdata", 9, TRUE,
4227                           lb.buffer, dbp - lb.buffer, lineno, linecharno);
4228               else
4229                 F_getit (inf);  /* look for name */
4230             }
4231           continue;
4232         }
4233     }
4234 }
4235
4236 \f
4237 /*
4238  * Ada parsing
4239  * Original code by
4240  * Philippe Waroquiers (1998)
4241  */
4242
4243 static void Ada_getit __P((FILE *, char *));
4244
4245 /* Once we are positioned after an "interesting" keyword, let's get
4246    the real tag value necessary. */
4247 static void
4248 Ada_getit (inf, name_qualifier)
4249      FILE *inf;
4250      char *name_qualifier;
4251 {
4252   register char *cp;
4253   char *name;
4254   char c;
4255
4256   while (!feof (inf))
4257     {
4258       dbp = skip_spaces (dbp);
4259       if (*dbp == '\0'
4260           || (dbp[0] == '-' && dbp[1] == '-'))
4261         {
4262           readline (&lb, inf);
4263           dbp = lb.buffer;
4264         }
4265       switch (lowcase(*dbp))
4266         {
4267         case 'b':
4268           if (nocase_tail ("body"))
4269             {
4270               /* Skipping body of   procedure body   or   package body or ....
4271                  resetting qualifier to body instead of spec. */
4272               name_qualifier = "/b";
4273               continue;
4274             }
4275           break;
4276         case 't':
4277           /* Skipping type of   task type   or   protected type ... */
4278           if (nocase_tail ("type"))
4279             continue;
4280           break;
4281         }
4282       if (*dbp == '"')
4283         {
4284           dbp += 1;
4285           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4286             continue;
4287         }
4288       else
4289         {
4290           dbp = skip_spaces (dbp);
4291           for (cp = dbp;
4292                (*cp != '\0'
4293                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4294                cp++)
4295             continue;
4296           if (cp == dbp)
4297             return;
4298         }
4299       c = *cp;
4300       *cp = '\0';
4301       name = concat (dbp, name_qualifier, "");
4302       *cp = c;
4303       make_tag (name, strlen (name), TRUE,
4304                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4305       free (name);
4306       if (c == '"')
4307         dbp = cp + 1;
4308       return;
4309     }
4310 }
4311
4312 static void
4313 Ada_funcs (inf)
4314      FILE *inf;
4315 {
4316   bool inquote = FALSE;
4317   bool skip_till_semicolumn = FALSE;
4318
4319   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4320     {
4321       while (*dbp != '\0')
4322         {
4323           /* Skip a string i.e. "abcd". */
4324           if (inquote || (*dbp == '"'))
4325             {
4326               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4327               if (dbp != NULL)
4328                 {
4329                   inquote = FALSE;
4330                   dbp += 1;
4331                   continue;     /* advance char */
4332                 }
4333               else
4334                 {
4335                   inquote = TRUE;
4336                   break;        /* advance line */
4337                 }
4338             }
4339
4340           /* Skip comments. */
4341           if (dbp[0] == '-' && dbp[1] == '-')
4342             break;              /* advance line */
4343
4344           /* Skip character enclosed in single quote i.e. 'a'
4345              and skip single quote starting an attribute i.e. 'Image. */
4346           if (*dbp == '\'')
4347             {
4348               dbp++ ;
4349               if (*dbp != '\0')
4350                 dbp++;
4351               continue;
4352             }
4353
4354           if (skip_till_semicolumn)
4355             {
4356               if (*dbp == ';')
4357                 skip_till_semicolumn = FALSE;
4358               dbp++;
4359               continue;         /* advance char */
4360             }
4361
4362           /* Search for beginning of a token.  */
4363           if (!begtoken (*dbp))
4364             {
4365               dbp++;
4366               continue;         /* advance char */
4367             }
4368
4369           /* We are at the beginning of a token. */
4370           switch (lowcase(*dbp))
4371             {
4372             case 'f':
4373               if (!packages_only && nocase_tail ("function"))
4374                 Ada_getit (inf, "/f");
4375               else
4376                 break;          /* from switch */
4377               continue;         /* advance char */
4378             case 'p':
4379               if (!packages_only && nocase_tail ("procedure"))
4380                 Ada_getit (inf, "/p");
4381               else if (nocase_tail ("package"))
4382                 Ada_getit (inf, "/s");
4383               else if (nocase_tail ("protected")) /* protected type */
4384                 Ada_getit (inf, "/t");
4385               else
4386                 break;          /* from switch */
4387               continue;         /* advance char */
4388
4389             case 'u':
4390               if (typedefs && !packages_only && nocase_tail ("use"))
4391                 {
4392                   /* when tagging types, avoid tagging  use type Pack.Typename;
4393                      for this, we will skip everything till a ; */
4394                   skip_till_semicolumn = TRUE;
4395                   continue;     /* advance char */
4396                 }
4397
4398             case 't':
4399               if (!packages_only && nocase_tail ("task"))
4400                 Ada_getit (inf, "/k");
4401               else if (typedefs && !packages_only && nocase_tail ("type"))
4402                 {
4403                   Ada_getit (inf, "/t");
4404                   while (*dbp != '\0')
4405                     dbp += 1;
4406                 }
4407               else
4408                 break;          /* from switch */
4409               continue;         /* advance char */
4410             }
4411
4412           /* Look for the end of the token. */
4413           while (!endtoken (*dbp))
4414             dbp++;
4415
4416         } /* advance char */
4417     } /* advance line */
4418 }
4419
4420 \f
4421 /*
4422  * Unix and microcontroller assembly tag handling
4423  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4424  * Idea by Bob Weiner, Motorola Inc. (1994)
4425  */
4426 static void
4427 Asm_labels (inf)
4428      FILE *inf;
4429 {
4430   register char *cp;
4431
4432   LOOP_ON_INPUT_LINES (inf, lb, cp)
4433     {
4434       /* If first char is alphabetic or one of [_.$], test for colon
4435          following identifier. */
4436       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4437         {
4438           /* Read past label. */
4439           cp++;
4440           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4441             cp++;
4442           if (*cp == ':' || iswhite (*cp))
4443             /* Found end of label, so copy it and add it to the table. */
4444             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4445                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4446         }
4447     }
4448 }
4449
4450 \f
4451 /*
4452  * Perl support
4453  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4454  * Perl variable names: /^(my|local).../
4455  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4456  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4457  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4458  */
4459 static void
4460 Perl_functions (inf)
4461      FILE *inf;
4462 {
4463   char *package = savestr ("main"); /* current package name */
4464   register char *cp;
4465
4466   LOOP_ON_INPUT_LINES (inf, lb, cp)
4467     {
4468       skip_spaces(cp);
4469
4470       if (LOOKING_AT (cp, "package"))
4471         {
4472           free (package);
4473           get_tag (cp, &package);
4474         }
4475       else if (LOOKING_AT (cp, "sub"))
4476         {
4477           char *pos;
4478           char *sp = cp;
4479
4480           while (!notinname (*cp))
4481             cp++;
4482           if (cp == sp)
4483             continue;           /* nothing found */
4484           if ((pos = etags_strchr (sp, ':')) != NULL
4485               && pos < cp && pos[1] == ':')
4486             /* The name is already qualified. */
4487             make_tag (sp, cp - sp, TRUE,
4488                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4489           else
4490             /* Qualify it. */
4491             {
4492               char savechar, *name;
4493
4494               savechar = *cp;
4495               *cp = '\0';
4496               name = concat (package, "::", sp);
4497               *cp = savechar;
4498               make_tag (name, strlen(name), TRUE,
4499                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4500               free (name);
4501             }
4502         }
4503        else if (globals)        /* only if we are tagging global vars */
4504         {
4505           /* Skip a qualifier, if any. */
4506           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4507           /* After "my" or "local", but before any following paren or space. */
4508           char *varstart = cp;
4509
4510           if (qual              /* should this be removed?  If yes, how? */
4511               && (*cp == '$' || *cp == '@' || *cp == '%'))
4512             {
4513               varstart += 1;
4514               do
4515                 cp++;
4516               while (ISALNUM (*cp) || *cp == '_');
4517             }
4518           else if (qual)
4519             {
4520               /* Should be examining a variable list at this point;
4521                  could insist on seeing an open parenthesis. */
4522               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4523                 cp++;
4524             }
4525           else
4526             continue;
4527
4528           make_tag (varstart, cp - varstart, FALSE,
4529                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4530         }
4531     }
4532   free (package);
4533 }
4534
4535
4536 /*
4537  * Python support
4538  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4539  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4540  * More ideas by seb bacon <seb@jamkit.com> (2002)
4541  */
4542 static void
4543 Python_functions (inf)
4544      FILE *inf;
4545 {
4546   register char *cp;
4547
4548   LOOP_ON_INPUT_LINES (inf, lb, cp)
4549     {
4550       cp = skip_spaces (cp);
4551       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4552         {
4553           char *name = cp;
4554           while (!notinname (*cp) && *cp != ':')
4555             cp++;
4556           make_tag (name, cp - name, TRUE,
4557                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4558         }
4559     }
4560 }
4561
4562 \f
4563 /*
4564  * PHP support
4565  * Look for:
4566  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4567  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4568  *  - /^[ \t]*define\(\"[^\"]+/
4569  * Only with --members:
4570  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4571  * Idea by Diez B. Roggisch (2001)
4572  */
4573 static void
4574 PHP_functions (inf)
4575      FILE *inf;
4576 {
4577   register char *cp, *name;
4578   bool search_identifier = FALSE;
4579
4580   LOOP_ON_INPUT_LINES (inf, lb, cp)
4581     {
4582       cp = skip_spaces (cp);
4583       name = cp;
4584       if (search_identifier
4585           && *cp != '\0')
4586         {
4587           while (!notinname (*cp))
4588             cp++;
4589           make_tag (name, cp - name, TRUE,
4590                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4591           search_identifier = FALSE;
4592         }
4593       else if (LOOKING_AT (cp, "function"))
4594         {
4595           if(*cp == '&')
4596             cp = skip_spaces (cp+1);
4597           if(*cp != '\0')
4598             {
4599               name = cp;
4600               while (!notinname (*cp))
4601                 cp++;
4602               make_tag (name, cp - name, TRUE,
4603                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4604             }
4605           else
4606             search_identifier = TRUE;
4607         }
4608       else if (LOOKING_AT (cp, "class"))
4609         {
4610           if (*cp != '\0')
4611             {
4612               name = cp;
4613               while (*cp != '\0' && !iswhite (*cp))
4614                 cp++;
4615               make_tag (name, cp - name, FALSE,
4616                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4617             }
4618           else
4619             search_identifier = TRUE;
4620         }
4621       else if (strneq (cp, "define", 6)
4622                && (cp = skip_spaces (cp+6))
4623                && *cp++ == '('
4624                && (*cp == '"' || *cp == '\''))
4625         {
4626           char quote = *cp++;
4627           name = cp;
4628           while (*cp != quote && *cp != '\0')
4629             cp++;
4630           make_tag (name, cp - name, FALSE,
4631                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4632         }
4633       else if (members
4634                && LOOKING_AT (cp, "var")
4635                && *cp == '$')
4636         {
4637           name = cp;
4638           while (!notinname(*cp))
4639             cp++;
4640           make_tag (name, cp - name, FALSE,
4641                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4642         }
4643     }
4644 }
4645
4646 \f
4647 /*
4648  * Cobol tag functions
4649  * We could look for anything that could be a paragraph name.
4650  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4651  * Idea by Corny de Souza (1993)
4652  */
4653 static void
4654 Cobol_paragraphs (inf)
4655      FILE *inf;
4656 {
4657   register char *bp, *ep;
4658
4659   LOOP_ON_INPUT_LINES (inf, lb, bp)
4660     {
4661       if (lb.len < 9)
4662         continue;
4663       bp += 8;
4664
4665       /* If eoln, compiler option or comment ignore whole line. */
4666       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4667         continue;
4668
4669       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4670         continue;
4671       if (*ep++ == '.')
4672         make_tag (bp, ep - bp, TRUE,
4673                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4674     }
4675 }
4676
4677 \f
4678 /*
4679  * Makefile support
4680  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4681  */
4682 static void
4683 Makefile_targets (inf)
4684      FILE *inf;
4685 {
4686   register char *bp;
4687
4688   LOOP_ON_INPUT_LINES (inf, lb, bp)
4689     {
4690       if (*bp == '\t' || *bp == '#')
4691         continue;
4692       while (*bp != '\0' && *bp != '=' && *bp != ':')
4693         bp++;
4694       if (*bp == ':' || (globals && *bp == '='))
4695         {
4696           /* We should detect if there is more than one tag, but we do not.
4697              We just skip initial and final spaces. */
4698           char * namestart = skip_spaces (lb.buffer);
4699           while (--bp > namestart)
4700             if (!notinname (*bp))
4701               break;
4702           make_tag (namestart, bp - namestart + 1, TRUE,
4703                     lb.buffer, bp - lb.buffer + 2, lineno, linecharno);
4704         }
4705     }
4706 }
4707
4708 \f
4709 /*
4710  * Pascal parsing
4711  * Original code by Mosur K. Mohan (1989)
4712  *
4713  *  Locates tags for procedures & functions.  Doesn't do any type- or
4714  *  var-definitions.  It does look for the keyword "extern" or
4715  *  "forward" immediately following the procedure statement; if found,
4716  *  the tag is skipped.
4717  */
4718 static void
4719 Pascal_functions (inf)
4720      FILE *inf;
4721 {
4722   linebuffer tline;             /* mostly copied from C_entries */
4723   long save_lcno;
4724   int save_lineno, namelen, taglen;
4725   char c, *name;
4726
4727   bool                          /* each of these flags is TRUE iff: */
4728     incomment,                  /* point is inside a comment */
4729     inquote,                    /* point is inside '..' string */
4730     get_tagname,                /* point is after PROCEDURE/FUNCTION
4731                                    keyword, so next item = potential tag */
4732     found_tag,                  /* point is after a potential tag */
4733     inparms,                    /* point is within parameter-list */
4734     verify_tag;                 /* point has passed the parm-list, so the
4735                                    next token will determine whether this
4736                                    is a FORWARD/EXTERN to be ignored, or
4737                                    whether it is a real tag */
4738
4739   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4740   name = NULL;                  /* keep compiler quiet */
4741   dbp = lb.buffer;
4742   *dbp = '\0';
4743   linebuffer_init (&tline);
4744
4745   incomment = inquote = FALSE;
4746   found_tag = FALSE;            /* have a proc name; check if extern */
4747   get_tagname = FALSE;          /* found "procedure" keyword         */
4748   inparms = FALSE;              /* found '(' after "proc"            */
4749   verify_tag = FALSE;           /* check if "extern" is ahead        */
4750
4751
4752   while (!feof (inf))           /* long main loop to get next char */
4753     {
4754       c = *dbp++;
4755       if (c == '\0')            /* if end of line */
4756         {
4757           readline (&lb, inf);
4758           dbp = lb.buffer;
4759           if (*dbp == '\0')
4760             continue;
4761           if (!((found_tag && verify_tag)
4762                 || get_tagname))
4763             c = *dbp++;         /* only if don't need *dbp pointing
4764                                    to the beginning of the name of
4765                                    the procedure or function */
4766         }
4767       if (incomment)
4768         {
4769           if (c == '}')         /* within { } comments */
4770             incomment = FALSE;
4771           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4772             {
4773               dbp++;
4774               incomment = FALSE;
4775             }
4776           continue;
4777         }
4778       else if (inquote)
4779         {
4780           if (c == '\'')
4781             inquote = FALSE;
4782           continue;
4783         }
4784       else
4785         switch (c)
4786           {
4787           case '\'':
4788             inquote = TRUE;     /* found first quote */
4789             continue;
4790           case '{':             /* found open { comment */
4791             incomment = TRUE;
4792             continue;
4793           case '(':
4794             if (*dbp == '*')    /* found open (* comment */
4795               {
4796                 incomment = TRUE;
4797                 dbp++;
4798               }
4799             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4800               inparms = TRUE;
4801             continue;
4802           case ')':             /* end of parms list */
4803             if (inparms)
4804               inparms = FALSE;
4805             continue;
4806           case ';':
4807             if (found_tag && !inparms) /* end of proc or fn stmt */
4808               {
4809                 verify_tag = TRUE;
4810                 break;
4811               }
4812             continue;
4813           }
4814       if (found_tag && verify_tag && (*dbp != ' '))
4815         {
4816           /* Check if this is an "extern" declaration. */
4817           if (*dbp == '\0')
4818             continue;
4819           if (lowcase (*dbp == 'e'))
4820             {
4821               if (nocase_tail ("extern")) /* superfluous, really! */
4822                 {
4823                   found_tag = FALSE;
4824                   verify_tag = FALSE;
4825                 }
4826             }
4827           else if (lowcase (*dbp) == 'f')
4828             {
4829               if (nocase_tail ("forward")) /* check for forward reference */
4830                 {
4831                   found_tag = FALSE;
4832                   verify_tag = FALSE;
4833                 }
4834             }
4835           if (found_tag && verify_tag) /* not external proc, so make tag */
4836             {
4837               found_tag = FALSE;
4838               verify_tag = FALSE;
4839               make_tag (name, namelen, TRUE,
4840                         tline.buffer, taglen, save_lineno, save_lcno);
4841               continue;
4842             }
4843         }
4844       if (get_tagname)          /* grab name of proc or fn */
4845         {
4846           char *cp;
4847
4848           if (*dbp == '\0')
4849             continue;
4850
4851           /* Find block name. */
4852           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4853             continue;
4854
4855           /* Save all values for later tagging. */
4856           linebuffer_setlen (&tline, lb.len);
4857           strcpy (tline.buffer, lb.buffer);
4858           save_lineno = lineno;
4859           save_lcno = linecharno;
4860           name = tline.buffer + (dbp - lb.buffer);
4861           namelen = cp - dbp;
4862           taglen = cp - lb.buffer + 1;
4863
4864           dbp = cp;             /* set dbp to e-o-token */
4865           get_tagname = FALSE;
4866           found_tag = TRUE;
4867           continue;
4868
4869           /* And proceed to check for "extern". */
4870         }
4871       else if (!incomment && !inquote && !found_tag)
4872         {
4873           /* Check for proc/fn keywords. */
4874           switch (lowcase (c))
4875             {
4876             case 'p':
4877               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4878                 get_tagname = TRUE;
4879               continue;
4880             case 'f':
4881               if (nocase_tail ("unction"))
4882                 get_tagname = TRUE;
4883               continue;
4884             }
4885         }
4886     } /* while not eof */
4887
4888   free (tline.buffer);
4889 }
4890
4891 \f
4892 /*
4893  * Lisp tag functions
4894  *  look for (def or (DEF, quote or QUOTE
4895  */
4896
4897 static void L_getit __P((void));
4898
4899 static void
4900 L_getit ()
4901 {
4902   if (*dbp == '\'')             /* Skip prefix quote */
4903     dbp++;
4904   else if (*dbp == '(')
4905   {
4906     dbp++;
4907     /* Try to skip "(quote " */
4908     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4909       /* Ok, then skip "(" before name in (defstruct (foo)) */
4910       dbp = skip_spaces (dbp);
4911   }
4912   get_tag (dbp, NULL);
4913 }
4914
4915 static void
4916 Lisp_functions (inf)
4917      FILE *inf;
4918 {
4919   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4920     {
4921       if (dbp[0] != '(')
4922         continue;
4923
4924       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4925         {
4926           dbp = skip_non_spaces (dbp);
4927           dbp = skip_spaces (dbp);
4928           L_getit ();
4929         }
4930       else
4931         {
4932           /* Check for (foo::defmumble name-defined ... */
4933           do
4934             dbp++;
4935           while (!notinname (*dbp) && *dbp != ':');
4936           if (*dbp == ':')
4937             {
4938               do
4939                 dbp++;
4940               while (*dbp == ':');
4941
4942               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4943                 {
4944                   dbp = skip_non_spaces (dbp);
4945                   dbp = skip_spaces (dbp);
4946                   L_getit ();
4947                 }
4948             }
4949         }
4950     }
4951 }
4952
4953 \f
4954 /*
4955  * Lua script language parsing
4956  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4957  *
4958  *  "function" and "local function" are tags if they start at column 1.
4959  */
4960 static void
4961 Lua_functions (inf)
4962      FILE *inf;
4963 {
4964   register char *bp;
4965
4966   LOOP_ON_INPUT_LINES (inf, lb, bp)
4967     {
4968       if (bp[0] != 'f' && bp[0] != 'l')
4969         continue;
4970
4971       (void)LOOKING_AT (bp, "local"); /* skip possible "local" */
4972
4973       if (LOOKING_AT (bp, "function"))
4974         get_tag (bp, NULL);
4975     }
4976 }
4977
4978 \f
4979 /*
4980  * Postscript tags
4981  * Just look for lines where the first character is '/'
4982  * Also look at "defineps" for PSWrap
4983  * Ideas by:
4984  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4985  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4986  */
4987 static void
4988 PS_functions (inf)
4989      FILE *inf;
4990 {
4991   register char *bp, *ep;
4992
4993   LOOP_ON_INPUT_LINES (inf, lb, bp)
4994     {
4995       if (bp[0] == '/')
4996         {
4997           for (ep = bp+1;
4998                *ep != '\0' && *ep != ' ' && *ep != '{';
4999                ep++)
5000             continue;
5001           make_tag (bp, ep - bp, TRUE,
5002                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5003         }
5004       else if (LOOKING_AT (bp, "defineps"))
5005         get_tag (bp, NULL);
5006     }
5007 }
5008
5009 \f
5010 /*
5011  * Forth tags
5012  * Ignore anything after \ followed by space or in ( )
5013  * Look for words defined by :
5014  * Look for constant, code, create, defer, value, and variable
5015  * OBP extensions:  Look for buffer:, field,
5016  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5017  */
5018 static void
5019 Forth_words (inf)
5020      FILE *inf;
5021 {
5022   register char *bp;
5023
5024   LOOP_ON_INPUT_LINES (inf, lb, bp)
5025     while ((bp = skip_spaces (bp))[0] != '\0')
5026       if (bp[0] == '\\' && iswhite(bp[1]))
5027         break;                  /* read next line */
5028       else if (bp[0] == '(' && iswhite(bp[1]))
5029         do                      /* skip to ) or eol */
5030           bp++;
5031         while (*bp != ')' && *bp != '\0');
5032       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5033                || LOOKING_AT_NOCASE (bp, "constant")
5034                || LOOKING_AT_NOCASE (bp, "code")
5035                || LOOKING_AT_NOCASE (bp, "create")
5036                || LOOKING_AT_NOCASE (bp, "defer")
5037                || LOOKING_AT_NOCASE (bp, "value")
5038                || LOOKING_AT_NOCASE (bp, "variable")
5039                || LOOKING_AT_NOCASE (bp, "buffer:")
5040                || LOOKING_AT_NOCASE (bp, "field"))
5041         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5042       else
5043         bp = skip_non_spaces (bp);
5044 }
5045
5046 \f
5047 /*
5048  * Scheme tag functions
5049  * look for (def... xyzzy
5050  *          (def... (xyzzy
5051  *          (def ... ((...(xyzzy ....
5052  *          (set! xyzzy
5053  * Original code by Ken Haase (1985?)
5054  */
5055 static void
5056 Scheme_functions (inf)
5057      FILE *inf;
5058 {
5059   register char *bp;
5060
5061   LOOP_ON_INPUT_LINES (inf, lb, bp)
5062     {
5063       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5064         {
5065           bp = skip_non_spaces (bp+4);
5066           /* Skip over open parens and white space */
5067           while (notinname (*bp))
5068             bp++;
5069           get_tag (bp, NULL);
5070         }
5071       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5072         get_tag (bp, NULL);
5073     }
5074 }
5075
5076 \f
5077 /* Find tags in TeX and LaTeX input files.  */
5078
5079 /* TEX_toktab is a table of TeX control sequences that define tags.
5080  * Each entry records one such control sequence.
5081  *
5082  * Original code from who knows whom.
5083  * Ideas by:
5084  *   Stefan Monnier (2002)
5085  */
5086
5087 static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */
5088
5089 /* Default set of control sequences to put into TEX_toktab.
5090    The value of environment var TEXTAGS is prepended to this.  */
5091 static char *TEX_defenv = "\
5092 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
5093 :part:appendix:entry:index:def\
5094 :newcommand:renewcommand:newenvironment:renewenvironment";
5095
5096 static void TEX_mode __P((FILE *));
5097 static void TEX_decode_env __P((char *, char *));
5098
5099 static char TEX_esc = '\\';
5100 static char TEX_opgrp = '{';
5101 static char TEX_clgrp = '}';
5102
5103 /*
5104  * TeX/LaTeX scanning loop.
5105  */
5106 static void
5107 TeX_commands (inf)
5108      FILE *inf;
5109 {
5110   char *cp;
5111   linebuffer *key;
5112
5113   /* Select either \ or ! as escape character.  */
5114   TEX_mode (inf);
5115
5116   /* Initialize token table once from environment. */
5117   if (TEX_toktab == NULL)
5118     TEX_decode_env ("TEXTAGS", TEX_defenv);
5119
5120   LOOP_ON_INPUT_LINES (inf, lb, cp)
5121     {
5122       /* Look at each TEX keyword in line. */
5123       for (;;)
5124         {
5125           /* Look for a TEX escape. */
5126           while (*cp++ != TEX_esc)
5127             if (cp[-1] == '\0' || cp[-1] == '%')
5128               goto tex_next_line;
5129
5130           for (key = TEX_toktab; key->buffer != NULL; key++)
5131             if (strneq (cp, key->buffer, key->len))
5132               {
5133                 register char *p;
5134                 int namelen, linelen;
5135                 bool opgrp = FALSE;
5136
5137                 cp = skip_spaces (cp + key->len);
5138                 if (*cp == TEX_opgrp)
5139                   {
5140                     opgrp = TRUE;
5141                     cp++;
5142                   }
5143                 for (p = cp;
5144                      (!iswhite (*p) && *p != '#' &&
5145                       *p != TEX_opgrp && *p != TEX_clgrp);
5146                      p++)
5147                   continue;
5148                 namelen = p - cp;
5149                 linelen = lb.len;
5150                 if (!opgrp || *p == TEX_clgrp)
5151                   {
5152                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5153                       p++;
5154                     linelen = p - lb.buffer + 1;
5155                   }
5156                 make_tag (cp, namelen, TRUE,
5157                           lb.buffer, linelen, lineno, linecharno);
5158                 goto tex_next_line; /* We only tag a line once */
5159               }
5160         }
5161     tex_next_line:
5162       ;
5163     }
5164 }
5165
5166 #define TEX_LESC '\\'
5167 #define TEX_SESC '!'
5168
5169 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5170    chars accordingly. */
5171 static void
5172 TEX_mode (inf)
5173      FILE *inf;
5174 {
5175   int c;
5176
5177   while ((c = getc (inf)) != EOF)
5178     {
5179       /* Skip to next line if we hit the TeX comment char. */
5180       if (c == '%')
5181         while (c != '\n' && c != EOF)
5182           c = getc (inf);
5183       else if (c == TEX_LESC || c == TEX_SESC )
5184         break;
5185     }
5186
5187   if (c == TEX_LESC)
5188     {
5189       TEX_esc = TEX_LESC;
5190       TEX_opgrp = '{';
5191       TEX_clgrp = '}';
5192     }
5193   else
5194     {
5195       TEX_esc = TEX_SESC;
5196       TEX_opgrp = '<';
5197       TEX_clgrp = '>';
5198     }
5199   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5200      No attempt is made to correct the situation. */
5201   rewind (inf);
5202 }
5203
5204 /* Read environment and prepend it to the default string.
5205    Build token table. */
5206 static void
5207 TEX_decode_env (evarname, defenv)
5208      char *evarname;
5209      char *defenv;
5210 {
5211   register char *env, *p;
5212   int i, len;
5213
5214   /* Append default string to environment. */
5215   env = getenv (evarname);
5216   if (!env)
5217     env = defenv;
5218   else
5219     {
5220       char *oldenv = env;
5221       env = concat (oldenv, defenv, "");
5222     }
5223
5224   /* Allocate a token table */
5225   for (len = 1, p = env; p;)
5226     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5227       len++;
5228   TEX_toktab = xnew (len, linebuffer);
5229
5230   /* Unpack environment string into token table. Be careful about */
5231   /* zero-length strings (leading ':', "::" and trailing ':') */
5232   for (i = 0; *env != '\0';)
5233     {
5234       p = etags_strchr (env, ':');
5235       if (!p)                   /* End of environment string. */
5236         p = env + strlen (env);
5237       if (p - env > 0)
5238         {                       /* Only non-zero strings. */
5239           TEX_toktab[i].buffer = savenstr (env, p - env);
5240           TEX_toktab[i].len = p - env;
5241           i++;
5242         }
5243       if (*p)
5244         env = p + 1;
5245       else
5246         {
5247           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5248           TEX_toktab[i].len = 0;
5249           break;
5250         }
5251     }
5252 }
5253
5254 \f
5255 /* Texinfo support.  Dave Love, Mar. 2000.  */
5256 static void
5257 Texinfo_nodes (inf)
5258      FILE * inf;
5259 {
5260   char *cp, *start;
5261   LOOP_ON_INPUT_LINES (inf, lb, cp)
5262     if (LOOKING_AT (cp, "@node"))
5263       {
5264         start = cp;
5265         while (*cp != '\0' && *cp != ',')
5266           cp++;
5267         make_tag (start, cp - start, TRUE,
5268                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5269       }
5270 }
5271
5272 \f
5273 /*
5274  * HTML support.
5275  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5276  * Contents of <a name=xxx> are tags with name xxx.
5277  *
5278  * Francesco Potortì, 2002.
5279  */
5280 static void
5281 HTML_labels (inf)
5282      FILE * inf;
5283 {
5284   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5285   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5286   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5287   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5288   char *end;
5289
5290
5291   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5292
5293   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5294     for (;;)                    /* loop on the same line */
5295       {
5296         if (skiptag)            /* skip HTML tag */
5297           {
5298             while (*dbp != '\0' && *dbp != '>')
5299               dbp++;
5300             if (*dbp == '>')
5301               {
5302                 dbp += 1;
5303                 skiptag = FALSE;
5304                 continue;       /* look on the same line */
5305               }
5306             break;              /* go to next line */
5307           }
5308
5309         else if (intag) /* look for "name=" or "id=" */
5310           {
5311             while (*dbp != '\0' && *dbp != '>'
5312                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5313               dbp++;
5314             if (*dbp == '\0')
5315               break;            /* go to next line */
5316             if (*dbp == '>')
5317               {
5318                 dbp += 1;
5319                 intag = FALSE;
5320                 continue;       /* look on the same line */
5321               }
5322             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5323                 || LOOKING_AT_NOCASE (dbp, "id="))
5324               {
5325                 bool quoted = (dbp[0] == '"');
5326
5327                 if (quoted)
5328                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5329                     continue;
5330                 else
5331                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5332                     continue;
5333                 linebuffer_setlen (&token_name, end - dbp);
5334                 strncpy (token_name.buffer, dbp, end - dbp);
5335                 token_name.buffer[end - dbp] = '\0';
5336
5337                 dbp = end;
5338                 intag = FALSE;  /* we found what we looked for */
5339                 skiptag = TRUE; /* skip to the end of the tag */
5340                 getnext = TRUE; /* then grab the text */
5341                 continue;       /* look on the same line */
5342               }
5343             dbp += 1;
5344           }
5345
5346         else if (getnext)       /* grab next tokens and tag them */
5347           {
5348             dbp = skip_spaces (dbp);
5349             if (*dbp == '\0')
5350               break;            /* go to next line */
5351             if (*dbp == '<')
5352               {
5353                 intag = TRUE;
5354                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5355                 continue;       /* look on the same line */
5356               }
5357
5358             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5359               continue;
5360             make_tag (token_name.buffer, token_name.len, TRUE,
5361                       dbp, end - dbp, lineno, linecharno);
5362             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5363             getnext = FALSE;
5364             break;              /* go to next line */
5365           }
5366
5367         else                    /* look for an interesting HTML tag */
5368           {
5369             while (*dbp != '\0' && *dbp != '<')
5370               dbp++;
5371             if (*dbp == '\0')
5372               break;            /* go to next line */
5373             intag = TRUE;
5374             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5375               {
5376                 inanchor = TRUE;
5377                 continue;       /* look on the same line */
5378               }
5379             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5380                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5381                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5382                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5383               {
5384                 intag = FALSE;
5385                 getnext = TRUE;
5386                 continue;       /* look on the same line */
5387               }
5388             dbp += 1;
5389           }
5390       }
5391 }
5392
5393 \f
5394 /*
5395  * Prolog support
5396  *
5397  * Assumes that the predicate or rule starts at column 0.
5398  * Only the first clause of a predicate or rule is added.
5399  * Original code by Sunichirou Sugou (1989)
5400  * Rewritten by Anders Lindgren (1996)
5401  */
5402 static int prolog_pr __P((char *, char *));
5403 static void prolog_skip_comment __P((linebuffer *, FILE *));
5404 static int prolog_atom __P((char *, int));
5405
5406 static void
5407 Prolog_functions (inf)
5408      FILE *inf;
5409 {
5410   char *cp, *last;
5411   int len;
5412   int allocated;
5413
5414   allocated = 0;
5415   len = 0;
5416   last = NULL;
5417
5418   LOOP_ON_INPUT_LINES (inf, lb, cp)
5419     {
5420       if (cp[0] == '\0')        /* Empty line */
5421         continue;
5422       else if (iswhite (cp[0])) /* Not a predicate */
5423         continue;
5424       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5425         prolog_skip_comment (&lb, inf);
5426       else if ((len = prolog_pr (cp, last)) > 0)
5427         {
5428           /* Predicate or rule.  Store the function name so that we
5429              only generate a tag for the first clause.  */
5430           if (last == NULL)
5431             last = xnew(len + 1, char);
5432           else if (len + 1 > allocated)
5433             xrnew (last, len + 1, char);
5434           allocated = len + 1;
5435           strncpy (last, cp, len);
5436           last[len] = '\0';
5437         }
5438     }
5439   if (last != NULL)
5440     free (last);
5441 }
5442
5443
5444 static void
5445 prolog_skip_comment (plb, inf)
5446      linebuffer *plb;
5447      FILE *inf;
5448 {
5449   char *cp;
5450
5451   do
5452     {
5453       for (cp = plb->buffer; *cp != '\0'; cp++)
5454         if (cp[0] == '*' && cp[1] == '/')
5455           return;
5456       readline (plb, inf);
5457     }
5458   while (!feof(inf));
5459 }
5460
5461 /*
5462  * A predicate or rule definition is added if it matches:
5463  *     <beginning of line><Prolog Atom><whitespace>(
5464  * or  <beginning of line><Prolog Atom><whitespace>:-
5465  *
5466  * It is added to the tags database if it doesn't match the
5467  * name of the previous clause header.
5468  *
5469  * Return the size of the name of the predicate or rule, or 0 if no
5470  * header was found.
5471  */
5472 static int
5473 prolog_pr (s, last)
5474      char *s;
5475      char *last;                /* Name of last clause. */
5476 {
5477   int pos;
5478   int len;
5479
5480   pos = prolog_atom (s, 0);
5481   if (pos < 1)
5482     return 0;
5483
5484   len = pos;
5485   pos = skip_spaces (s + pos) - s;
5486
5487   if ((s[pos] == '.'
5488        || (s[pos] == '(' && (pos += 1))
5489        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5490       && (last == NULL          /* save only the first clause */
5491           || len != (int)strlen (last)
5492           || !strneq (s, last, len)))
5493         {
5494           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5495           return len;
5496         }
5497   else
5498     return 0;
5499 }
5500
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (ISLOWER(s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      for (pos++; ISALNUM(s[pos]) || (s[pos] == '_'); pos++)
        continue;
      return pos - start;
    }

  if (s[pos] != '\'')
    return -1;

  /* The atom is quoted: scan up to the closing single quote. */
  pos++;
  for (;;)
    switch (s[pos])
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* A backslash quotes the next character, which must exist. */
        if (s[pos+1] == '\0')
          return -1;
        pos += 2;
        break;

      case '\'':
        if (s[pos+1] != '\'')
          return pos + 1 - start;       /* closing quote */
        pos += 2;                       /* a doubled single quote */
        break;

      default:
        pos++;
        break;
      }
}
5559
5560 \f
5561 /*
5562  * Support for Erlang
5563  *
5564  * Generates tags for functions, defines, and records.
5565  * Assumes that Erlang functions start at column 0.
5566  * Original code by Anders Lindgren (1996)
5567  */
5568 static int erlang_func __P((char *, char *));
5569 static void erlang_attribute __P((char *));
5570 static int erlang_atom __P((char *));
5571
5572 static void
5573 Erlang_functions (inf)
5574      FILE *inf;
5575 {
5576   char *cp, *last;
5577   int len;
5578   int allocated;
5579
5580   allocated = 0;
5581   len = 0;
5582   last = NULL;
5583
5584   LOOP_ON_INPUT_LINES (inf, lb, cp)
5585     {
5586       if (cp[0] == '\0')        /* Empty line */
5587         continue;
5588       else if (iswhite (cp[0])) /* Not function nor attribute */
5589         continue;
5590       else if (cp[0] == '%')    /* comment */
5591         continue;
5592       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5593         continue;
5594       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5595         {
5596           erlang_attribute (cp);
5597           if (last != NULL)
5598             {
5599               free (last);
5600               last = NULL;
5601             }
5602         }
5603       else if ((len = erlang_func (cp, last)) > 0)
5604         {
5605           /*
5606            * Function.  Store the function name so that we only
5607            * generates a tag for the first clause.
5608            */
5609           if (last == NULL)
5610             last = xnew (len + 1, char);
5611           else if (len + 1 > allocated)
5612             xrnew (last, len + 1, char);
5613           allocated = len + 1;
5614           strncpy (last, cp, len);
5615           last[len] = '\0';
5616         }
5617     }
5618   if (last != NULL)
5619     free (last);
5620 }
5621
5622
5623 /*
5624  * A function definition is added if it matches:
5625  *     <beginning of line><Erlang Atom><whitespace>(
5626  *
5627  * It is added to the tags database if it doesn't match the
5628  * name of the previous clause header.
5629  *
5630  * Return the size of the name of the function, or 0 if no function
5631  * was found.
5632  */
5633 static int
5634 erlang_func (s, last)
5635      char *s;
5636      char *last;                /* Name of last clause. */
5637 {
5638   int pos;
5639   int len;
5640
5641   pos = erlang_atom (s);
5642   if (pos < 1)
5643     return 0;
5644
5645   len = pos;
5646   pos = skip_spaces (s + pos) - s;
5647
5648   /* Save only the first clause. */
5649   if (s[pos++] == '('
5650       && (last == NULL
5651           || len != (int)strlen (last)
5652           || !strneq (s, last, len)))
5653         {
5654           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5655           return len;
5656         }
5657
5658   return 0;
5659 }
5660
5661
5662 /*
5663  * Handle attributes.  Currently, tags are generated for defines
5664  * and records.
5665  *
5666  * They are on the form:
5667  * -define(foo, bar).
5668  * -define(Foo(M, N), M+N).
5669  * -record(graph, {vtab = notable, cyclic = true}).
5670  */
5671 static void
5672 erlang_attribute (s)
5673      char *s;
5674 {
5675   char *cp = s;
5676
5677   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5678       && *cp++ == '(')
5679     {
5680       int len = erlang_atom (skip_spaces (cp));
5681       if (len > 0)
5682         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5683     }
5684   return;
5685 }
5686
5687
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed, or 0 if S does not start with
 * an atom or if a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  int pos;

  if (ISALPHA (s[0]) || s[0] == '_')
    {
      /* The atom is unquoted. */
      for (pos = 1; ISALNUM (s[pos]) || s[pos] == '_'; pos++)
        continue;
      return pos;
    }

  if (s[0] != '\'')
    return 0;

  /* The atom is quoted: look for the closing quote. */
  pos = 1;
  while (s[pos] != '\'')
    {
      if (s[pos] == '\0')       /* multiline quoted atoms are ignored */
        return 0;
      if (s[pos] == '\\')
        {
          /* A backslash quotes the next character, which must exist. */
          pos++;
          if (s[pos] == '\0')
            return 0;
        }
      pos++;
    }
  return pos + 1;               /* count the closing quote too */
}
5716
5717 \f
5718 static char *scan_separators __P((char *));
5719 static void add_regex __P((char *, language *));
5720 static char *substitute __P((char *, char *, struct re_registers *));
5721
/*
 * Take a string like "/blah/" and turn it into "blah", verifying
 * that the first and last characters are the same, and handling
 * quoted separator characters.  Actually, stops on the occurrence of
 * an unquoted separator.  Also process \t, \n, etc. and turn into
 * appropriate characters. Works in place.  Null terminates name string.
 * Returns pointer to terminating separator, or NULL for
 * unterminated regexps.
 *
 * The separator is NAME[0].  A backslash followed by the separator
 * yields the separator; a backslash followed by any other character
 * that is not a known escape is kept together with that character.
 * An empty NAME, or one ending in a lone backslash, is unterminated.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;

  /* An empty string has no separator at all; stepping over NAME[0]
     would read past the terminator.  */
  if (sep == '\0')
    return NULL;

  for (++name; *name != '\0'; ++name)
    {
      if (*name == '\\')
        {
          /* Decode the character that follows the backslash. */
          ++name;
          if (*name == '\0')
            break;              /* lone trailing backslash */
          switch (*name)
            {
            case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
            case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
            case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
            case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
            case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
            case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
            case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              if (*name == sep)
                *copyto++ = sep;
              else
                {
                  /* Something else is quoted, so preserve the quote. */
                  *copyto++ = '\\';
                  *copyto++ = *name;
                }
              break;
            }
        }
      else if (*name == sep)
        break;                  /* unquoted separator: end of string */
      else
        *copyto++ = *name;
    }
  if (*name != sep)
    name = NULL;                /* signal unterminated regexp */

  /* Terminate copied string. */
  *copyto = '\0';
  return name;
}
5781
5782 /* Look at the argument of --regex or --no-regex and do the right
5783    thing.  Same for each line of a regexp file. */
5784 static void
5785 analyse_regex (regex_arg)
5786      char *regex_arg;
5787 {
5788   if (regex_arg == NULL)
5789     {
5790       free_regexps ();          /* --no-regex: remove existing regexps */
5791       return;
5792     }
5793
5794   /* A real --regexp option or a line in a regexp file. */
5795   switch (regex_arg[0])
5796     {
5797       /* Comments in regexp file or null arg to --regex. */
5798     case '\0':
5799     case ' ':
5800     case '\t':
5801       break;
5802
5803       /* Read a regex file.  This is recursive and may result in a
5804          loop, which will stop when the file descriptors are exhausted. */
5805     case '@':
5806       {
5807         FILE *regexfp;
5808         linebuffer regexbuf;
5809         char *regexfile = regex_arg + 1;
5810
5811         /* regexfile is a file containing regexps, one per line. */
5812         regexfp = fopen (regexfile, "r");
5813         if (regexfp == NULL)
5814           {
5815             pfatal (regexfile);
5816             return;
5817           }
5818         linebuffer_init (&regexbuf);
5819         while (readline_internal (&regexbuf, regexfp) > 0)
5820           analyse_regex (regexbuf.buffer);
5821         free (regexbuf.buffer);
5822         fclose (regexfp);
5823       }
5824       break;
5825
5826       /* Regexp to be used for a specific language only. */
5827     case '{':
5828       {
5829         language *lang;
5830         char *lang_name = regex_arg + 1;
5831         char *cp;
5832
5833         for (cp = lang_name; *cp != '}'; cp++)
5834           if (*cp == '\0')
5835             {
5836               error ("unterminated language name in regex: %s", regex_arg);
5837               return;
5838             }
5839         *cp++ = '\0';
5840         lang = get_language_from_langname (lang_name);
5841         if (lang == NULL)
5842           return;
5843         add_regex (cp, lang);
5844       }
5845       break;
5846
5847       /* Regexp to be used for any language. */
5848     default:
5849       add_regex (regex_arg, NULL);
5850       break;
5851     }
5852 }
5853
5854 /* Separate the regexp pattern, compile it,
5855    and care for optional name and modifiers. */
5856 static void
5857 add_regex (regexp_pattern, lang)
5858      char *regexp_pattern;
5859      language *lang;
5860 {
5861   static struct re_pattern_buffer zeropattern;
5862   char sep, *pat, *name, *modifiers;
5863   const char *err;
5864   struct re_pattern_buffer *patbuf;
5865   regexp *rp;
5866   bool
5867     force_explicit_name = TRUE, /* do not use implicit tag names */
5868     ignore_case = FALSE,        /* case is significant */
5869     multi_line = FALSE,         /* matches are done one line at a time */
5870     single_line = FALSE;        /* dot does not match newline */
5871
5872
5873   if (strlen(regexp_pattern) < 3)
5874     {
5875       error ("null regexp", (char *)NULL);
5876       return;
5877     }
5878   sep = regexp_pattern[0];
5879   name = scan_separators (regexp_pattern);
5880   if (name == NULL)
5881     {
5882       error ("%s: unterminated regexp", regexp_pattern);
5883       return;
5884     }
5885   if (name[1] == sep)
5886     {
5887       error ("null name for regexp \"%s\"", regexp_pattern);
5888       return;
5889     }
5890   modifiers = scan_separators (name);
5891   if (modifiers == NULL)        /* no terminating separator --> no name */
5892     {
5893       modifiers = name;
5894       name = "";
5895     }
5896   else
5897     modifiers += 1;             /* skip separator */
5898
5899   /* Parse regex modifiers. */
5900   for (; modifiers[0] != '\0'; modifiers++)
5901     switch (modifiers[0])
5902       {
5903       case 'N':
5904         if (modifiers == name)
5905           error ("forcing explicit tag name but no name, ignoring", NULL);
5906         force_explicit_name = TRUE;
5907         break;
5908       case 'i':
5909         ignore_case = TRUE;
5910         break;
5911       case 's':
5912         single_line = TRUE;
5913         /* FALLTHRU */
5914       case 'm':
5915         multi_line = TRUE;
5916         need_filebuf = TRUE;
5917         break;
5918       default:
5919         {
5920           char wrongmod [2];
5921           wrongmod[0] = modifiers[0];
5922           wrongmod[1] = '\0';
5923           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5924         }
5925         break;
5926       }
5927
5928   patbuf = xnew (1, struct re_pattern_buffer);
5929   *patbuf = zeropattern;
5930   if (ignore_case)
5931     {
5932       static char lc_trans[CHARS];
5933       int i;
5934       for (i = 0; i < CHARS; i++)
5935         lc_trans[i] = lowcase (i);
5936       patbuf->translate = lc_trans;     /* translation table to fold case  */
5937     }
5938
5939   if (multi_line)
5940     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5941   else
5942     pat = regexp_pattern;
5943
5944   if (single_line)
5945     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5946   else
5947     re_set_syntax (RE_SYNTAX_EMACS);
5948
5949   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5950   if (multi_line)
5951     free (pat);
5952   if (err != NULL)
5953     {
5954       error ("%s while compiling pattern", err);
5955       return;
5956     }
5957
5958   rp = p_head;
5959   p_head = xnew (1, regexp);
5960   p_head->pattern = savestr (regexp_pattern);
5961   p_head->p_next = rp;
5962   p_head->lang = lang;
5963   p_head->pat = patbuf;
5964   p_head->name = savestr (name);
5965   p_head->error_signaled = FALSE;
5966   p_head->force_explicit_name = force_explicit_name;
5967   p_head->ignore_case = ignore_case;
5968   p_head->multi_line = multi_line;
5969 }
5970
5971 /*
5972  * Do the substitutions indicated by the regular expression and
5973  * arguments.
5974  */
5975 static char *
5976 substitute (in, out, regs)
5977      char *in, *out;
5978      struct re_registers *regs;
5979 {
5980   char *result, *t;
5981   int size, dig, diglen;
5982
5983   result = NULL;
5984   size = strlen (out);
5985
5986   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5987   if (out[size - 1] == '\\')
5988     fatal ("pattern error in \"%s\"", out);
5989   for (t = etags_strchr (out, '\\');
5990        t != NULL;
5991        t = etags_strchr (t + 2, '\\'))
5992     if (ISDIGIT (t[1]))
5993       {
5994         dig = t[1] - '0';
5995         diglen = regs->end[dig] - regs->start[dig];
5996         size += diglen - 2;
5997       }
5998     else
5999       size -= 1;
6000
6001   /* Allocate space and do the substitutions. */
6002   assert (size >= 0);
6003   result = xnew (size + 1, char);
6004
6005   for (t = result; *out != '\0'; out++)
6006     if (*out == '\\' && ISDIGIT (*++out))
6007       {
6008         dig = *out - '0';
6009         diglen = regs->end[dig] - regs->start[dig];
6010         strncpy (t, in + regs->start[dig], diglen);
6011         t += diglen;
6012       }
6013     else
6014       *t++ = *out;
6015   *t = '\0';
6016
6017   assert (t <= result + size);
6018   assert (t - result == (int)strlen (result));
6019
6020   return result;
6021 }
6022
6023 /* Deallocate all regexps. */
6024 static void
6025 free_regexps ()
6026 {
6027   regexp *rp;
6028   while (p_head != NULL)
6029     {
6030       rp = p_head->p_next;
6031       free (p_head->pattern);
6032       free (p_head->name);
6033       free (p_head);
6034       p_head = rp;
6035     }
6036   return;
6037 }
6038
6039 /*
6040  * Reads the whole file as a single string from `filebuf' and looks for
6041  * multi-line regular expressions, creating tags on matches.
6042  * readline already dealt with normal regexps.
6043  *
6044  * Idea by Ben Wing <ben@666.com> (2002).
6045  */
6046 static void
6047 regex_tag_multiline ()
6048 {
6049   char *buffer = filebuf.buffer;
6050   regexp *rp;
6051   char *name;
6052
6053   for (rp = p_head; rp != NULL; rp = rp->p_next)
6054     {
6055       int match = 0;
6056
6057       if (!rp->multi_line)
6058         continue;               /* skip normal regexps */
6059
6060       /* Generic initialisations before parsing file from memory. */
6061       lineno = 1;               /* reset global line number */
6062       charno = 0;               /* reset global char number */
6063       linecharno = 0;           /* reset global char number of line start */
6064
6065       /* Only use generic regexps or those for the current language. */
6066       if (rp->lang != NULL && rp->lang != curfdp->lang)
6067         continue;
6068
6069       while (match >= 0 && match < filebuf.len)
6070         {
6071           match = re_search (rp->pat, buffer, filebuf.len, charno,
6072                              filebuf.len - match, &rp->regs);
6073           switch (match)
6074             {
6075             case -2:
6076               /* Some error. */
6077               if (!rp->error_signaled)
6078                 {
6079                   error ("regexp stack overflow while matching \"%s\"",
6080                          rp->pattern);
6081                   rp->error_signaled = TRUE;
6082                 }
6083               break;
6084             case -1:
6085               /* No match. */
6086               break;
6087             default:
6088               if (match == rp->regs.end[0])
6089                 {
6090                   if (!rp->error_signaled)
6091                     {
6092                       error ("regexp matches the empty string: \"%s\"",
6093                              rp->pattern);
6094                       rp->error_signaled = TRUE;
6095                     }
6096                   match = -3;   /* exit from while loop */
6097                   break;
6098                 }
6099
6100               /* Match occurred.  Construct a tag. */
6101               while (charno < rp->regs.end[0])
6102                 if (buffer[charno++] == '\n')
6103                   lineno++, linecharno = charno;
6104               name = rp->name;
6105               if (name[0] == '\0')
6106                 name = NULL;
6107               else /* make a named tag */
6108                 name = substitute (buffer, rp->name, &rp->regs);
6109               if (rp->force_explicit_name)
6110                 /* Force explicit tag name, if a name is there. */
6111                 pfnote (name, TRUE, buffer + linecharno,
6112                         charno - linecharno + 1, lineno, linecharno);
6113               else
6114                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6115                           charno - linecharno + 1, lineno, linecharno);
6116               break;
6117             }
6118         }
6119     }
6120 }
6121
6122 \f
6123 static bool
6124 nocase_tail (cp)
6125      char *cp;
6126 {
6127   register int len = 0;
6128
6129   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6130     cp++, len++;
6131   if (*cp == '\0' && !intoken (dbp[len]))
6132     {
6133       dbp += len;
6134       return TRUE;
6135     }
6136   return FALSE;
6137 }
6138
6139 static void
6140 get_tag (bp, namepp)
6141      register char *bp;
6142      char **namepp;
6143 {
6144   register char *cp = bp;
6145
6146   if (*bp != '\0')
6147     {
6148       /* Go till you get to white space or a syntactic break */
6149       for (cp = bp + 1; !notinname (*cp); cp++)
6150         continue;
6151       make_tag (bp, cp - bp, TRUE,
6152                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6153     }
6154
6155   if (namepp != NULL)
6156     *namepp = savenstr (bp, cp - bp);
6157 }
6158
6159 /*
6160  * Read a line of text from `stream' into `lbp', excluding the
6161  * newline or CR-NL, if any.  Return the number of characters read from
6162  * `stream', which is the length of the line including the newline.
6163  *
6164  * On DOS or Windows we do not count the CR character, if any before the
6165  * NL, in the returned length; this mirrors the behavior of Emacs on those
6166  * platforms (for text files, it translates CR-NL to NL as it reads in the
6167  * file).
6168  *
6169  * If multi-line regular expressions are requested, each line read is
6170  * appended to `filebuf'.
6171  */
6172 static long
6173 readline_internal (lbp, stream)
6174      linebuffer *lbp;
6175      register FILE *stream;
6176 {
6177   char *buffer = lbp->buffer;
6178   register char *p = lbp->buffer;
6179   register char *pend;
6180   int chars_deleted;
6181
6182   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6183
6184   for (;;)
6185     {
6186       register int c = getc (stream);
6187       if (p == pend)
6188         {
6189           /* We're at the end of linebuffer: expand it. */
6190           lbp->size *= 2;
6191           xrnew (buffer, lbp->size, char);
6192           p += buffer - lbp->buffer;
6193           pend = buffer + lbp->size;
6194           lbp->buffer = buffer;
6195         }
6196       if (c == EOF)
6197         {
6198           *p = '\0';
6199           chars_deleted = 0;
6200           break;
6201         }
6202       if (c == '\n')
6203         {
6204           if (p > buffer && p[-1] == '\r')
6205             {
6206               p -= 1;
6207 #ifdef DOS_NT
6208              /* Assume CRLF->LF translation will be performed by Emacs
6209                 when loading this file, so CRs won't appear in the buffer.
6210                 It would be cleaner to compensate within Emacs;
6211                 however, Emacs does not know how many CRs were deleted
6212                 before any given point in the file.  */
6213               chars_deleted = 1;
6214 #else
6215               chars_deleted = 2;
6216 #endif
6217             }
6218           else
6219             {
6220               chars_deleted = 1;
6221             }
6222           *p = '\0';
6223           break;
6224         }
6225       *p++ = c;
6226     }
6227   lbp->len = p - buffer;
6228
6229   if (need_filebuf              /* we need filebuf for multi-line regexps */
6230       && chars_deleted > 0)     /* not at EOF */
6231     {
6232       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6233         {
6234           /* Expand filebuf. */
6235           filebuf.size *= 2;
6236           xrnew (filebuf.buffer, filebuf.size, char);
6237         }
6238       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6239       filebuf.len += lbp->len;
6240       filebuf.buffer[filebuf.len++] = '\n';
6241       filebuf.buffer[filebuf.len] = '\0';
6242     }
6243
6244   return lbp->len + chars_deleted;
6245 }
6246
6247 /*
6248  * Like readline_internal, above, but in addition try to match the
6249  * input line against relevant regular expressions and manage #line
6250  * directives.
6251  */
6252 static void
6253 readline (lbp, stream)
6254      linebuffer *lbp;
6255      FILE *stream;
6256 {
6257   long result;
6258
6259   linecharno = charno;          /* update global char number of line start */
6260   result = readline_internal (lbp, stream); /* read line */
6261   lineno += 1;                  /* increment global line number */
6262   charno += result;             /* increment global char number */
6263
6264   /* Honour #line directives. */
6265   if (!no_line_directive)
6266     {
6267       static bool discard_until_line_directive;
6268
6269       /* Check whether this is a #line directive. */
6270       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6271         {
6272           unsigned int lno;
6273           int start = 0;
6274
6275           if (sscanf (lbp->buffer, "#line %u \"%n", &lno, &start) >= 1
6276               && start > 0)     /* double quote character found */
6277             {
6278               char *endp = lbp->buffer + start;
6279
6280               while ((endp = etags_strchr (endp, '"')) != NULL
6281                      && endp[-1] == '\\')
6282                 endp++;
6283               if (endp != NULL)
6284                 /* Ok, this is a real #line directive.  Let's deal with it. */
6285                 {
6286                   char *taggedabsname;  /* absolute name of original file */
6287                   char *taggedfname;    /* name of original file as given */
6288                   char *name;           /* temp var */
6289
6290                   discard_until_line_directive = FALSE; /* found it */
6291                   name = lbp->buffer + start;
6292                   *endp = '\0';
6293                   canonicalize_filename (name); /* for DOS */
6294                   taggedabsname = absolute_filename (name, tagfiledir);
6295                   if (filename_is_absolute (name)
6296                       || filename_is_absolute (curfdp->infname))
6297                     taggedfname = savestr (taggedabsname);
6298                   else
6299                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6300
6301                   if (streq (curfdp->taggedfname, taggedfname))
6302                     /* The #line directive is only a line number change.  We
6303                        deal with this afterwards. */
6304                     free (taggedfname);
6305                   else
6306                     /* The tags following this #line directive should be
6307                        attributed to taggedfname.  In order to do this, set
6308                        curfdp accordingly. */
6309                     {
6310                       fdesc *fdp; /* file description pointer */
6311
6312                       /* Go look for a file description already set up for the
6313                          file indicated in the #line directive.  If there is
6314                          one, use it from now until the next #line
6315                          directive. */
6316                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6317                         if (streq (fdp->infname, curfdp->infname)
6318                             && streq (fdp->taggedfname, taggedfname))
6319                           /* If we remove the second test above (after the &&)
6320                              then all entries pertaining to the same file are
6321                              coalesced in the tags file.  If we use it, then
6322                              entries pertaining to the same file but generated
6323                              from different files (via #line directives) will
6324                              go into separate sections in the tags file.  These
6325                              alternatives look equivalent.  The first one
6326                              destroys some apparently useless information. */
6327                           {
6328                             curfdp = fdp;
6329                             free (taggedfname);
6330                             break;
6331                           }
6332                       /* Else, if we already tagged the real file, skip all
6333                          input lines until the next #line directive. */
6334                       if (fdp == NULL) /* not found */
6335                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6336                           if (streq (fdp->infabsname, taggedabsname))
6337                             {
6338                               discard_until_line_directive = TRUE;
6339                               free (taggedfname);
6340                               break;
6341                             }
6342                       /* Else create a new file description and use that from
6343                          now on, until the next #line directive. */
6344                       if (fdp == NULL) /* not found */
6345                         {
6346                           fdp = fdhead;
6347                           fdhead = xnew (1, fdesc);
6348                           *fdhead = *curfdp; /* copy curr. file description */
6349                           fdhead->next = fdp;
6350                           fdhead->infname = savestr (curfdp->infname);
6351                           fdhead->infabsname = savestr (curfdp->infabsname);
6352                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6353                           fdhead->taggedfname = taggedfname;
6354                           fdhead->usecharno = FALSE;
6355                           fdhead->prop = NULL;
6356                           fdhead->written = FALSE;
6357                           curfdp = fdhead;
6358                         }
6359                     }
6360                   free (taggedabsname);
6361                   lineno = lno - 1;
6362                   readline (lbp, stream);
6363                   return;
6364                 } /* if a real #line directive */
6365             } /* if #line is followed by a a number */
6366         } /* if line begins with "#line " */
6367
6368       /* If we are here, no #line directive was found. */
6369       if (discard_until_line_directive)
6370         {
6371           if (result > 0)
6372             {
6373               /* Do a tail recursion on ourselves, thus discarding the contents
6374                  of the line buffer. */
6375               readline (lbp, stream);
6376               return;
6377             }
6378           /* End of file. */
6379           discard_until_line_directive = FALSE;
6380           return;
6381         }
6382     } /* if #line directives should be considered */
6383
6384   {
6385     int match;
6386     regexp *rp;
6387     char *name;
6388
6389     /* Match against relevant regexps. */
6390     if (lbp->len > 0)
6391       for (rp = p_head; rp != NULL; rp = rp->p_next)
6392         {
6393           /* Only use generic regexps or those for the current language.
6394              Also do not use multiline regexps, which is the job of
6395              regex_tag_multiline. */
6396           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6397               || rp->multi_line)
6398             continue;
6399
6400           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6401           switch (match)
6402             {
6403             case -2:
6404               /* Some error. */
6405               if (!rp->error_signaled)
6406                 {
6407                   error ("regexp stack overflow while matching \"%s\"",
6408                          rp->pattern);
6409                   rp->error_signaled = TRUE;
6410                 }
6411               break;
6412             case -1:
6413               /* No match. */
6414               break;
6415             case 0:
6416               /* Empty string matched. */
6417               if (!rp->error_signaled)
6418                 {
6419                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6420                   rp->error_signaled = TRUE;
6421                 }
6422               break;
6423             default:
6424               /* Match occurred.  Construct a tag. */
6425               name = rp->name;
6426               if (name[0] == '\0')
6427                 name = NULL;
6428               else /* make a named tag */
6429                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6430               if (rp->force_explicit_name)
6431                 /* Force explicit tag name, if a name is there. */
6432                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6433               else
6434                 make_tag (name, strlen (name), TRUE,
6435                           lbp->buffer, match, lineno, linecharno);
6436               break;
6437             }
6438         }
6439   }
6440 }
6441
6442 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is made by savenstr
 * and therefore lives in a newly allocated area of strlen(cp)+1 bytes.
 */
static char *
savestr (cp)
     char *cp;
{
  int cplen = strlen (cp);

  return savenstr (cp, cplen);
}
6453
6454 /*
6455  * Return a pointer to a space of size LEN+1 allocated with xnew where
6456  * the string CP has been copied for at most the first LEN characters.
6457  */
6458 static char *
6459 savenstr (cp, len)
6460      char *cp;
6461      int len;
6462 {
6463   register char *dp;
6464
6465   dp = xnew (len + 1, char);
6466   strncpy (dp, cp, len);
6467   dp[len] = '\0';
6468   return dp;
6469 }
6470
/*
 * Find the last occurrence of the character C in the string SP.  The
 * terminating NUL is examined too.  Return a pointer to the occurrence,
 * or NULL if there is none.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  const char *last = NULL;      /* rightmost match seen so far */

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }

  return (char *) last;
}
6492
/*
 * Find the first occurrence of the character C in the string SP.  The
 * terminating NUL is examined too.  Return a pointer to the occurrence,
 * or NULL if there is none.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *) sp;
      if (*sp == '\0')
        return NULL;
    }
}
6511
/*
 * Compare the strings S1 and S2 without regard to the case of
 * alphabetic characters.  The value returned is the difference between
 * the first pair of characters that differ (lowercased when both are
 * alphabetic), or zero if there is no such pair.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  /* Walk both strings until S1 ends or a pair of characters differs. */
  for (; *s1 != '\0'; s1++, s2++)
    {
      if (ISALPHA (*s1) && ISALPHA (*s2))
        {
          if (lowcase (*s1) != lowcase (*s2))
            break;
        }
      else if (*s1 != *s2)
        break;
    }

  if (ISALPHA (*s1) && ISALPHA (*s2))
    return lowcase (*s1) - lowcase (*s2);
  return *s1 - *s2;
}
6532
/*
 * Compare at most the first N characters of the strings S1 and S2,
 * ignoring case for alphabetic characters.  Return zero if they are
 * equal, else the difference between the first pair of characters that
 * differ (lowercased when both are alphabetic).
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N counts the characters still to be compared.  It is tested before
     anything else and decremented only for characters that compared
     equal, so that N <= 0 after the loop means exactly "the first N
     characters match", even when S1 ends right at the limit. */
  while (n > 0
         && *s1 != '\0'
         && (ISALPHA (*s1) && ISALPHA (*s2)
             ? lowcase (*s1) == lowcase (*s2)
             : *s1 == *s2))
    s1++, s2++, n--;

  if (n <= 0)
    return 0;
  else
    return (ISALPHA (*s1) && ISALPHA (*s2)
            ? lowcase (*s1) - lowcase (*s2)
            : *s1 - *s2);
}
6558
/* Return a pointer to the first character of CP that is not white
   according to iswhite (end of string is not space). */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;

  return cp;
}
6568
/* Return a pointer to the first character of CP that is white according
   to iswhite, or to the end of the string if there is none. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;

  return cp;
}
6578
/* Report an error through error (`s1' is the printf control string,
   `s2' the argument for it), then exit with status EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6587
/* Hand `s1' to perror, so that it is printed together with the
   description of the current errno value, then exit with status
   EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6595
6596 static void
6597 suggest_asking_for_help ()
6598 {
6599   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6600            progname, NO_LONG_OPTIONS ? "-h" : "--help");
6601   exit (EXIT_FAILURE);
6602 }
6603
6604 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6605 static void
6606 error (s1, s2)
6607      const char *s1, *s2;
6608 {
6609   fprintf (stderr, "%s: ", progname);
6610   fprintf (stderr, s1, s2);
6611   fprintf (stderr, "\n");
6612 }
6613
6614 /* Return a newly-allocated string whose contents
6615    concatenate those of s1, s2, s3.  */
6616 static char *
6617 concat (s1, s2, s3)
6618      char *s1, *s2, *s3;
6619 {
6620   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6621   char *result = xnew (len1 + len2 + len3 + 1, char);
6622
6623   strcpy (result, s1);
6624   strcpy (result + len1, s2);
6625   strcpy (result + len1 + len2, s3);
6626   result[len1 + len2 + len3] = '\0';
6627
6628   return result;
6629 }
6630
6631 \f
6632 /* Does the same work as the system V getcwd, but does not need to
6633    guess the buffer size in advance. */
6634 static char *
6635 etags_getcwd ()
6636 {
6637 #ifdef HAVE_GETCWD
6638   int bufsize = 200;
6639   char *path = xnew (bufsize, char);
6640
6641   while (getcwd (path, bufsize) == NULL)
6642     {
6643       if (errno != ERANGE)
6644         pfatal ("getcwd");
6645       bufsize *= 2;
6646       free (path);
6647       path = xnew (bufsize, char);
6648     }
6649
6650   canonicalize_filename (path);
6651   return path;
6652
6653 #else /* not HAVE_GETCWD */
6654 #if MSDOS
6655
6656   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6657
6658   getwd (path);
6659
6660   for (p = path; *p != '\0'; p++)
6661     if (*p == '\\')
6662       *p = '/';
6663     else
6664       *p = lowcase (*p);
6665
6666   return strdup (path);
6667 #else /* not MSDOS */
6668   linebuffer path;
6669   FILE *pipe;
6670
6671   linebuffer_init (&path);
6672   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6673   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6674     pfatal ("pwd");
6675   pclose (pipe);
6676
6677   return path.buffer;
6678 #endif /* not MSDOS */
6679 #endif /* not HAVE_GETCWD */
6680 }
6681
6682 /* Return a newly allocated string containing the file name of FILE
6683    relative to the absolute directory DIR (which should end with a slash). */
6684 static char *
6685 relative_filename (file, dir)
6686      char *file, *dir;
6687 {
6688   char *fp, *dp, *afn, *res;
6689   int i;
6690
6691   /* Find the common root of file and dir (with a trailing slash). */
6692   afn = absolute_filename (file, cwd);
6693   fp = afn;
6694   dp = dir;
6695   while (*fp++ == *dp++)
6696     continue;
6697   fp--, dp--;                   /* back to the first differing char */
6698 #ifdef DOS_NT
6699   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6700     return afn;
6701 #endif
6702   do                            /* look at the equal chars until '/' */
6703     fp--, dp--;
6704   while (*fp != '/');
6705
6706   /* Build a sequence of "../" strings for the resulting relative file name. */
6707   i = 0;
6708   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6709     i += 1;
6710   res = xnew (3*i + strlen (fp + 1) + 1, char);
6711   res[0] = '\0';
6712   while (i-- > 0)
6713     strcat (res, "../");
6714
6715   /* Add the file name relative to the common root of file and dir. */
6716   strcat (res, fp + 1);
6717   free (afn);
6718
6719   return res;
6720 }
6721
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   already absolute it is appended to DIR; then the "/dirname/.." and
   "/." components are removed from the result. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *src, *dst;              /* for the in-place copies */

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Walk back to the start of the preceding component,
                 without ever stepping before the start of RES. */
              cp = slashp;
              while (cp > res)
                {
                  cp--;
                  if (filename_is_absolute (cp))
                    break;
                }
              if (!filename_is_absolute (cp))
                cp = slashp;    /* there is no component to delete */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so strcpy cannot be used.
                 The destination is below the source: copy forward by
                 hand, terminating NUL included. */
              for (src = slashp + 3, dst = cp;
                   (*dst = *src) != '\0';
                   src++, dst++)
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping copy again: shift the tail left by hand. */
              for (src = slashp + 2, dst = slashp;
                   (*dst = *src) != '\0';
                   src++, dst++)
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* just a safety net: should never happen */
    {
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6785
/* Return a newly allocated string containing the absolute file name of
   the directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place; if it contains no slash the
   result is a copy of DIR. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *cut, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    res = savestr (dir);
  else
    {
      /* Temporarily end FILE right after its last slash, so that only
         the directory part gets made absolute. */
      cut = last_slash + 1;
      saved = *cut;
      *cut = '\0';
      res = absolute_filename (file, dir);
      *cut = saved;
    }

  return res;
}
6807
/* Tell whether the file name FN is absolute, that is whether it begins
   with a slash or, under DOS_NT, with a drive letter followed by ":/".
   FN must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6820
/* Put the file name FN in canonical form, working in place.  Under
   DOS_NT this means an upper case drive letter and slashes instead of
   backslashes; elsewhere FN is left alone. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6839
6840 \f
6841 /* Initialize a linebuffer for use */
6842 static void
6843 linebuffer_init (lbp)
6844      linebuffer *lbp;
6845 {
6846   lbp->size = (DEBUG) ? 3 : 200;
6847   lbp->buffer = xnew (lbp->size, char);
6848   lbp->buffer[0] = '\0';
6849   lbp->len = 0;
6850 }
6851
6852 /* Set the minimum size of a string contained in a linebuffer. */
6853 static void
6854 linebuffer_setlen (lbp, toksize)
6855      linebuffer *lbp;
6856      int toksize;
6857 {
6858   while (lbp->size <= toksize)
6859     {
6860       lbp->size *= 2;
6861       xrnew (lbp->buffer, lbp->size, char);
6862     }
6863   lbp->len = toksize;
6864 }
6865
6866 /* Like malloc but get fatal error if memory is exhausted. */
6867 static PTR
6868 xmalloc (size)
6869      unsigned int size;
6870 {
6871   PTR result = (PTR) malloc (size);
6872   if (result == NULL)
6873     fatal ("virtual memory exhausted", (char *)NULL);
6874   return result;
6875 }
6876
6877 static PTR
6878 xrealloc (ptr, size)
6879      char *ptr;
6880      unsigned int size;
6881 {
6882   PTR result = (PTR) realloc (ptr, size);
6883   if (result == NULL)
6884     fatal ("virtual memory exhausted", (char *)NULL);
6885   return result;
6886 }
6887
6888 /*
6889  * Local Variables:
6890  * indent-tabs-mode: t
6891  * tab-width: 8
6892  * fill-column: 79
6893  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6894  * End:
6895  */
6896
6897 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6898    (do not change this comment) */
6899
6900 /* etags.c ends here */