lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs
   2    Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5 This file is not considered part of GNU Emacs.
   6
   7 This program is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software Foundation,
  19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  *      Sam Kendall added C++.
  28  *      Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
  29  *      Regexp tags by Tom Tromey.
  30  *
  31  *      Francesco Potorti` (pot@gnu.org) is the current maintainer.
  32  */
  33
  34 char pot_etags_version[] = "@(#) pot revision number is 13.47"; /* revision identifier string */
  35
     /* Truth values for the int-based `bool' type used in this file. */
  36 #define TRUE    1
  37 #define FALSE   0
  38
     /* DEBUG is FALSE unless the build defines it; the streq and strneq
        macros test it before running their extra NULL check. */
  39 #ifndef DEBUG
  40 # define DEBUG FALSE
  41 #endif
  42
     /* P_(proto) keeps the parameter list when __STDC__ is defined and is
        either non-zero or accompanied by __SUNPRO_C, and collapses it to
        `()' otherwise, so the same declarations also serve compilers
        without prototypes. */
  43 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  44 # define P_(proto) proto
  45 #else
  46 # define P_(proto) ()
  47 #endif
  48
  49 #ifdef HAVE_CONFIG_H
  50 # include <config.h>
  51   /* On some systems, Emacs defines static as nothing for the sake
  52      of unexec.  We don't want that here since we don't use unexec. */
  53 # undef static
  54 # define ETAGS_REGEXPS          /* use the regexp features */
  55 # define LONG_OPTIONS           /* accept long options */
  56 #endif /* HAVE_CONFIG_H */
  57
  58 #ifndef _GNU_SOURCE
  59 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  60 #endif
  61
  62 #ifdef MSDOS
  63 # undef MSDOS
  64 # define MSDOS TRUE
  65 # include <fcntl.h>
  66 # include <sys/param.h>
  67 # include <io.h>
  68 # ifndef HAVE_CONFIG_H
  69 #   define DOS_NT
  70 #   include <sys/config.h>
  71 # endif
  72 #else
  73 # define MSDOS FALSE
  74 #endif /* MSDOS */
  75
  76 #ifdef WINDOWSNT
  77 # include <stdlib.h>
  78 # include <fcntl.h>
  79 # include <string.h>
  80 # include <direct.h>
  81 # include <io.h>
  82 # define MAXPATHLEN _MAX_PATH
  83 # ifdef HAVE_CONFIG_H
  84 #   undef HAVE_NTGUI
  85 # else
  86 #   define DOS_NT
  87 # endif /* not HAVE_CONFIG_H */
  88 # ifndef HAVE_GETCWD
  89 #   define HAVE_GETCWD
  90 # endif /* undef HAVE_GETCWD */
  91 #else /* !WINDOWSNT */
  92 # ifdef STDC_HEADERS
  93 #  include <stdlib.h>
  94 #  include <string.h>
  95 # else
  96     extern char *getenv ();
  97 # endif
  98 #endif /* !WINDOWSNT */
  99
 100 #ifdef HAVE_UNISTD_H
 101 # include <unistd.h>
 102 #else
 103 # if defined (HAVE_GETCWD) && !WINDOWSNT
 104     extern char *getcwd (char *buf, size_t size);
 105 # endif
 106 #endif /* HAVE_UNISTD_H */
 107
 108 #include <stdio.h>
 109 #include <ctype.h>
 110 #include <errno.h>
 111 #ifndef errno
 112   extern int errno;
 113 #endif
 114 #include <sys/types.h>
 115 #include <sys/stat.h>
 116
 117 #if !defined (S_ISREG) && defined (S_IFREG)
 118 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 119 #endif
 120
 121 #ifdef LONG_OPTIONS
 122 # include <getopt.h>
 123 #else
 124 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 125   extern char *optarg;
 126   extern int optind, opterr;
 127 #endif /* LONG_OPTIONS */
 128
 129 #ifdef ETAGS_REGEXPS
 130 # include <regex.h>
 131 #endif /* ETAGS_REGEXPS */
 132
 133 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 134  Leave it undefined to make the program "etags", which makes emacs-style
 135  tag tables and tags typedefs, #defines and struct/union/enum by default.
      Either way CTAGS ends up defined as TRUE or FALSE below, so the rest
      of the file can test it with an ordinary `if (CTAGS)'. */
 136 #ifdef CTAGS
 137 # undef  CTAGS
 138 # define CTAGS TRUE
 139 #else
 140 # define CTAGS FALSE
 141 #endif
 142
 143 /* Exit codes for success and failure.  On VMS the two values are
        swapped: GOOD is 1 and BAD is 0.  */
 144 #ifdef VMS
 145 # define        GOOD    1
 146 # define        BAD     0
 147 #else
 148 # define        GOOD    0
 149 # define        BAD     1
 150 #endif
 151
 152 /* C extensions.  Bit flags naming a C dialect; presumably these are the
        values passed as the `c_ext' argument of C_entries -- confirm.
        Note that C_STAR and C_JAVA both include the C_PLPL bit (0x1),
        while YACC is an independent high bit. */
 153 #define C_PLPL  0x00001         /* C++ */
 154 #define C_STAR  0x00003         /* C* */
 155 #define C_JAVA  0x00005         /* JAVA */
 156 #define YACC    0x10000         /* yacc file */
 157
 158 #define streq(s,t)      ((DEBUG && (s) == NULL && (t) == NULL   \
 159                           && (abort (), 1)) || !strcmp (s, t))
 160 #define strneq(s,t,n)   ((DEBUG && (s) == NULL && (t) == NULL   \
 161                           && (abort (), 1)) || !strncmp (s, t, n))
 162
 163 #define CHARS 256               /* 2^8: number of values of an 8-bit char */
     /* Reduce X to the range 0..CHARS-1 so that a char, even a negative
        one, can index the class tables or be passed to <ctype.h>. */
 164 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
     /* Character classes, looked up in the _wht/_nin/_btk/_itk/_etk tables. */
 165 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
 166 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
 167 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
 168 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
 169 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
 170
     /* <ctype.h> wrappers that first reduce their argument with CHAR. */
 171 #define ISALNUM(c)      isalnum (CHAR(c))
 172 #define ISALPHA(c)      isalpha (CHAR(c))
 173 #define ISDIGIT(c)      isdigit (CHAR(c))
 174 #define ISLOWER(c)      islower (CHAR(c))
 175
 176 #define lowcase(c)      tolower (CHAR(c))
 177 #define upcase(c)       toupper (CHAR(c))
 178
 179
 180 /*
 181  *      xnew, xrnew -- allocate, reallocate storage
 182  *
 183  * SYNOPSIS:    Type *xnew (int n, Type);
 184  *              Type *xrnew (OldPointer, int n, Type);
      *
      * N is an element count, not a byte count: both macros multiply it
      * by sizeof (Type) and cast the allocator's result to Type *.
      * When `chkmalloc' is defined, the trace_malloc/trace_realloc
      * functions are used instead of xmalloc/xrealloc and are also given
      * the file and line of the call site.
 185  */
 186 #ifdef chkmalloc
 187 # include "chkmalloc.h"
 188 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 189                                                   (n) * sizeof (Type)))
 190 # define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \
 191                                                    (op), (n) * sizeof (Type)))
 192 #else
 193 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 194 # define xrnew(op,n,Type) ((Type *) xrealloc ((op), (n) * sizeof (Type)))
 195 #endif
 196
 197 typedef int bool;               /* holds TRUE or FALSE */
 198
     /* Function type stored in the `function' field of a `language':
        takes a FILE * and returns nothing. */
 199 typedef void Lang_function P_((FILE *));
 200
 201 typedef struct                  /* one entry of the `compressors' table */
 202 {
 203   char *suffix;                 /* file name suffix, written without the dot */
 204   char *command;                /* Takes one arg and decompresses to stdout */
 205 } compressor;
 206
 207 typedef struct                  /* one entry of the `lang_names' table */
 208 {
 209   char *name;                   /* language name; NULL ends the table */
 210   Lang_function *function;      /* routine for this language, may be NULL */
 211   char **filenames;             /* NULL-terminated whole file names, or NULL */
 212   char **suffixes;              /* NULL-terminated suffixes without dot, or NULL */
 213   char **interpreters;          /* NULL-terminated interpreter names, or NULL */
 214 } language;
 215
 216 typedef struct node_st          /* one tag, stored in the tree rooted at `head' */
 217 {                               /* sorting structure            */
 218   char *name;                   /* function or type name        */
 219   char *file;                   /* file name                    */
 220   bool is_func;                 /* use pattern or line no       */
 221   bool been_warned;             /* set if noticed dup           */
 222   int lno;                      /* line number tag is on        */
 223   long cno;                     /* character number line starts on */
 224   char *pat;                    /* search pattern               */
 225   struct node_st *left, *right; /* left and right sons          */
 226 } node;
 227
 228 /*
 229  * A `linebuffer' is a structure which holds a line of text.
 230  * `readline_internal' reads a line from a stream into a linebuffer
 231  * and works regardless of the length of the line.
 232  * SIZE is the size of BUFFER, LEN is the length of the string in
 233  * BUFFER after readline reads it.
 234  */
 235 typedef struct
 236 {
 237   long size;                    /* allocated size of `buffer' */
 238   int len;                      /* length of the string held in `buffer' */
 239   char *buffer;                 /* the text of the line */
 240 } linebuffer;
 241
 242 /* Forward declarations of the routines listed in the `lang_names'
        table, plus C_entries, which takes an extra `c_ext' argument and
        therefore does not have the Lang_function signature.
        Many compilers barf on this:
 243         Lang_function Ada_funcs;
 244    so let's write it this way */
 245 static void Ada_funcs P_((FILE *));
 246 static void Asm_labels P_((FILE *));
 247 static void C_entries P_((int c_ext, FILE *));
 248 static void default_C_entries P_((FILE *));
 249 static void plain_C_entries P_((FILE *));
 250 static void Cjava_entries P_((FILE *));
 251 static void Cobol_paragraphs P_((FILE *));
 252 static void Cplusplus_entries P_((FILE *));
 253 static void Cstar_entries P_((FILE *));
 254 static void Erlang_functions P_((FILE *));
 255 static void Fortran_functions P_((FILE *));
 256 static void Yacc_entries P_((FILE *));
 257 static void Lisp_functions P_((FILE *));
 258 static void Makefile_targets P_((FILE *));
 259 static void Pascal_functions P_((FILE *));
 260 static void Perl_functions P_((FILE *));
 261 static void Postscript_functions P_((FILE *));
 262 static void Prolog_functions P_((FILE *));
 263 static void Python_functions P_((FILE *));
 264 static void Scheme_functions P_((FILE *));
 265 static void TeX_commands P_((FILE *));
 266 static void Texinfo_nodes P_((FILE *));
 267 static void just_read_file P_((FILE *));
 268
 269 static void print_language_names P_((void));   /* forward declarations follow */
 270 static void print_version P_((void));
 271 static void print_help P_((void));
 272 int main P_((int, char **));
 273 static int number_len P_((long));
 274
 275 static compressor *get_compressor_from_suffix P_((char *, char **));
 276 static language *get_language_from_langname P_((char *));
 277 static language *get_language_from_interpreter P_((char *));
 278 static language *get_language_from_filename P_((char *));
 279 static int total_size_of_entries P_((node *));
 280 static long readline P_((linebuffer *, FILE *));
 281 static long readline_internal P_((linebuffer *, FILE *));
 282 static void get_tag P_((char *));
 283
     /* Regexp support exists only when ETAGS_REGEXPS is defined. */
 284 #ifdef ETAGS_REGEXPS
 285 static void analyse_regex P_((char *, bool));
 286 static void add_regex P_((char *, bool, language *));
 287 static void free_patterns P_((void));
 288 #endif /* ETAGS_REGEXPS */
 289 static void error P_((const char *, const char *));
 290 static void suggest_asking_for_help P_((void));
 291 static void fatal P_((char *, char *));
 292 static void pfatal P_((char *));
 293 static void add_node P_((node *, node **));
 294
 295 static void init P_((void));
 296 static void initbuffer P_((linebuffer *));
 297 static void find_entries P_((char *, FILE *));
 298 static void free_tree P_((node *));
 299 static void pfnote P_((char *, bool, char *, int, int, long));
 300 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
 301 static void process_file P_((char *));
 302 static void put_entries P_((node *));
 303 static void takeprec P_((void));
 304
 305 static char *concat P_((char *, char *, char *));
 306 static char *skip_spaces P_((char *));
 307 static char *skip_non_spaces P_((char *));
 308 static char *savenstr P_((char *, int));
 309 static char *savestr P_((char *));
 310 static char *etags_strchr P_((const char *, int));
 311 static char *etags_strrchr P_((const char *, int));
 312 static char *etags_getcwd P_((void));
 313 static char *relative_filename P_((char *, char *));
 314 static char *absolute_filename P_((char *, char *));
 315 static char *absolute_dirname P_((char *, char *));
 316 static bool filename_is_absolute P_((char *f));
 317 static void canonicalize_filename P_((char *));
 318 static void grow_linebuffer P_((linebuffer *, int));
     /* Allocators behind the non-chkmalloc xnew/xrnew macros.  They are not
        static, take the size as unsigned int and are declared to return
        long *; the macros cast the result to the requested type.
        NOTE(review): an unsigned int size cannot express requests larger
        than UINT_MAX bytes -- confirm this is acceptable to callers. */
 319 long *xmalloc P_((unsigned int));
 320 long *xrealloc P_((char *, unsigned int));
 321
 322 \f
 323 char searchar = '/';            /* use /.../ searches */
 324
 325 char *tagfile;                  /* output file */
 326 char *progname;                 /* name this program was invoked with */
 327 char *cwd;                      /* current working directory */
 328 char *tagfiledir;               /* directory of tagfile */
 329 FILE *tagf;                     /* ioptr for tags file */
 330
 331 char *curfile;                  /* current input file name */
 332 language *curlang;              /* current language */
 333
 334 int lineno;                     /* line number of current line */
 335 long charno;                    /* current character number */
 336 long linecharno;                /* charno of start of current line */
 337 char *dbp;                      /* pointer to start of current tag */
 338
 339 node *head;                     /* the head of the binary tree of tags */
 340
 341 linebuffer lb;                  /* the current line */
 342 linebuffer token_name;          /* used by C_entries as a temporary area */
     /* A pair of line buffers, each with an associated position. */
 343 struct
 344 {
 345   long linepos;                 /* presumably the charno where `lb' starts -- confirm */
 346   linebuffer lb;                /* used by C_entries instead of lb */
 347 } lbs[2];
 348
 349 /* boolean "functions" (see init)       */
     /* Character-class tables, each with CHARS entries, read through the
        iswhite, notinname, intoken, begtoken and endtoken macros.
        Presumably init fills them from the strings below -- confirm. */
 350 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 351 char
 352   /* white chars */
 353   *white = " \f\t\n\r\v",
 354   /* not in a name */
 355   *nonam = " \f\t\n\r(=,[;",
 356   /* token ending chars */
 357   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 358   /* token starting chars */
 359   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 360   /* valid in-token chars */
 361   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 362
 363 bool append_to_tagfile;         /* -a: append to tags */
     /* Option flags.  `bool' is int, which lets the `longopts' table
        store directly into several of them through its flag pointers. */
 364 /* The following four default to TRUE for etags, but to FALSE for ctags.  */
 365 bool typedefs;                  /* -t: create tags for C and Ada typedefs */
 366 bool typedefs_and_cplusplus;    /* -T: create tags for C typedefs, level */
 367                                 /* 0 struct/enum/union decls, and C++ */
 368                                 /* member functions. */
 369 bool constantypedefs;           /* -d: create tags for C #define, enum */
 370                                 /* constants and variables. */
 371                                 /* -D: opposite of -d.  Default under ctags. */
 372 bool declarations;              /* --declarations: tag them and extern in C&Co*/
 373 bool globals;                   /* --globals, --no-globals: tag global variables */
 374 bool members;                   /* --members, --no-members: tag C member variables */
 375 bool update;                    /* -u: update tags */
 376 bool vgrind_style;              /* -v: create vgrind style index output */
 377 bool no_warnings;               /* -w: suppress warnings */
 378 bool cxref_style;               /* -x: create cxref style output */
 379 bool cplusplus;                 /* -C, --c++: .[hc] means C++, not C */
 380 bool noindentypedefs;           /* -I: ignore indentation in C */
 381 bool packages_only;             /* --packages-only: in Ada, only tag packages*/
 382
 383 #ifdef LONG_OPTIONS
     /* Long option table handed to getopt_long.  Each row is
        { name, argument requirement, flag pointer, value }.  Rows with a
        NULL flag pointer name the equivalent short option character in
        the last column; rows with a flag pointer name the variable that
        receives the value in the last column.  The row whose name is
        NULL ends the table.
        NOTE(review): "help" is listed twice, with 'h' and with 'H';
        confirm whether the second row can ever be selected. */
 384 struct option longopts[] =
 385 {
 386   { "packages-only",      no_argument,       &packages_only, TRUE  },
 387   { "append",             no_argument,       NULL,           'a'   },
 388   { "backward-search",    no_argument,       NULL,           'B'   },
 389   { "c++",                no_argument,       NULL,           'C'   },
 390   { "cxref",              no_argument,       NULL,           'x'   },
 391   { "defines",            no_argument,       NULL,           'd'   },
 392   { "declarations",       no_argument,       &declarations,  TRUE  },
 393   { "no-defines",         no_argument,       NULL,           'D'   },
 394   { "globals",            no_argument,       &globals,       TRUE  },
 395   { "no-globals",         no_argument,       &globals,       FALSE },
 396   { "help",               no_argument,       NULL,           'h'   },
 397   { "help",               no_argument,       NULL,           'H'   },
 398   { "ignore-indentation", no_argument,       NULL,           'I'   },
 399   { "include",            required_argument, NULL,           'i'   },
 400   { "language",           required_argument, NULL,           'l'   },
 401   { "members",            no_argument,       &members,       TRUE  },
 402   { "no-members",         no_argument,       &members,       FALSE },
 403   { "no-warn",            no_argument,       NULL,           'w'   },
 404   { "output",             required_argument, NULL,           'o'   },
 405 #ifdef ETAGS_REGEXPS
 406   { "regex",              required_argument, NULL,           'r'   },
 407   { "no-regex",           no_argument,       NULL,           'R'   },
 408   { "ignore-case-regex",  required_argument, NULL,           'c'   },
 409 #endif /* ETAGS_REGEXPS */
 410   { "typedefs",           no_argument,       NULL,           't'   },
 411   { "typedefs-and-c++",   no_argument,       NULL,           'T'   },
 412   { "update",             no_argument,       NULL,           'u'   },
 413   { "version",            no_argument,       NULL,           'V'   },
 414   { "vgrind",             no_argument,       NULL,           'v'   },
 415   { NULL }
 416 };
 417 #endif /* LONG_OPTIONS */
 418
 419 #ifdef ETAGS_REGEXPS
 420 /* Structure defining a regular expression.  Elements are
 421    the compiled pattern, and the name string. */
 422 typedef struct pattern
 423 {
 424   struct pattern *p_next;       /* link to another pattern; see p_head */
 425   language *language;           /* presumably the language the regexp is limited to -- confirm */
 426   char *regex;                  /* presumably the regexp as given by the user -- confirm */
 427   struct re_pattern_buffer *pattern;    /* the compiled pattern */
 428   struct re_registers regs;     /* registers used together with `pattern' */
 429   char *name_pattern;           /* the name string */
 430   bool error_signaled;          /* presumably set once an error was reported -- confirm */
 431 } pattern;
 432
 433 /* List of all regexps. */
 434 pattern *p_head = NULL;
 435
 436 /* How many characters in the character set.  (From regex.c.)  */
 437 #define CHAR_SET_SIZE 256
 438 /* Translation table for case-insensitive matching. */
 439 char lc_trans[CHAR_SET_SIZE];
 440 #endif /* ETAGS_REGEXPS */
 441
 442 compressor compressors[] =      /* suffix (no dot) and the command that decompresses it */
 443 {
 444   { "z", "gzip -d -c"},
 445   { "Z", "gzip -d -c"},
 446   { "gz", "gzip -d -c"},
 447   { "GZ", "gzip -d -c"},
 448   { "bz2", "bzip2 -d -c" },
 449   { NULL }                      /* end of list: NULL suffix */
 450 };
 451
 452 /*
 453  * Language stuff.
      *
      * The arrays below are the per-language lists referenced from the
      * `lang_names' table.  Every array ends with NULL, and suffixes are
      * written without their leading dot.
 454  */
 455
 456 /* Non-NULL if language fixed. */
 457 language *forced_lang = NULL;
 458
 459 /* Ada code */
 460 char *Ada_suffixes [] =
 461   { "ads", "adb", "ada", NULL };
 462
 463 /* Assembly code */
 464 char *Asm_suffixes [] = { "a",  /* Unix assembler */
 465                           "asm", /* Microcontroller assembly */
 466                           "def", /* BSO/Tasking definition includes  */
 467                           "inc", /* Microcontroller include files */
 468                           "ins", /* Microcontroller include files */
 469                           "s", "sa", /* Unix assembler */
 470                           "S",   /* cpp-processed Unix assembler */
 471                           "src", /* BSO/Tasking C compiler output */
 472                           NULL
 473                         };
 474
 475 /* Note that .c and .h can be considered C++, if the --c++ flag was
 476    given.  That is why default_C_entries is called here. */
 477 char *default_C_suffixes [] =
 478   { "c", "h", NULL };
 479
 480 char *Cplusplus_suffixes [] =
 481   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 482     "M",                        /* Objective C++ */
 483     "pdb",                      /* Postscript with C syntax */
 484     NULL };
 485
 486 char *Cjava_suffixes [] =
 487   { "java", NULL };
 488
 489 char *Cobol_suffixes [] =
 490   { "COB", "cob", NULL };
 491
 492 char *Cstar_suffixes [] =
 493   { "cs", "hs", NULL };
 494
 495 char *Erlang_suffixes [] =
 496   { "erl", "hrl", NULL };
 497
 498 char *Fortran_suffixes [] =
 499   { "F", "f", "f90", "for", NULL };
 500
 501 char *Lisp_suffixes [] =
 502   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 503
 504 char *Makefile_filenames [] =
 505   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 506
 507 char *Pascal_suffixes [] =
 508   { "p", "pas", NULL };
 509
 510 char *Perl_suffixes [] =
 511   { "pl", "pm", NULL };
     /* Interpreter names for Perl, the only language in `lang_names' that
        has such a list.  NOTE(review): "@PERL@" looks like an
        unsubstituted configure placeholder kept on purpose -- confirm. */
 512 char *Perl_interpreters [] =
 513   { "perl", "@PERL@", NULL };
 514
     /* Suffixes of the "proc" entry of `lang_names', which uses
        plain_C_entries. */
 515 char *plain_C_suffixes [] =
 516   { "lm",                       /* Objective lex file */
 517     "m",                        /* Objective C file */
 518     "pc",                       /* Pro*C file */
 519      NULL };
 520
 521 char *Postscript_suffixes [] =
 522   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 523
 524 char *Prolog_suffixes [] =
 525   { "prolog", NULL };
 526
 527 char *Python_suffixes [] =
 528   { "py", NULL };
 529
 530 /* Can't do the `SCM' or `scm' prefix with a version number. */
 531 char *Scheme_suffixes [] =
 532   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 533
 534 char *TeX_suffixes [] =
 535   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 536
 537 char *Texinfo_suffixes [] =
 538   { "texi", "texinfo", "txi", NULL };
 539
 540 char *Yacc_suffixes [] =
 541   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 542
 543 /*
 544  * Table of languages.
 545  *
 546  * It is ok for a given function to be listed under more than one
 547  * name.  I just didn't.
      *
      * Columns, in the order of the `language' structure: name, function,
      * file names, suffixes, interpreters.  Fields omitted from a row are
      * zero-initialized, so "auto" and "none" have no lists at all and
      * "auto" has no function.  The row with a NULL name ends the table.
 548  */
 549
 550 language lang_names [] =
 551 {
 552   { "ada",        Ada_funcs,            NULL, Ada_suffixes,             NULL },
 553   { "asm",        Asm_labels,           NULL, Asm_suffixes,             NULL },
 554   { "c",          default_C_entries,    NULL, default_C_suffixes,       NULL },
 555   { "c++",        Cplusplus_entries,    NULL, Cplusplus_suffixes,       NULL },
 556   { "c*",         Cstar_entries,        NULL, Cstar_suffixes,           NULL },
 557   { "cobol",      Cobol_paragraphs,     NULL, Cobol_suffixes,           NULL },
 558   { "erlang",     Erlang_functions,     NULL, Erlang_suffixes,          NULL },
 559   { "fortran",    Fortran_functions,    NULL, Fortran_suffixes,         NULL },
 560   { "java",       Cjava_entries,        NULL, Cjava_suffixes,           NULL },
 561   { "lisp",       Lisp_functions,       NULL, Lisp_suffixes,            NULL },
 562   { "makefile",   Makefile_targets,     Makefile_filenames, NULL,       NULL },
 563   { "pascal",     Pascal_functions,     NULL, Pascal_suffixes,          NULL },
 564   { "perl",       Perl_functions,       NULL, Perl_suffixes, Perl_interpreters },
 565   { "postscript", Postscript_functions, NULL, Postscript_suffixes,      NULL },
 566   { "proc",       plain_C_entries,      NULL, plain_C_suffixes,         NULL },
 567   { "prolog",     Prolog_functions,     NULL, Prolog_suffixes,          NULL },
 568   { "python",     Python_functions,     NULL, Python_suffixes,          NULL },
 569   { "scheme",     Scheme_functions,     NULL, Scheme_suffixes,          NULL },
 570   { "tex",        TeX_commands,         NULL, TeX_suffixes,             NULL },
 571   { "texinfo",    Texinfo_nodes,        NULL, Texinfo_suffixes,         NULL },
 572   { "yacc",       Yacc_entries,         NULL, Yacc_suffixes,            NULL },
 573   { "auto", NULL },             /* default guessing scheme */
 574   { "none", just_read_file },   /* regexp matching only */
 575   { NULL, NULL }                /* end of list */
 576 };
 577 \f
 578 static void
 579 print_language_names ()
 580 {
 581   language *lang;
 582   char **ext;
 583
 584   puts ("\nThese are the currently supported languages, along with the\n\
 585 default file name suffixes:");
 586   for (lang = lang_names; lang->name != NULL; lang++)
 587     {
 588       printf ("\t%s\t", lang->name);
 589       if (lang->suffixes != NULL)
 590         for (ext = lang->suffixes; *ext != NULL; ext++)
 591           printf (" .%s", *ext);
 592       puts ("");
 593     }
 594   puts ("Where `auto' means use default language for files based on file\n\
 595 name suffix, and `none' means only do regexp processing on files.\n\
 596 If no language is specified and no matching suffix is found,\n\
 597 the first line of the file is read for a sharp-bang (#!) sequence\n\
 598 followed by the name of an interpreter.  If no such sequence is found,\n\
 599 Fortran is tried first; if no tags are found, C is tried next.\n\
 600 Compressed files are supported using gzip and bzip2.");
 601 }
 602
 603 #ifndef EMACS_NAME
 604 # define EMACS_NAME "GNU Emacs"
 605 #endif
 606 #ifndef VERSION
 607 # define VERSION "21"
 608 #endif
 609 static void
 610 print_version ()
 611 {
 612   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 613   puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
 614   puts ("This program is distributed under the same terms as Emacs");
 615
 616   exit (GOOD);
 617 }
 618
 619 static void
 620 print_help ()
 621 {
 622   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 623 \n\
 624 These are the options accepted by %s.\n", progname, progname);
 625 #ifdef LONG_OPTIONS
 626   puts ("You may use unambiguous abbreviations for the long option names.");
 627 #else
 628   puts ("Long option names do not work with this executable, as it is not\n\
 629 linked with GNU getopt.");
 630 #endif /* LONG_OPTIONS */
 631   puts ("A - as file name means read names from stdin (one per line).");
 632   if (!CTAGS)
 633     printf ("  Absolute names are stored in the output file as they are.\n\
 634 Relative ones are stored relative to the output file's directory.");
 635   puts ("\n");
 636
 637   puts ("-a, --append\n\
 638         Append tag entries to existing tags file.");
 639
 640   puts ("--packages-only\n\
 641         For Ada files, only generate tags for packages .");
 642
 643   if (CTAGS)
 644     puts ("-B, --backward-search\n\
 645         Write the search commands for the tag entries using '?', the\n\
 646         backward-search command instead of '/', the forward-search command.");
 647
 648   puts ("-C, --c++\n\
 649         Treat files whose name suffix defaults to C language as C++ files.");
 650
 651   puts ("--declarations\n\
 652         In C and derived languages, create tags for function declarations,");
 653   if (CTAGS)
 654     puts ("\tand create tags for extern variables if --globals is used.");
 655   else
 656     puts
 657       ("\tand create tags for extern variables unless --no-globals is used.");
 658
 659   if (CTAGS)
 660     puts ("-d, --defines\n\
 661         Create tag entries for C #define constants and enum constants, too.");
 662   else
 663     puts ("-D, --no-defines\n\
 664         Don't create tag entries for C #define constants and enum constants.\n\
 665         This makes the tags file smaller.");
 666
 667   if (!CTAGS)
 668     {
 669       puts ("-i FILE, --include=FILE\n\
 670         Include a note in tag file indicating that, when searching for\n\
 671         a tag, one should also consult the tags file FILE after\n\
 672         checking the current file.");
 673       puts ("-l LANG, --language=LANG\n\
 674         Force the following files to be considered as written in the\n\
 675         named language up to the next --language=LANG option.");
 676     }
 677
 678   if (CTAGS)
 679     puts ("--globals\n\
 680         Create tag entries for global variables in some languages.");
 681   else
 682     puts ("--no-globals\n\
 683         Do not create tag entries for global variables in some\n\
 684         languages.  This makes the tags file smaller.");
 685   puts ("--members\n\
 686         Create tag entries for member variables in C and derived languages.");
 687
 688 #ifdef ETAGS_REGEXPS
 689   puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
 690         Make a tag for each line matching pattern REGEXP in the following\n\
 691         files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
 692         regexfile is a file containing one REGEXP per line.\n\
 693         REGEXP is anchored (as if preceded by ^).\n\
 694         The form /REGEXP/NAME/ creates a named tag.\n\
 695         For example Tcl named tags can be created with:\n\
 696         --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
 697   puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
 698         Like -r, --regex but ignore case when matching expressions.");
 699   puts ("-R, --no-regex\n\
 700         Don't create tags from regexps for the following files.");
 701 #endif /* ETAGS_REGEXPS */
 702   puts ("-o FILE, --output=FILE\n\
 703         Write the tags to FILE.");
 704   puts ("-I, --ignore-indentation\n\
 705         Don't rely on indentation quite as much as normal.  Currently,\n\
 706         this means not to assume that a closing brace in the first\n\
 707         column is the final brace of a function or structure\n\
 708         definition in C and C++.");
 709
 710   if (CTAGS)
 711     {
 712       puts ("-t, --typedefs\n\
 713         Generate tag entries for C and Ada typedefs.");
 714       puts ("-T, --typedefs-and-c++\n\
 715         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 716         and C++ member functions.");
 717       puts ("-u, --update\n\
 718         Update the tag entries for the given files, leaving tag\n\
 719         entries for other files in place.  Currently, this is\n\
 720         implemented by deleting the existing entries for the given\n\
 721         files and then rewriting the new entries at the end of the\n\
 722         tags file.  It is often faster to simply rebuild the entire\n\
 723         tag file than to use this.");
 724       puts ("-v, --vgrind\n\
 725         Generates an index of items intended for human consumption,\n\
 726         similar to the output of vgrind.  The index is sorted, and\n\
 727         gives the page number of each item.");
 728       puts ("-w, --no-warn\n\
 729         Suppress warning messages about entries defined in multiple\n\
 730         files.");
 731       puts ("-x, --cxref\n\
 732         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 733         The output uses line numbers instead of page numbers, but\n\
 734         beyond that the differences are cosmetic; try both to see\n\
 735         which you like.");
 736     }
 737
 738   puts ("-V, --version\n\
 739         Print the version of the program.\n\
 740 -h, --help\n\
 741         Print this help message.");
 742
 743   print_language_names ();
 744
 745   puts ("");
 746   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 747
 748   exit (GOOD);
 749 }
 750
 751 \f
 752 enum argument_type              /* what kind of thing an `argument' holds */
 753 {
 754   at_language,                  /* presumably a --language option -- confirm */
 755   at_regexp,                    /* presumably a --regex option -- confirm */
 756   at_filename,                  /* presumably a file name -- confirm */
 757   at_icregexp                   /* presumably an --ignore-case-regex option -- confirm */
 758 };
 759
 760 /* This structure helps us allow mixing of --lang and file names. */
 761 typedef struct
 762 {
 763   enum argument_type arg_type;  /* selects how the other fields are read */
 764   char *what;                   /* presumably the text of the argument -- confirm */
 765   language *lang;               /* language of the regexp */
 766 } argument;
 767
 768 #ifdef VMS                      /* VMS specific functions */
 769
 770 #define EOS     '\0'            /* string terminator */
 771
 772 /* This is a BUG!  ANY arbitrary limit is a BUG!
 773    Won't someone please fix this?  */
 774 #define MAX_FILE_SPEC_LEN       255
     /* Counted string that receives an expanded file name.  fn_exp stores
        EOS at body[curlen], so BODY has room for MAX_FILE_SPEC_LEN
        characters plus the terminator.  Presumably the layout matches the
        varying-string descriptor class that fn_exp selects -- confirm. */
 775 typedef struct  {
 776   short   curlen;               /* number of characters currently in `body' */
 777   char    body[MAX_FILE_SPEC_LEN + 1];
 778 } vspec;
 779
 780 /*
 781  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
 782  returning in each successive call the next file name matching the input
 783  spec. The function expects that each in_spec passed
 784  to it will be processed to completion; in particular, up to and
 785  including the call following that in which the last matching name
 786  is returned, the function ignores the value of in_spec, and will
 787  only start processing a new spec with the following call.
 788  If an error occurs, on return out_spec contains the value
 789  of in_spec when the error occurred.
 790
 791  With each successive file name returned in out_spec, the
 792  function's return value is one. When there are no more matching
 793  names the function returns zero. If on the first call no file
 794  matches in_spec, or there is any other error, -1 is returned.
 795 */
 796
 797 #include        <rmsdef.h>
 798 #include        <descrip.h>
 799 #define         OUTSIZE MAX_FILE_SPEC_LEN
 800 static short
 801 fn_exp (out, in)
 802      vspec *out;
 803      char *in;
 804 {
 805   static long context = 0;
 806   static struct dsc$descriptor_s o;
 807   static struct dsc$descriptor_s i;
 808   static bool pass1 = TRUE;
 809   long status;
 810   short retval;
 811
 812   if (pass1)
 813     {
 814       pass1 = FALSE;
 815       o.dsc$a_pointer = (char *) out;
 816       o.dsc$w_length = (short)OUTSIZE;
 817       i.dsc$a_pointer = in;
 818       i.dsc$w_length = (short)strlen(in);
 819       i.dsc$b_dtype = DSC$K_DTYPE_T;
 820       i.dsc$b_class = DSC$K_CLASS_S;
 821       o.dsc$b_dtype = DSC$K_DTYPE_VT;
 822       o.dsc$b_class = DSC$K_CLASS_VS;
 823     }
 824   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
 825     {
 826       out->body[out->curlen] = EOS;
 827       return 1;
 828     }
 829   else if (status == RMS$_NMF)
 830     retval = 0;
 831   else
 832     {
 833       strcpy(out->body, in);
 834       retval = -1;
 835     }
 836   lib$find_file_end(&context);
 837   pass1 = TRUE;
 838   return retval;
 839 }
 840
 841 /*
 842   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
 843   name of each file specified by the provided arg expanding wildcards.
 844 */
 845 static char *
 846 gfnames (arg, p_error)
 847      char *arg;
 848      bool *p_error;
 849 {
 850   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 851
 852   switch (fn_exp (&filename, arg))
 853     {
 854     case 1:
 855       *p_error = FALSE;
 856       return filename.body;
 857     case 0:
 858       *p_error = FALSE;
 859       return NULL;
 860     default:
 861       *p_error = TRUE;
 862       return filename.body;
 863     }
 864 }
 865
 866 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
 867 system (cmd)
 868      char *cmd;
 869 {
 870   error ("%s", "system() function not implemented under VMS");
 871 }
 872 #endif
 873
 874 #define VERSION_DELIM   ';'
 875 char *massage_name (s)
 876      char *s;
 877 {
 878   char *start = s;
 879
 880   for ( ; *s; s++)
 881     if (*s == VERSION_DELIM)
 882       {
 883         *s = EOS;
 884         break;
 885       }
 886     else
 887       *s = lowcase (*s);
 888   return start;
 889 }
 890 #endif /* VMS */
 891
 892 \f
 893 int
 894 main (argc, argv)
 895      int argc;
 896      char *argv[];
 897 {
 898   int i;
 899   unsigned int nincluded_files;
 900   char **included_files;
 901   char *this_file;
 902   argument *argbuffer;
 903   int current_arg, file_count;
 904   linebuffer filename_lb;
 905 #ifdef VMS
 906   bool got_err;
 907 #endif
 908
 909 #ifdef DOS_NT
 910   _fmode = O_BINARY;   /* all of files are treated as binary files */
 911 #endif /* DOS_NT */
 912
 913   progname = argv[0];
 914   nincluded_files = 0;
 915   included_files = xnew (argc, char *);
 916   current_arg = 0;
 917   file_count = 0;
 918
 919   /* Allocate enough no matter what happens.  Overkill, but each one
 920      is small. */
 921   argbuffer = xnew (argc, argument);
 922
 923 #ifdef ETAGS_REGEXPS
 924   /* Set syntax for regular expression routines. */
 925   re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
 926   /* Translation table for case-insensitive search. */
 927   for (i = 0; i < CHAR_SET_SIZE; i++)
 928     lc_trans[i] = lowcase (i);
 929 #endif /* ETAGS_REGEXPS */
 930
 931   /*
 932    * If etags, always find typedefs and structure tags.  Why not?
 933    * Also default is to find macro constants, enum constants and
 934    * global variables.
 935    */
 936   if (!CTAGS)
 937     {
 938       typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
 939       globals = TRUE;
 940       members = FALSE;
 941     }
 942
 943   while (1)
 944     {
 945       int opt;
 946       char *optstring;
 947
 948 #ifdef ETAGS_REGEXPS
 949       optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
 950 #else
 951       optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
 952 #endif /* ETAGS_REGEXPS */
 953
 954 #ifndef LONG_OPTIONS
 955       optstring = optstring + 1;
 956 #endif /* LONG_OPTIONS */
 957
 958       opt = getopt_long (argc, argv, optstring, longopts, 0);
 959       if (opt == EOF)
 960         break;
 961
 962       switch (opt)
 963         {
 964         case 0:
 965           /* If getopt returns 0, then it has already processed a
 966              long-named option.  We should do nothing.  */
 967           break;
 968
 969         case 1:
 970           /* This means that a file name has been seen.  Record it. */
 971           argbuffer[current_arg].arg_type = at_filename;
 972           argbuffer[current_arg].what = optarg;
 973           ++current_arg;
 974           ++file_count;
 975           break;
 976
 977           /* Common options. */
 978         case 'a': append_to_tagfile = TRUE;     break;
 979         case 'C': cplusplus = TRUE;             break;
 980         case 'd': constantypedefs = TRUE;       break;
 981         case 'D': constantypedefs = FALSE;      break;
 982         case 'f':               /* for compatibility with old makefiles */
 983         case 'o':
 984           if (tagfile)
 985             {
 986               error ("-o option may only be given once.", (char *)NULL);
 987               suggest_asking_for_help ();
 988             }
 989           tagfile = optarg;
 990           break;
 991         case 'I':
 992         case 'S':               /* for backward compatibility */
 993           noindentypedefs = TRUE;
 994           break;
 995         case 'l':
 996           {
 997             language *lang = get_language_from_langname (optarg);
 998             if (lang != NULL)
 999               {
1000                 argbuffer[current_arg].lang = lang;
1001                 argbuffer[current_arg].arg_type = at_language;
1002                 ++current_arg;
1003               }
1004           }
1005           break;
1006 #ifdef ETAGS_REGEXPS
1007         case 'r':
1008           argbuffer[current_arg].arg_type = at_regexp;
1009           argbuffer[current_arg].what = optarg;
1010           ++current_arg;
1011           break;
1012         case 'R':
1013           argbuffer[current_arg].arg_type = at_regexp;
1014           argbuffer[current_arg].what = NULL;
1015           ++current_arg;
1016           break;
1017         case 'c':
1018           argbuffer[current_arg].arg_type = at_icregexp;
1019           argbuffer[current_arg].what = optarg;
1020           ++current_arg;
1021           break;
1022 #endif /* ETAGS_REGEXPS */
1023         case 'V':
1024           print_version ();
1025           break;
1026         case 'h':
1027         case 'H':
1028           print_help ();
1029           break;
1030         case 't':
1031           typedefs = TRUE;
1032           break;
1033         case 'T':
1034           typedefs = typedefs_and_cplusplus = TRUE;
1035           break;
1036 #if (!CTAGS)
1037           /* Etags options */
1038         case 'i':
1039           included_files[nincluded_files++] = optarg;
1040           break;
1041 #else /* CTAGS */
1042           /* Ctags options. */
1043         case 'B': searchar = '?';       break;
1044         case 'u': update = TRUE;        break;
1045         case 'v': vgrind_style = TRUE;  /*FALLTHRU*/
1046         case 'x': cxref_style = TRUE;   break;
1047         case 'w': no_warnings = TRUE;   break;
1048 #endif /* CTAGS */
1049         default:
1050           suggest_asking_for_help ();
1051         }
1052     }
1053
1054   for (; optind < argc; ++optind)
1055     {
1056       argbuffer[current_arg].arg_type = at_filename;
1057       argbuffer[current_arg].what = argv[optind];
1058       ++current_arg;
1059       ++file_count;
1060     }
1061
1062   if (nincluded_files == 0 && file_count == 0)
1063     {
1064       error ("no input files specified.", (char *)NULL);
1065       suggest_asking_for_help ();
1066     }
1067
1068   if (tagfile == NULL)
1069     tagfile = CTAGS ? "tags" : "TAGS";
1070   cwd = etags_getcwd ();        /* the current working directory */
1071   if (cwd[strlen (cwd) - 1] != '/')
1072     {
1073       char *oldcwd = cwd;
1074       cwd = concat (oldcwd, "/", "");
1075       free (oldcwd);
1076     }
1077   if (streq (tagfile, "-"))
1078     tagfiledir = cwd;
1079   else
1080     tagfiledir = absolute_dirname (tagfile, cwd);
1081
1082   init ();                      /* set up boolean "functions" */
1083
1084   initbuffer (&lb);
1085   initbuffer (&token_name);
1086   initbuffer (&lbs[0].lb);
1087   initbuffer (&lbs[1].lb);
1088   initbuffer (&filename_lb);
1089
1090   if (!CTAGS)
1091     {
1092       if (streq (tagfile, "-"))
1093         {
1094           tagf = stdout;
1095 #ifdef DOS_NT
1096           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1097              doesn't take effect until after `stdout' is already open). */
1098           if (!isatty (fileno (stdout)))
1099             setmode (fileno (stdout), O_BINARY);
1100 #endif /* DOS_NT */
1101         }
1102       else
1103         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1104       if (tagf == NULL)
1105         pfatal (tagfile);
1106     }
1107
1108   /*
1109    * Loop through files finding functions.
1110    */
1111   for (i = 0; i < current_arg; ++i)
1112     {
1113       switch (argbuffer[i].arg_type)
1114         {
1115         case at_language:
1116           forced_lang = argbuffer[i].lang;
1117           break;
1118 #ifdef ETAGS_REGEXPS
1119         case at_regexp:
1120           analyse_regex (argbuffer[i].what, FALSE);
1121           break;
1122         case at_icregexp:
1123           analyse_regex (argbuffer[i].what, TRUE);
1124           break;
1125 #endif
1126         case at_filename:
1127 #ifdef VMS
1128           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1129             {
1130               if (got_err)
1131                 {
1132                   error ("can't find file %s\n", this_file);
1133                   argc--, argv++;
1134                 }
1135               else
1136                 {
1137                   this_file = massage_name (this_file);
1138                 }
1139 #else
1140               this_file = argbuffer[i].what;
1141 #endif
1142               /* Input file named "-" means read file names from stdin
1143                  (one per line) and use them. */
1144               if (streq (this_file, "-"))
1145                 while (readline_internal (&filename_lb, stdin) > 0)
1146                   process_file (filename_lb.buffer);
1147               else
1148                 process_file (this_file);
1149 #ifdef VMS
1150             }
1151 #endif
1152           break;
1153         }
1154     }
1155
1156 #ifdef ETAGS_REGEXPS
1157   free_patterns ();
1158 #endif /* ETAGS_REGEXPS */
1159
1160   if (!CTAGS)
1161     {
1162       while (nincluded_files-- > 0)
1163         fprintf (tagf, "\f\n%s,include\n", *included_files++);
1164
1165       fclose (tagf);
1166       exit (GOOD);
1167     }
1168
1169   /* If CTAGS, we are here.  process_file did not write the tags yet,
1170      because we want them ordered.  Let's do it now. */
1171   if (cxref_style)
1172     {
1173       put_entries (head);
1174       free_tree (head);
1175       head = NULL;
1176       exit (GOOD);
1177     }
1178
1179   if (update)
1180     {
1181       char cmd[BUFSIZ];
1182       for (i = 0; i < current_arg; ++i)
1183         {
1184           if (argbuffer[i].arg_type != at_filename)
1185             continue;
1186           sprintf (cmd,
1187                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1188                    tagfile, argbuffer[i].what, tagfile);
1189           if (system (cmd) != GOOD)
1190             fatal ("failed to execute shell command", (char *)NULL);
1191         }
1192       append_to_tagfile = TRUE;
1193     }
1194
1195   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1196   if (tagf == NULL)
1197     pfatal (tagfile);
1198   put_entries (head);
1199   free_tree (head);
1200   head = NULL;
1201   fclose (tagf);
1202
1203   if (update)
1204     {
1205       char cmd[BUFSIZ];
1206       sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1207       exit (system (cmd));
1208     }
1209   return GOOD;
1210 }
1211
1212
1213
1214 /*
1215  * Return a compressor given the file name.  If EXTPTR is non-zero,
1216  * return a pointer into FILE where the compressor-specific
1217  * extension begins.  If no compressor is found, NULL is returned
1218  * and EXTPTR is not significant.
1219  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca>
1220  */
1221 static compressor *
1222 get_compressor_from_suffix (file, extptr)
1223      char *file;
1224      char **extptr;
1225 {
1226   compressor *compr;
1227   char *slash, *suffix;
1228
1229   /* This relies on FN to be after canonicalize_filename,
1230      so we don't need to consider backslashes on DOS_NT.  */
1231   slash = etags_strrchr (file, '/');
1232   suffix = etags_strrchr (file, '.');
1233   if (suffix == NULL || suffix < slash)
1234     return NULL;
1235   if (extptr != NULL)
1236     *extptr = suffix;
1237   suffix += 1;
1238   /* Let those poor souls who live with DOS 8+3 file name limits get
1239      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1240      Only the first do loop is run if not MSDOS */
1241   do
1242     {
1243       for (compr = compressors; compr->suffix != NULL; compr++)
1244         if (streq (compr->suffix, suffix))
1245           return compr;
1246       if (!MSDOS)
1247         break;                  /* do it only once: not really a loop */
1248       if (extptr != NULL)
1249         *extptr = ++suffix;
1250     } while (*suffix != '\0');
1251   return NULL;
1252 }
1253
1254
1255
1256 /*
1257  * Return a language given the name.
1258  */
1259 static language *
1260 get_language_from_langname (name)
1261      char *name;
1262 {
1263   language *lang;
1264
1265   if (name == NULL)
1266     error ("empty language name", (char *)NULL);
1267   else
1268     {
1269       for (lang = lang_names; lang->name != NULL; lang++)
1270         if (streq (name, lang->name))
1271           return lang;
1272       error ("unknown language \"%s\"", name);
1273     }
1274
1275   return NULL;
1276 }
1277
1278
1279 /*
1280  * Return a language given the interpreter name.
1281  */
1282 static language *
1283 get_language_from_interpreter (interpreter)
1284      char *interpreter;
1285 {
1286   language *lang;
1287   char **iname;
1288
1289   if (interpreter == NULL)
1290     return NULL;
1291   for (lang = lang_names; lang->name != NULL; lang++)
1292     if (lang->interpreters != NULL)
1293       for (iname = lang->interpreters; *iname != NULL; iname++)
1294         if (streq (*iname, interpreter))
1295             return lang;
1296
1297   return NULL;
1298 }
1299
1300
1301
1302 /*
1303  * Return a language given the file name.
1304  */
1305 static language *
1306 get_language_from_filename (file)
1307      char *file;
1308 {
1309   language *lang;
1310   char **name, **ext, *suffix;
1311
1312   /* Try whole file name first. */
1313   for (lang = lang_names; lang->name != NULL; lang++)
1314     if (lang->filenames != NULL)
1315       for (name = lang->filenames; *name != NULL; name++)
1316         if (streq (*name, file))
1317           return lang;
1318
1319   /* If not found, try suffix after last dot. */
1320   suffix = etags_strrchr (file, '.');
1321   if (suffix == NULL)
1322     return NULL;
1323   suffix += 1;
1324   for (lang = lang_names; lang->name != NULL; lang++)
1325     if (lang->suffixes != NULL)
1326       for (ext = lang->suffixes; *ext != NULL; ext++)
1327         if (streq (*ext, suffix))
1328           return lang;
1329   return NULL;
1330 }
1331
1332
1333
/*
 * This routine is called on each file argument.
 *
 * FILE is canonicalised, skipped if it is the tags file itself or if
 * its uncompressed name was already processed, then located on disk
 * (trying compressed and uncompressed variants of the name), opened
 * directly or through the compressor's command, and handed to
 * find_entries.  In etags mode the section for the file is written to
 * the tags file right away and the tree of tags is emptied.
 */
static void
process_file (file)
     char *file;
{
  struct stat stat_buf;
  FILE *inf;
  compressor *compr;
  /* Both names are allocated copies, released at the `exit' label. */
  char *compressed_name, *uncompressed_name;
  /* real_name aliases one of the two names above: the one that is
     actually opened. */
  char *ext, *real_name;


  canonicalize_filename (file);
  if (streq (file, tagfile) && !streq (tagfile, "-"))
    {
      error ("skipping inclusion of %s in self.", file);
      return;
    }
  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
    {
      /* No compression suffix: the name given is the uncompressed one. */
      compressed_name = NULL;
      real_name = uncompressed_name = savestr (file);
    }
  else
    {
      /* The uncompressed name is FILE without the part starting at EXT. */
      real_name = compressed_name = savestr (file);
      uncompressed_name = savenstr (file, ext - file);
    }

  /* If the canonicalised uncompressed name has already been dealt with,
     skip it silently, else add it to the list. */
  {
    typedef struct processed_file
    {
      char *filename;
      struct processed_file *next;
    } processed_file;
    /* The list lives for the whole run and is never freed. */
    static processed_file *pf_head = NULL;
    register processed_file *fnp;

    for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
      if (streq (uncompressed_name, fnp->filename))
        goto exit;
    /* Not seen yet: push a copy of the name on the front of the list. */
    fnp = pf_head;
    pf_head = xnew (1, struct processed_file);
    pf_head->filename = savestr (uncompressed_name);
    pf_head->next = fnp;
  }

  if (stat (real_name, &stat_buf) != 0)
    {
      /* Reset real_name and try with a different name. */
      real_name = NULL;
      if (compressed_name != NULL) /* try with the given suffix */
        {
          if (stat (uncompressed_name, &stat_buf) == 0)
            real_name = uncompressed_name;
        }
      else                      /* try all possible suffixes */
        {
          for (compr = compressors; compr->suffix != NULL; compr++)
            {
              compressed_name = concat (file, ".", compr->suffix);
              if (stat (compressed_name, &stat_buf) != 0)
                {
                  if (MSDOS)
                    {
                      /* Drop the leading characters of the appended
                         ".suffix" one at a time, checking each
                         shortened name. */
                      char *suf = compressed_name + strlen (file);
                      size_t suflen = strlen (compr->suffix) + 1;
                      for ( ; suf[1]; suf++, suflen--)
                        {
                          memmove (suf, suf + 1, suflen);
                          if (stat (compressed_name, &stat_buf) == 0)
                            {
                              real_name = compressed_name;
                              break;
                            }
                        }
                      if (real_name != NULL)
                        break;
                    } /* MSDOS */
                  free (compressed_name);
                  compressed_name = NULL;
                }
              else
                {
                  real_name = compressed_name;
                  break;
                }
            }
        }
      if (real_name == NULL)
        {
          perror (file);
          goto exit;
        }
    } /* try with a different name */

  if (!S_ISREG (stat_buf.st_mode))
    {
      error ("skipping %s: it is not a regular file.", real_name);
      goto exit;
    }
  if (real_name == compressed_name)
    {
      /* Read the file through the compressor's command.
         NOTE(review): the file name goes unquoted into the command
         line given to popen. */
      char *cmd = concat (compr->command, " ", real_name);
      inf = popen (cmd, "r");
      free (cmd);
    }
  else
    inf = fopen (real_name, "r");
  if (inf == NULL)
    {
      perror (real_name);
      goto exit;
    }

  find_entries (uncompressed_name, inf);

  /* Close the stream the same way it was opened. */
  if (real_name == compressed_name)
    pclose (inf);
  else
    fclose (inf);

  if (!CTAGS)
    {
      char *filename;

      if (filename_is_absolute (uncompressed_name))
        {
          /* file is an absolute file name.  Canonicalise it. */
          filename = absolute_filename (uncompressed_name, cwd);
        }
      else
        {
          /* file is a file name relative to cwd.  Make it relative
             to the directory of the tags file. */
          filename = relative_filename (uncompressed_name, tagfiledir);
        }
      /* Section header for this file, followed by its tags. */
      fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
      free (filename);
      put_entries (head);
      free_tree (head);
      head = NULL;
    }

  /* Common cleanup: release the two name copies. */
 exit:
  if (compressed_name) free(compressed_name);
  if (uncompressed_name) free(uncompressed_name);
  return;
}
1487
1488 /*
1489  * This routine sets up the boolean pseudo-functions which work
1490  * by setting boolean flags dependent upon the corresponding character.
1491  * Every char which is NOT in that string is not a white char.  Therefore,
1492  * all of the array "_wht" is set to FALSE, and then the elements
1493  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1494  * of a char is TRUE if it is the string "white", else FALSE.
1495  */
1496 static void
1497 init ()
1498 {
1499   register char *sp;
1500   register int i;
1501
1502   for (i = 0; i < CHARS; i++)
1503     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1504   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1505   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1506   notinname('\0') = notinname('\n');
1507   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1508   begtoken('\0') = begtoken('\n');
1509   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1510   intoken('\0') = intoken('\n');
1511   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1512   endtoken('\0') = endtoken('\n');
1513 }
1514
1515 /*
1516  * This routine opens the specified file and calls the function
1517  * which finds the function and type definitions.
1518  */
1519 node *last_node = NULL;
1520
1521 static void
1522 find_entries (file, inf)
1523      char *file;
1524      FILE *inf;
1525 {
1526   char *cp;
1527   language *lang;
1528   node *old_last_node;
1529
1530   /* Memory leakage here: the string pointed by curfile is
1531      never released, because curfile is copied into np->file
1532      for each node, to be used in CTAGS mode.  The amount of
1533      memory leaked here is the sum of the lengths of the
1534      file names. */
1535   curfile = savestr (file);
1536
1537   /* If user specified a language, use it. */
1538   lang = forced_lang;
1539   if (lang != NULL && lang->function != NULL)
1540     {
1541       curlang = lang;
1542       lang->function (inf);
1543       return;
1544     }
1545
1546   /* Try to guess the language given the file name. */
1547   lang = get_language_from_filename (file);
1548   if (lang != NULL && lang->function != NULL)
1549     {
1550       curlang = lang;
1551       lang->function (inf);
1552       return;
1553     }
1554
1555   /* Look for sharp-bang as the first two characters. */
1556   if (readline_internal (&lb, inf) > 0
1557       && lb.len >= 2
1558       && lb.buffer[0] == '#'
1559       && lb.buffer[1] == '!')
1560     {
1561       char *lp;
1562
1563       /* Set lp to point at the first char after the last slash in the
1564          line or, if no slashes, at the first nonblank.  Then set cp to
1565          the first successive blank and terminate the string. */
1566       lp = etags_strrchr (lb.buffer+2, '/');
1567       if (lp != NULL)
1568         lp += 1;
1569       else
1570         lp = skip_spaces (lb.buffer + 2);
1571       cp = skip_non_spaces (lp);
1572       *cp = '\0';
1573
1574       if (strlen (lp) > 0)
1575         {
1576           lang = get_language_from_interpreter (lp);
1577           if (lang != NULL && lang->function != NULL)
1578             {
1579               curlang = lang;
1580               lang->function (inf);
1581               return;
1582             }
1583         }
1584     }
1585   /* We rewind here, even if inf may be a pipe.  We fail if the
1586      length of the first line is longer than the pipe block size,
1587      which is unlikely. */
1588   rewind (inf);
1589
1590   /* Try Fortran. */
1591   old_last_node = last_node;
1592   curlang = get_language_from_langname ("fortran");
1593   Fortran_functions (inf);
1594
1595   /* No Fortran entries found.  Try C. */
1596   if (old_last_node == last_node)
1597     {
1598       /* We do not tag if rewind fails.
1599          Only the file name will be recorded in the tags file. */
1600       rewind (inf);
1601       curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1602       default_C_entries (inf);
1603     }
1604   return;
1605 }
1606 \f
1607 /* Record a tag. */
1608 static void
1609 pfnote (name, is_func, linestart, linelen, lno, cno)
1610      char *name;                /* tag name, or NULL if unnamed */
1611      bool is_func;              /* tag is a function */
1612      char *linestart;           /* start of the line where tag is */
1613      int linelen;               /* length of the line where tag is */
1614      int lno;                   /* line number */
1615      long cno;                  /* character number */
1616 {
1617   register node *np;
1618
1619   if (CTAGS && name == NULL)
1620     return;
1621
1622   np = xnew (1, node);
1623
1624   /* If ctags mode, change name "main" to M<thisfilename>. */
1625   if (CTAGS && !cxref_style && streq (name, "main"))
1626     {
1627       register char *fp = etags_strrchr (curfile, '/');
1628       np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1629       fp = etags_strrchr (np->name, '.');
1630       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1631         fp[0] = '\0';
1632     }
1633   else
1634     np->name = name;
1635   np->been_warned = FALSE;
1636   np->file = curfile;
1637   np->is_func = is_func;
1638   np->lno = lno;
1639   /* Our char numbers are 0-base, because of C language tradition?
1640      ctags compatibility?  old versions compatibility?   I don't know.
1641      Anyway, since emacs's are 1-base we expect etags.el to take care
1642      of the difference.  If we wanted to have 1-based numbers, we would
1643      uncomment the +1 below. */
1644   np->cno = cno /* + 1 */ ;
1645   np->left = np->right = NULL;
1646   if (CTAGS && !cxref_style)
1647     {
1648       if (strlen (linestart) < 50)
1649         np->pat = concat (linestart, "$", "");
1650       else
1651         np->pat = savenstr (linestart, 50);
1652     }
1653   else
1654     np->pat = savenstr (linestart, linelen);
1655
1656   add_node (np, &head);
1657 }
1658
1659 /* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997]
1660  * From: Sam Kendall <kendall@mv.mv.com>
1661  * Subject: Proposal for firming up the TAGS format specification
1662  * To: F.Potorti@cnuce.cnr.it
1663  *
1664  * pfnote should emit the optimized form [unnamed tag] only if:
1665  *  1. name does not contain any of the characters " \t\r\n(),;";
1666  *  2. linestart contains name as either a rightmost, or rightmost but
1667  *     one character, substring;
1668  *  3. the character, if any, immediately before name in linestart must
1669  *     be one of the characters " \t(),;";
1670  *  4. the character, if any, immediately after name in linestart must
1671  *     also be one of the characters " \t(),;".
1672  *
 * The real implementation uses the notinname() macro, which recognises
 * characters slightly different from " \t\r\n(),;".  See the variable
 * `nonam'.
1676  */
1677 #define traditional_tag_style TRUE
1678 static void
1679 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1680      char *name;                /* tag name, or NULL if unnamed */
1681      int namelen;               /* tag length */
1682      bool is_func;              /* tag is a function */
1683      char *linestart;           /* start of the line where tag is */
1684      int linelen;               /* length of the line where tag is */
1685      int lno;                   /* line number */
1686      long cno;                  /* character number */
1687 {
1688   register char *cp;
1689   bool named;
1690
1691   named = TRUE;
1692   if (!CTAGS)
1693     {
1694       for (cp = name; !notinname (*cp); cp++)
1695         continue;
1696       if (*cp == '\0')                          /* rule #1 */
1697         {
1698           cp = linestart + linelen - namelen;
1699           if (notinname (linestart[linelen-1]))
1700             cp -= 1;                            /* rule #4 */
1701           if (cp >= linestart                   /* rule #2 */
1702               && (cp == linestart
1703                   || notinname (cp[-1]))        /* rule #3 */
1704               && strneq (name, cp, namelen))    /* rule #2 */
1705             named = FALSE;      /* use unnamed tag */
1706         }
1707     }
1708
1709   if (named)
1710     name = savenstr (name, namelen);
1711   else
1712     name = NULL;
1713   pfnote (name, is_func, linestart, linelen, lno, cno);
1714 }
1715
1716 /*
1717  * free_tree ()
1718  *      recurse on left children, iterate on right children.
1719  */
1720 static void
1721 free_tree (np)
1722      register node *np;
1723 {
1724   while (np)
1725     {
1726       register node *node_right = np->right;
1727       free_tree (np->left);
1728       if (np->name != NULL)
1729         free (np->name);
1730       free (np->pat);
1731       free (np);
1732       np = node_right;
1733     }
1734 }
1735
1736 /*
1737  * add_node ()
1738  *      Adds a node to the tree of nodes.  In etags mode, we don't keep
1739  *      it sorted; we just keep a linear list.  In ctags mode, maintain
1740  *      an ordered tree, with no attempt at balancing.
1741  *
1742  *      add_node is the only function allowed to add nodes, so it can
1743  *      maintain state.
1744  */
1745 static void
1746 add_node (np, cur_node_p)
1747      node *np, **cur_node_p;
1748 {
1749   register int dif;
1750   register node *cur_node = *cur_node_p;
1751
1752   if (cur_node == NULL)
1753     {
1754       *cur_node_p = np;
1755       last_node = np;
1756       return;
1757     }
1758
1759   if (!CTAGS)
1760     {
1761       /* Etags Mode */
1762       if (last_node == NULL)
1763         fatal ("internal error in add_node", (char *)NULL);
1764       last_node->right = np;
1765       last_node = np;
1766     }
1767   else
1768     {
1769       /* Ctags Mode */
1770       dif = strcmp (np->name, cur_node->name);
1771
1772       /*
1773        * If this tag name matches an existing one, then
1774        * do not add the node, but maybe print a warning.
1775        */
1776       if (!dif)
1777         {
1778           if (streq (np->file, cur_node->file))
1779             {
1780               if (!no_warnings)
1781                 {
1782                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1783                            np->file, lineno, np->name);
1784                   fprintf (stderr, "Second entry ignored\n");
1785                 }
1786             }
1787           else if (!cur_node->been_warned && !no_warnings)
1788             {
1789               fprintf
1790                 (stderr,
1791                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
1792                  np->file, cur_node->file, np->name);
1793               cur_node->been_warned = TRUE;
1794             }
1795           return;
1796         }
1797
1798       /* Actually add the node */
1799       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1800     }
1801 }
1802 \f
1803 static void
1804 put_entries (np)
1805      register node *np;
1806 {
1807   register char *sp;
1808
1809   if (np == NULL)
1810     return;
1811
1812   /* Output subentries that precede this one */
1813   put_entries (np->left);
1814
1815   /* Output this entry */
1816
1817   if (!CTAGS)
1818     {
1819       if (np->name != NULL)
1820         fprintf (tagf, "%s\177%s\001%d,%ld\n",
1821                  np->pat, np->name, np->lno, np->cno);
1822       else
1823         fprintf (tagf, "%s\177%d,%ld\n",
1824                  np->pat, np->lno, np->cno);
1825     }
1826   else
1827     {
1828       if (np->name == NULL)
1829         error ("internal error: NULL name in ctags mode.", (char *)NULL);
1830
1831       if (cxref_style)
1832         {
1833           if (vgrind_style)
1834             fprintf (stdout, "%s %s %d\n",
1835                      np->name, np->file, (np->lno + 63) / 64);
1836           else
1837             fprintf (stdout, "%-16s %3d %-16s %s\n",
1838                      np->name, np->lno, np->file, np->pat);
1839         }
1840       else
1841         {
1842           fprintf (tagf, "%s\t%s\t", np->name, np->file);
1843
1844           if (np->is_func)
1845             {                   /* a function */
1846               putc (searchar, tagf);
1847               putc ('^', tagf);
1848
1849               for (sp = np->pat; *sp; sp++)
1850                 {
1851                   if (*sp == '\\' || *sp == searchar)
1852                     putc ('\\', tagf);
1853                   putc (*sp, tagf);
1854                 }
1855               putc (searchar, tagf);
1856             }
1857           else
1858             {                   /* a typedef; text pattern inadequate */
1859               fprintf (tagf, "%d", np->lno);
1860             }
1861           putc ('\n', tagf);
1862         }
1863     }
1864
1865   /* Output subentries that follow this one */
1866   put_entries (np->right);
1867 }
1868
/* Length of a number's decimal representation, i.e. the number of
   characters "%ld" prints for NUM: its digits, plus one for the minus
   sign when NUM is negative. */
static int
number_len (num)
     long num;
{
  /* One digit is always printed; a negative number also gets a sign. */
  int len = (num < 0) ? 2 : 1;

  /* Integer division truncates toward zero, so the same loop counts the
     remaining digits of negative values too, without negating NUM
     (which could overflow). */
  while ((num /= 10) != 0)
    len += 1;
  return len;
}
1879
1880 /*
1881  * Return total number of characters that put_entries will output for
1882  * the nodes in the subtree of the specified node.  Works only if
1883  * we are not ctags, but called only in that case.  This count
1884  * is irrelevant with the new tags.el, but is still supplied for
1885  * backward compatibility.
1886  */
1887 static int
1888 total_size_of_entries (np)
1889      register node *np;
1890 {
1891   register int total;
1892
1893   if (np == NULL)
1894     return 0;
1895
1896   for (total = 0; np != NULL; np = np->right)
1897     {
1898       /* Count left subentries. */
1899       total += total_size_of_entries (np->left);
1900
1901       /* Count this entry */
1902       total += strlen (np->pat) + 1;
1903       total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1904       if (np->name != NULL)
1905         total += 1 + strlen (np->name); /* \001name */
1906     }
1907
1908   return total;
1909 }
1910 \f
1911 /*
1912  * The C symbol tables.
1913  */
/* Classification of a token, as returned by C_symtype.  The keywords
   named below are the ones the gperf table maps to each value. */
enum sym_type
{
  st_none,              /* not a keyword of the language being parsed */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_ignore,          /* if, for, while, switch, return, import,
                           package, friend */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_struct,          /* struct, union, class, namespace, interface,
                           domain */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define */
  st_C_typedef,         /* typedef */
  st_C_typespec         /* type names, storage classes and qualifiers */
};
1924
/* Forward declarations: hash and in_word_set are the gperf-generated
   keyword lookup functions found between the %< and %> markers below;
   C_symtype is the wrapper around in_word_set that filters keywords by
   language extension. */
static unsigned int hash P_((const char *, unsigned int));
static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
static enum sym_type C_symtype P_((char *, int, int));
1928
1929 /* Feed stuff between (but not including) %[ and %] lines to:
1930       gperf -c -k 1,3 -o -p -r -t
1931 %[
1932 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1933 %%
1934 if,             0,      st_C_ignore
1935 for,            0,      st_C_ignore
1936 while,          0,      st_C_ignore
1937 switch,         0,      st_C_ignore
1938 return,         0,      st_C_ignore
1939 @interface,     0,      st_C_objprot
1940 @protocol,      0,      st_C_objprot
1941 @implementation,0,      st_C_objimpl
1942 @end,           0,      st_C_objend
1943 import,         C_JAVA, st_C_ignore
1944 package,        C_JAVA, st_C_ignore
1945 friend,         C_PLPL, st_C_ignore
1946 extends,        C_JAVA, st_C_javastruct
1947 implements,     C_JAVA, st_C_javastruct
1948 interface,      C_JAVA, st_C_struct
1949 class,          C_PLPL, st_C_struct
1950 namespace,      C_PLPL, st_C_struct
1951 domain,         C_STAR, st_C_struct
1952 union,          0,      st_C_struct
1953 struct,         0,      st_C_struct
1954 extern,         0,      st_C_extern
1955 enum,           0,      st_C_enum
1956 typedef,        0,      st_C_typedef
1957 define,         0,      st_C_define
1958 operator,       C_PLPL, st_C_operator
1959 bool,           C_PLPL, st_C_typespec
1960 long,           0,      st_C_typespec
1961 short,          0,      st_C_typespec
1962 int,            0,      st_C_typespec
1963 char,           0,      st_C_typespec
1964 float,          0,      st_C_typespec
1965 double,         0,      st_C_typespec
1966 signed,         0,      st_C_typespec
1967 unsigned,       0,      st_C_typespec
1968 auto,           0,      st_C_typespec
1969 void,           0,      st_C_typespec
1970 static,         0,      st_C_typespec
1971 const,          0,      st_C_typespec
1972 volatile,       0,      st_C_typespec
1973 explicit,       C_PLPL, st_C_typespec
1974 mutable,        C_PLPL, st_C_typespec
1975 typename,       C_PLPL, st_C_typespec
1976 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1977 DEFUN,          0,      st_C_gnumacro
1978 SYSCALL,        0,      st_C_gnumacro
1979 ENTRY,          0,      st_C_gnumacro
1980 PSEUDO,         0,      st_C_gnumacro
1981 # These are defined inside C functions, so currently they are not met.
1982 # EXFUN used in glibc, DEFVAR_* in emacs.
1983 #EXFUN,         0,      st_C_gnumacro
1984 #DEFVAR_,       0,      st_C_gnumacro
1985 %]
1986 and replace lines between %< and %> with its output. */
1987 /*%<*/
1988 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
1989 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
/* One keyword table entry: the keyword text, the mask of language
   extensions the keyword is restricted to (0 means no restriction, see
   C_symtype), and the keyword's classification. */
struct C_stab_entry { char *name; int c_ext; enum sym_type type; };

/* Table parameters computed by gperf.  in_word_set rejects words whose
   length lies outside MIN_WORD_LENGTH..MAX_WORD_LENGTH and hash values
   above MAX_HASH_VALUE before it looks at the word list. */
#define TOTAL_KEYWORDS 46
#define MIN_WORD_LENGTH 2
#define MAX_WORD_LENGTH 15
#define MIN_HASH_VALUE 13
#define MAX_HASH_VALUE 123
/* maximum key range = 111, duplicates = 0 */
1998
/* Keyword hash function generated by gperf: the hash of STR is LEN,
   plus the table value of its first character, plus (unless LEN is 1
   or 2) the table value of its third character. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* One value per character code.  Characters that are neither first
     nor third in any keyword have the value 124, which is greater than
     MAX_HASH_VALUE. */
  static unsigned char asso_values[] =
    {
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124,   3, 124, 124, 124,  43,   6,
       11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
       11, 124, 124,  58,   7, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124,  57,   7,  42,
        4,  14,  52,   0, 124,  53, 124, 124,  29,  11,
        6,  35,  32, 124,  29,  34,  59,  58,  51,  24,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124
    };
  register int hval = len;

  /* Every length except 1 and 2 takes the third character into
     account as well. */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];

  return hval;
}
2050
2051 #ifdef __GNUC__
2052 __inline
2053 #endif
2054 static struct C_stab_entry *
2055 in_word_set (str, len)
2056      register const char *str;
2057      register unsigned int len;
2058 {
2059   static struct C_stab_entry wordlist[] =
2060     {
2061       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2062       {""}, {""}, {""}, {""},
2063       {"@end",          0,      st_C_objend},
2064       {""}, {""}, {""}, {""},
2065       {"ENTRY",         0,      st_C_gnumacro},
2066       {"@interface",    0,      st_C_objprot},
2067       {""},
2068       {"domain",        C_STAR, st_C_struct},
2069       {""},
2070       {"PSEUDO",                0,      st_C_gnumacro},
2071       {""}, {""},
2072       {"namespace",     C_PLPL, st_C_struct},
2073       {""}, {""},
2074       {"@implementation",0,     st_C_objimpl},
2075       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2076       {"long",          0,      st_C_typespec},
2077       {"signed",        0,      st_C_typespec},
2078       {"@protocol",     0,      st_C_objprot},
2079       {""}, {""}, {""}, {""},
2080       {"bool",          C_PLPL, st_C_typespec},
2081       {""}, {""}, {""}, {""}, {""}, {""},
2082       {"const",         0,      st_C_typespec},
2083       {"explicit",      C_PLPL, st_C_typespec},
2084       {"if",            0,      st_C_ignore},
2085       {""},
2086       {"operator",      C_PLPL, st_C_operator},
2087       {""},
2088       {"DEFUN",         0,      st_C_gnumacro},
2089       {""}, {""},
2090       {"define",        0,      st_C_define},
2091       {""}, {""}, {""}, {""}, {""},
2092       {"double",        0,      st_C_typespec},
2093       {"struct",        0,      st_C_struct},
2094       {""}, {""}, {""}, {""},
2095       {"short",         0,      st_C_typespec},
2096       {""},
2097       {"enum",          0,      st_C_enum},
2098       {"mutable",       C_PLPL, st_C_typespec},
2099       {""},
2100       {"extern",        0,      st_C_extern},
2101       {"extends",       C_JAVA, st_C_javastruct},
2102       {"package",       C_JAVA, st_C_ignore},
2103       {"while",         0,      st_C_ignore},
2104       {""},
2105       {"for",           0,      st_C_ignore},
2106       {""}, {""}, {""},
2107       {"volatile",      0,      st_C_typespec},
2108       {""}, {""},
2109       {"import",                C_JAVA, st_C_ignore},
2110       {"float",         0,      st_C_typespec},
2111       {"switch",                0,      st_C_ignore},
2112       {"return",                0,      st_C_ignore},
2113       {"implements",    C_JAVA, st_C_javastruct},
2114       {""},
2115       {"static",        0,      st_C_typespec},
2116       {"typedef",       0,      st_C_typedef},
2117       {"typename",      C_PLPL, st_C_typespec},
2118       {"unsigned",      0,      st_C_typespec},
2119       {""}, {""},
2120       {"char",          0,      st_C_typespec},
2121       {"class",         C_PLPL, st_C_struct},
2122       {""}, {""}, {""},
2123       {"void",          0,      st_C_typespec},
2124       {""}, {""},
2125       {"friend",                C_PLPL, st_C_ignore},
2126       {""}, {""}, {""},
2127       {"int",           0,      st_C_typespec},
2128       {"union",         0,      st_C_struct},
2129       {""}, {""}, {""},
2130       {"auto",          0,      st_C_typespec},
2131       {"interface",     C_JAVA, st_C_struct},
2132       {""},
2133       {"SYSCALL",       0,      st_C_gnumacro}
2134     };
2135
2136   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2137     {
2138       register int key = hash (str, len);
2139
2140       if (key <= MAX_HASH_VALUE && key >= 0)
2141         {
2142           register const char *s = wordlist[key].name;
2143
2144           if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2145             return &wordlist[key];
2146         }
2147     }
2148   return 0;
2149 }
2150 /*%>*/
2151
2152 static enum sym_type
2153 C_symtype (str, len, c_ext)
2154      char *str;
2155      int len;
2156      int c_ext;
2157 {
2158   register struct C_stab_entry *se = in_word_set (str, len);
2159
2160   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2161     return st_none;
2162   return se->type;
2163 }
2164 \f
 /*
  * C functions and variables are recognized using a simple
  * finite automaton.  fvdef is its state variable.
  *
  * consider_token moves it while classifying tokens, and C_entries
  * resets it to fvnone before it starts scanning a file.
  */
enum
{
  fvnone,                       /* nothing seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

/* Set by consider_token on the `extern' keyword; cleared by it on
   `typedef' and on the keywords classified as st_C_ignore. */
bool fvextern;                  /* func or var: extern keyword seen; */
2182
 /*
  * typedefs are recognized using a simple finite automaton.
  * typdef is its state variable.
  *
  * consider_token leaves tnone for tkeyseen on the `typedef' keyword,
  * but only when the `typedefs' flag is set, and goes from tkeyseen to
  * ttypeseen on the token that follows.  In the tend state it accepts
  * as the tag any token that is not a type specifier or a struct or
  * enum keyword.
  */
enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2196
2197
 /*
  * struct-like structures (enum, struct and union) are recognized
  * using another simple finite automaton.  `structdef' is its state
  * variable.
  *
  * consider_token enters skeyseen on a struct-like keyword, moves to
  * stagseen on the token that follows it, and from stagseen to
  * scolonseen on the keywords classified as st_C_javastruct.
  */
enum
{
  snone,                        /* nothing seen yet */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen */
  scolonseen,                   /* colon seen after struct-like tag */
  sinbody                       /* in struct body: recognize member func defs*/
} structdef;

/*
 * When structdef is stagseen, scolonseen, or sinbody, structtag is the
 * struct tag, and structtype is the type of the preceding struct-like
 * keyword.
 *
 * consider_token stores in structtag a copy of the tag made with
 * savenstr when structtype is st_C_struct, and the constant string
 * "<enum>" otherwise.
 */
char *structtag = "<uninited>";
enum sym_type structtype;
2219
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores here a copy, made with savenstr, of the token
 * that follows @interface, @protocol or @implementation.
 */
char *objtag = "<uninited>";
2224
/*
 * Yet another little state machine to deal with preprocessor lines.
 *
 * consider_token goes from dsharpseen to ddefineseen when the token is
 * `define' and to dignorerest otherwise, and from ddefineseen to
 * dignorerest once it has seen the macro name.  The CNL macro puts the
 * state back to dnone when a new line is read.
 */
enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line */
} definedef;
2235
/*
 * State machine for Objective C protocols and implementations.
 * Tom R.Hageman <tom@basil.icce.rug.nl>
 *
 * consider_token handles the transitions that are triggered by a
 * token; C_entries resets the state to onone before scanning a file.
 */
enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2255
2256
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
typedef struct
{
  bool valid;                   /* make_C_tag makes a tag only when this is
                                   set, and clears it once the tag is made */
  char *str;                    /* NOTE(review): not used in the code
                                   nearby; presumably the token text */
  bool named;                   /* in traditional tag style, make_C_tag
                                   gives the tag an explicit name when
                                   this is set (always if CTAGS) */
  int linelen;                  /* the next four are passed by make_C_tag
                                   to pfnote or new_pfnote together... */
  int lineno;                   /* ...presumably they are the length, */
  long linepos;                 /* number, file position and text of the */
  char *buffer;                 /* line holding the token -- TODO confirm */
} token;

token tok;                      /* latest token read */
2273
/*
 * Set this to TRUE, and the next token considered is called a function.
 * Used only for GNU emacs's function-defining macros.
 * consider_token sets it when it meets one of the keywords classified
 * as st_C_gnumacro outside of a preprocessor line, and clears it as
 * soon as the following token has been tagged.
 */
bool next_token_is_func;

/*
 * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
 */
bool yacc_rules;

/*
 * methodlen is the length of the method name stored in token_name.
 * consider_token sets it to the length of the first part of an
 * Objective C method name and adds the length of every further part
 * that it appends to token_name.
 */
int methodlen;

/* Forward declarations for the two functions defined below. */
static bool consider_token P_((char *, int, int, int, int, int, bool *));
static void make_C_tag P_((bool));
2292
2293 /*
2294  * consider_token ()
2295  *      checks to see if the current token is at the start of a
2296  *      function or variable, or corresponds to a typedef, or
2297  *      is a struct/union/enum tag, or #define, or an enum constant.
2298  *
2299  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2300  *      with args.  C_EXT is which language we are looking at.
2301  *
2302  * Globals
2303  *      fvdef                   IN OUT
2304  *      structdef               IN OUT
2305  *      definedef               IN OUT
2306  *      typdef                  IN OUT
2307  *      objdef                  IN OUT
2308  *      next_token_is_func      IN OUT
2309  */
2310
2311 static bool
2312 consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
2313      register char *str;        /* IN: token pointer */
2314      register int len;          /* IN: token length */
2315      register int c;            /* IN: first char after the token */
2316      int c_ext;                 /* IN: C extensions mask */
2317      int cblev;                 /* IN: curly brace level */
2318      int parlev;                /* IN: parenthesis level */
2319      bool *is_func_or_var;      /* OUT: function or variable found */
2320 {
2321   enum sym_type toktype = C_symtype (str, len, c_ext);
2322
2323   /*
2324    * Advance the definedef state machine.
2325    */
2326   switch (definedef)
2327     {
2328     case dnone:
2329       /* We're not on a preprocessor line. */
2330       break;
2331     case dsharpseen:
2332       if (toktype == st_C_define)
2333         {
2334           definedef = ddefineseen;
2335         }
2336       else
2337         {
2338           definedef = dignorerest;
2339         }
2340       return FALSE;
2341     case ddefineseen:
2342       /*
2343        * Make a tag for any macro, unless it is a constant
2344        * and constantypedefs is FALSE.
2345        */
2346       definedef = dignorerest;
2347       *is_func_or_var = (c == '(');
2348       if (!*is_func_or_var && !constantypedefs)
2349         return FALSE;
2350       else
2351         return TRUE;
2352     case dignorerest:
2353       return FALSE;
2354     default:
2355       error ("internal error: definedef value.", (char *)NULL);
2356     }
2357
2358   /*
2359    * Now typedefs
2360    */
2361   switch (typdef)
2362     {
2363     case tnone:
2364       if (toktype == st_C_typedef)
2365         {
2366           if (typedefs)
2367             typdef = tkeyseen;
2368           fvextern = FALSE;
2369           fvdef = fvnone;
2370           return FALSE;
2371         }
2372       break;
2373     case tkeyseen:
2374       switch (toktype)
2375         {
2376         case st_none:
2377         case st_C_typespec:
2378         case st_C_struct:
2379         case st_C_enum:
2380           typdef = ttypeseen;
2381           break;
2382         }
2383       /* Do not return here, so the structdef stuff has a chance. */
2384       break;
2385     case tend:
2386       switch (toktype)
2387         {
2388         case st_C_typespec:
2389         case st_C_struct:
2390         case st_C_enum:
2391           return FALSE;
2392         }
2393       return TRUE;
2394     }
2395
2396   /*
2397    * This structdef business is currently only invoked when cblev==0.
2398    * It should be recursively invoked whatever the curly brace level,
2399    * and a stack of states kept, to allow for definitions of structs
2400    * within structs.
2401    *
2402    * This structdef business is NOT invoked when we are ctags and the
2403    * file is plain C.  This is because a struct tag may have the same
2404    * name as another tag, and this loses with ctags.
2405    */
2406   switch (toktype)
2407     {
2408     case st_C_javastruct:
2409       if (structdef == stagseen)
2410         structdef = scolonseen;
2411       return FALSE;
2412     case st_C_struct:
2413     case st_C_enum:
2414       if (typdef == tkeyseen
2415           || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
2416         {
2417           structdef = skeyseen;
2418           structtype = toktype;
2419         }
2420       return FALSE;
2421     }
2422
2423   if (structdef == skeyseen)
2424     {
2425       /* Save the tag for struct/union/class, for functions and variables
2426          that may be defined inside. */
2427       if (structtype == st_C_struct)
2428         structtag = savenstr (str, len);
2429       else
2430         structtag = "<enum>";
2431       structdef = stagseen;
2432       return TRUE;
2433     }
2434
2435   if (typdef != tnone)
2436     definedef = dnone;
2437
2438   /* Detect GNU macros.
2439
2440      Writers of emacs code are recommended to put the
2441      first two args of a DEFUN on the same line.
2442
2443       The DEFUN macro, used in emacs C source code, has a first arg
2444      that is a string (the lisp function name), and a second arg that
2445      is a C function name.  Since etags skips strings, the second arg
2446      is tagged.  This is unfortunate, as it would be better to tag the
2447      first arg.  The simplest way to deal with this problem would be
2448      to name the tag with a name built from the function name, by
2449      removing the initial 'F' character and substituting '-' for '_'.
2450      Anyway, this assumes that the conventions of naming lisp
2451      functions will never change.  Currently, this method is not
2452      implemented. */
2453   if (definedef == dnone && toktype == st_C_gnumacro)
2454     {
2455       next_token_is_func = TRUE;
2456       return FALSE;
2457     }
2458   if (next_token_is_func)
2459     {
2460       next_token_is_func = FALSE;
2461       fvdef = fignore;
2462       *is_func_or_var = TRUE;
2463       return TRUE;
2464     }
2465
2466   /* Detect Objective C constructs. */
2467   switch (objdef)
2468     {
2469     case onone:
2470       switch (toktype)
2471         {
2472         case st_C_objprot:
2473           objdef = oprotocol;
2474           return FALSE;
2475         case st_C_objimpl:
2476           objdef = oimplementation;
2477           return FALSE;
2478         }
2479       break;
2480     case oimplementation:
2481       /* Save the class tag for functions or variables defined inside. */
2482       objtag = savenstr (str, len);
2483       objdef = oinbody;
2484       return FALSE;
2485     case oprotocol:
2486       /* Save the class tag for categories. */
2487       objtag = savenstr (str, len);
2488       objdef = otagseen;
2489       *is_func_or_var = TRUE;
2490       return TRUE;
2491     case oparenseen:
2492       objdef = ocatseen;
2493       *is_func_or_var = TRUE;
2494       return TRUE;
2495     case oinbody:
2496       break;
2497     case omethodsign:
2498       if (parlev == 0)
2499         {
2500           objdef = omethodtag;
2501           methodlen = len;
2502           grow_linebuffer (&token_name, methodlen + 1);
2503           strncpy (token_name.buffer, str, len);
2504           token_name.buffer[methodlen] = '\0';
2505           token_name.len = methodlen;
2506           return TRUE;
2507         }
2508       return FALSE;
2509     case omethodcolon:
2510       if (parlev == 0)
2511         objdef = omethodparm;
2512       return FALSE;
2513     case omethodparm:
2514       if (parlev == 0)
2515         {
2516           objdef = omethodtag;
2517           methodlen += len;
2518           grow_linebuffer (&token_name, methodlen + 1);
2519           strncat (token_name.buffer, str, len);
2520           token_name.len = methodlen;
2521           return TRUE;
2522         }
2523       return FALSE;
2524     case oignore:
2525       if (toktype == st_C_objend)
2526         {
2527           /* Memory leakage here: the string pointed by objtag is
2528              never released, because many tests would be needed to
2529              avoid breaking on incorrect input code.  The amount of
2530              memory leaked here is the sum of the lengths of the
2531              class tags.
2532           free (objtag); */
2533           objdef = onone;
2534         }
2535       return FALSE;
2536     }
2537
2538   /* A function, variable or enum constant? */
2539   switch (toktype)
2540     {
2541     case st_C_extern:
2542       fvextern = TRUE;
2543       /* FALLTHRU */
2544     case st_C_typespec:
2545       if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2546         fvdef = fvnone;         /* should be useless */
2547       return FALSE;
2548     case st_C_ignore:
2549       fvextern = FALSE;
2550       fvdef = vignore;
2551       return FALSE;
2552     case st_C_operator:
2553       fvdef = foperator;
2554       *is_func_or_var = TRUE;
2555       return TRUE;
2556     case st_none:
2557       if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
2558         {
2559           fvdef = foperator;
2560           *is_func_or_var = TRUE;
2561           return TRUE;
2562         }
2563       if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
2564         return TRUE;
2565       if (fvdef == fvnone)
2566         {
2567           fvdef = fvnameseen;   /* function or variable */
2568           *is_func_or_var = TRUE;
2569           return TRUE;
2570         }
2571       break;
2572     }
2573
2574   return FALSE;
2575 }
2576
2577 /*
2578  * C_entries ()
2579  *      This routine finds functions, variables, typedefs,
2580  *      #define's, enum constants and struct/union/enum definitions in
2581  *      C syntax and adds them to the list.
2582  */
/* The macros below use local variables of C_entries (curndx, newndx,
   lp, quotednl, savetok, inf) and can only be used inside it. */

/* curndx and newndx index the two-element array lbs; 1-curndx is the
   index of the other element. */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* The line buffer and the recorded line position of the current, the
   other and the new element of lbs. */
#define curlb (lbs[curndx].lb)
#define othlb (lbs[1-curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define othlinepos (lbs[1-curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Read the next input line into the current line buffer, updating
   lineno, linecharno, charno and the recorded line position, and point
   lp at the start of the line.  The definedef state is left alone. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  lineno++;                                                             \
  linecharno = charno;                                                  \
  charno += readline (&curlb, inf);                                     \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also make the token saved in savetok,
   if there is one, the current token again, and reset the preprocessor
   state machine to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetok.valid)                                                    \
    {                                                                   \
      tok = savetok;                                                    \
      savetok.valid = FALSE;                                            \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2614
2615
2616 static void
2617 make_C_tag (isfun)
2618      bool isfun;
2619 {
2620   /* This function should never be called when tok.valid is FALSE, but
2621      we must protect against invalid input or internal errors. */
2622   if (tok.valid)
2623     {
2624       if (traditional_tag_style)
2625         {
2626           /* This was the original code.  Now we call new_pfnote instead,
2627              which uses the new method for naming tags (see new_pfnote). */
2628           char *name = NULL;
2629
2630           if (CTAGS || tok.named)
2631             name = savestr (token_name.buffer);
2632           pfnote (name, isfun,
2633                   tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2634         }
2635       else
2636         new_pfnote (token_name.buffer, token_name.len, isfun,
2637                     tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2638       tok.valid = FALSE;
2639     }
2640   else if (DEBUG)
2641     abort ();
2642 }
2643
2644
2645 static void
2646 C_entries (c_ext, inf)
2647      int c_ext;                 /* extension of C */
2648      FILE *inf;                 /* input file */
2649 {
2650   register char c;              /* latest char read; '\0' for end of line */
2651   register char *lp;            /* pointer one beyond the character `c' */
2652   int curndx, newndx;           /* indices for current and new lb */
2653   register int tokoff;          /* offset in line of start of current token */
2654   register int toklen;          /* length of current token */
2655   char *qualifier;              /* string used to qualify names */
2656   int qlen;                     /* length of qualifier */
2657   int cblev;                    /* current curly brace level */
2658   int parlev;                   /* current parenthesis level */
2659   bool incomm, inquote, inchar, quotednl, midtoken;
2660   bool purec, cplpl, cjava;
2661   token savetok;                /* token saved during preprocessor handling */
2662
2663
2664   tokoff = toklen = 0;          /* keep compiler quiet */
2665   curndx = newndx = 0;
2666   lineno = 0;
2667   charno = 0;
2668   lp = curlb.buffer;
2669   *lp = 0;
2670
2671   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2672   structdef = snone; definedef = dnone; objdef = onone;
2673   next_token_is_func = yacc_rules = FALSE;
2674   midtoken = inquote = inchar = incomm = quotednl = FALSE;
2675   tok.valid = savetok.valid = FALSE;
2676   cblev = 0;
2677   parlev = 0;
2678   purec = !(c_ext & ~YACC);     /* no extensions (apart from possibly yacc) */
2679   cplpl = (c_ext & C_PLPL) == C_PLPL;
2680   cjava = (c_ext & C_JAVA) == C_JAVA;
2681   if (cjava)
2682     { qualifier = "."; qlen = 1; }
2683   else
2684     { qualifier = "::"; qlen = 2; }
2685
2686   while (!feof (inf))
2687     {
2688       c = *lp++;
2689       if (c == '\\')
2690         {
2691           /* If we're at the end of the line, the next character is a
2692              '\0'; don't skip it, because it's the thing that tells us
2693              to read the next line.  */
2694           if (*lp == '\0')
2695             {
2696               quotednl = TRUE;
2697               continue;
2698             }
2699           lp++;
2700           c = ' ';
2701         }
2702       else if (incomm)
2703         {
2704           switch (c)
2705             {
2706             case '*':
2707               if (*lp == '/')
2708                 {
2709                   c = *lp++;
2710                   incomm = FALSE;
2711                 }
2712               break;
2713             case '\0':
2714               /* Newlines inside comments do not end macro definitions in
2715                  traditional cpp. */
2716               CNL_SAVE_DEFINEDEF ();
2717               break;
2718             }
2719           continue;
2720         }
2721       else if (inquote)
2722         {
2723           switch (c)
2724             {
2725             case '"':
2726               inquote = FALSE;
2727               break;
2728             case '\0':
2729               /* Newlines inside strings do not end macro definitions
2730                  in traditional cpp, even though compilers don't
2731                  usually accept them. */
2732               CNL_SAVE_DEFINEDEF ();
2733               break;
2734             }
2735           continue;
2736         }
2737       else if (inchar)
2738         {
2739           switch (c)
2740             {
2741             case '\0':
2742               /* Hmmm, something went wrong. */
2743               CNL ();
2744               /* FALLTHRU */
2745             case '\'':
2746               inchar = FALSE;
2747               break;
2748             }
2749           continue;
2750         }
2751       else
2752         switch (c)
2753           {
2754           case '"':
2755             inquote = TRUE;
2756             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2757               {
2758                 fvextern = FALSE;
2759                 fvdef = fvnone;
2760               }
2761             continue;
2762           case '\'':
2763             inchar = TRUE;
2764             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2765               {
2766                 fvextern = FALSE;
2767                 fvdef = fvnone;
2768               }
2769             continue;
2770           case '/':
2771             if (*lp == '*')
2772               {
2773                 lp++;
2774                 incomm = TRUE;
2775                 continue;
2776               }
2777             else if (/* cplpl && */ *lp == '/')
2778               {
2779                 c = '\0';
2780                 break;
2781               }
2782             else
2783               break;
2784           case '%':
2785             if ((c_ext & YACC) && *lp == '%')
2786               {
2787                 /* entering or exiting rules section in yacc file */
2788                 lp++;
2789                 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2790                 typdef = tnone; structdef = snone;
2791                 next_token_is_func = FALSE;
2792                 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2793                 cblev = 0;
2794                 yacc_rules = !yacc_rules;
2795                 continue;
2796               }
2797             else
2798               break;
2799           case '#':
2800             if (definedef == dnone)
2801               {
2802                 char *cp;
2803                 bool cpptoken = TRUE;
2804
2805                 /* Look back on this line.  If all blanks, or nonblanks
2806                    followed by an end of comment, this is a preprocessor
2807                    token. */
2808                 for (cp = newlb.buffer; cp < lp-1; cp++)
2809                   if (!iswhite (*cp))
2810                     {
2811                       if (*cp == '*' && *(cp+1) == '/')
2812                         {
2813                           cp++;
2814                           cpptoken = TRUE;
2815                         }
2816                       else
2817                         cpptoken = FALSE;
2818                     }
2819                 if (cpptoken)
2820                   definedef = dsharpseen;
2821               } /* if (definedef == dnone) */
2822
2823             continue;
2824           } /* switch (c) */
2825
2826
2827       /* Consider token only if some complicated conditions are satisfied. */
2828       if ((definedef != dnone
2829            || (cblev == 0 && structdef != scolonseen)
2830            || (cblev == 1 && cplpl && structdef == sinbody)
2831            || (structdef == sinbody && purec))
2832           && typdef != tignore
2833           && definedef != dignorerest
2834           && fvdef != finlist)
2835         {
2836           if (midtoken)
2837             {
2838               if (endtoken (c))
2839                 {
2840                   bool funorvar = FALSE;
2841
2842                   if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2843                     {
2844                       /*
2845                        * This handles :: in the middle, but not at the
2846                        * beginning of an identifier.  Also, space-separated
2847                        * :: is not recognised.
2848                        */
2849                       lp += 2;
2850                       toklen += 2;
2851                       c = lp[-1];
2852                       goto intok;
2853                     }
2854                   else
2855                     {
2856                       if (yacc_rules
2857                           || consider_token (newlb.buffer + tokoff, toklen, c,
2858                                              c_ext, cblev, parlev, &funorvar))
2859                         {
2860                           if (fvdef == foperator)
2861                             {
2862                               char *oldlp = lp;
2863                               lp = skip_spaces (lp-1);
2864                               if (*lp != '\0')
2865                                 lp += 1;
2866                               while (*lp != '\0'
2867                                      && !iswhite (*lp) && *lp != '(')
2868                                 lp += 1;
2869                               c = *lp++;
2870                               toklen += lp - oldlp;
2871                             }
2872                           tok.named = FALSE;
2873                           if (!purec
2874                               && funorvar
2875                               && definedef == dnone
2876                               && structdef == sinbody)
2877                             /* function or var defined in C++ class body */
2878                             {
2879                               int len = strlen (structtag) + qlen + toklen;
2880                               grow_linebuffer (&token_name, len + 1);
2881                               strcpy (token_name.buffer, structtag);
2882                               strcat (token_name.buffer, qualifier);
2883                               strncat (token_name.buffer,
2884                                        newlb.buffer + tokoff, toklen);
2885                               token_name.len = len;
2886                               tok.named = TRUE;
2887                             }
2888                           else if (objdef == ocatseen)
2889                             /* Objective C category */
2890                             {
2891                               int len = strlen (objtag) + 2 + toklen;
2892                               grow_linebuffer (&token_name, len + 1);
2893                               strcpy (token_name.buffer, objtag);
2894                               strcat (token_name.buffer, "(");
2895                               strncat (token_name.buffer,
2896                                        newlb.buffer + tokoff, toklen);
2897                               strcat (token_name.buffer, ")");
2898                               token_name.len = len;
2899                               tok.named = TRUE;
2900                             }
2901                           else if (objdef == omethodtag
2902                                    || objdef == omethodparm)
2903                             /* Objective C method */
2904                             {
2905                               tok.named = TRUE;
2906                             }
2907                           else
2908                             {
2909                               grow_linebuffer (&token_name, toklen + 1);
2910                               strncpy (token_name.buffer,
2911                                        newlb.buffer + tokoff, toklen);
2912                               token_name.buffer[toklen] = '\0';
2913                               token_name.len = toklen;
2914                               /* Name macros and members. */
2915                               tok.named = (structdef == stagseen
2916                                            || typdef == ttypeseen
2917                                            || typdef == tend
2918                                            || (funorvar
2919                                                && definedef == dignorerest)
2920                                            || (funorvar
2921                                                && definedef == dnone
2922                                                && structdef == sinbody));
2923                             }
2924                           tok.lineno = lineno;
2925                           tok.linelen = tokoff + toklen + 1;
2926                           tok.buffer = newlb.buffer;
2927                           tok.linepos = newlinepos;
2928                           tok.valid = TRUE;
2929
2930                           if (definedef == dnone
2931                               && (fvdef == fvnameseen
2932                                   || fvdef == foperator
2933                                   || structdef == stagseen
2934                                   || typdef == tend
2935                                   || objdef != onone))
2936                             {
2937                               if (current_lb_is_new)
2938                                 switch_line_buffers ();
2939                             }
2940                           else
2941                             make_C_tag (funorvar);
2942                         }
2943                       midtoken = FALSE;
2944                     }
2945                 } /* if (endtoken (c)) */
2946               else if (intoken (c))
2947                 intok:
2948                 {
2949                   toklen++;
2950                   continue;
2951                 }
2952             } /* if (midtoken) */
2953           else if (begtoken (c))
2954             {
2955               switch (definedef)
2956                 {
2957                 case dnone:
2958                   switch (fvdef)
2959                     {
2960                     case fstartlist:
2961                       fvdef = finlist;
2962                       continue;
2963                     case flistseen:
2964                       make_C_tag (TRUE); /* a function */
2965                       fvdef = fignore;
2966                       break;
2967                     case fvnameseen:
2968                       fvdef = fvnone;
2969                       break;
2970                     }
2971                   if (structdef == stagseen && !cjava)
2972                     structdef = snone;
2973                   break;
2974                 case dsharpseen:
2975                   savetok = tok;
2976                 }
2977               if (!yacc_rules || lp == newlb.buffer + 1)
2978                 {
2979                   tokoff = lp - 1 - newlb.buffer;
2980                   toklen = 1;
2981                   midtoken = TRUE;
2982                 }
2983               continue;
2984             } /* if (begtoken) */
2985         } /* if must look at token */
2986
2987
2988       /* Detect end of line, colon, comma, semicolon and various braces
2989          after having handled a token.*/
2990       switch (c)
2991         {
2992         case ':':
2993           if (definedef != dnone)
2994             break;
2995           switch (objdef)
2996             {
2997             case  otagseen:
2998               objdef = oignore;
2999               make_C_tag (TRUE); /* an Objective C class */
3000               break;
3001             case omethodtag:
3002             case omethodparm:
3003               objdef = omethodcolon;
3004               methodlen += 1;
3005               grow_linebuffer (&token_name, methodlen + 1);
3006               strcat (token_name.buffer, ":");
3007               token_name.len = methodlen;
3008               break;
3009             }
3010           if (structdef == stagseen)
3011             structdef = scolonseen;
3012           else
3013             switch (fvdef)
3014               {
3015               case fvnameseen:
3016                 if (yacc_rules)
3017                   {
3018                     make_C_tag (FALSE); /* a yacc function */
3019                     fvdef = fignore;
3020                   }
3021                 break;
3022               case fstartlist:
3023                 fvextern = FALSE;
3024                 fvdef = fvnone;
3025                 break;
3026               }
3027           break;
3028         case ';':
3029           if (definedef != dnone)
3030             break;
3031           if (cblev == 0)
3032             switch (typdef)
3033               {
3034               case tend:
3035                 make_C_tag (FALSE); /* a typedef */
3036                 /* FALLTHRU */
3037               default:
3038                 typdef = tnone;
3039               }
3040           switch (fvdef)
3041             {
3042             case fignore:
3043               break;
3044             case fvnameseen:
3045               if ((members && cblev == 1)
3046                   || (globals && cblev == 0 && (!fvextern || declarations)))
3047                 make_C_tag (FALSE); /* a variable */
3048               fvextern = FALSE;
3049               fvdef = fvnone;
3050               tok.valid = FALSE;
3051               break;
3052             case flistseen:
3053               if (declarations && (cblev == 0 || cblev == 1))
3054                 make_C_tag (TRUE); /* a function declaration */
3055               /* FALLTHRU */
3056             default:
3057               fvextern = FALSE;
3058               fvdef = fvnone;
3059               /* The following instruction invalidates the token.
3060                  Probably the token should be invalidated in all
3061                  other cases  where some state machine is reset. */
3062               tok.valid = FALSE;
3063             }
3064           if (structdef == stagseen)
3065             structdef = snone;
3066           break;
3067         case ',':
3068           if (definedef != dnone)
3069             break;
3070           switch (objdef)
3071             {
3072             case omethodtag:
3073             case omethodparm:
3074               make_C_tag (TRUE); /* an Objective C method */
3075               objdef = oinbody;
3076               break;
3077             }
3078           switch (fvdef)
3079             {
3080             case foperator:
3081             case finlist:
3082             case fignore:
3083             case vignore:
3084               break;
3085             case fvnameseen:
3086               if ((members && cblev == 1)
3087                   || (globals && cblev == 0 && (!fvextern || declarations)))
3088                 make_C_tag (FALSE); /* a variable */
3089               break;
3090             default:
3091               fvdef = fvnone;
3092             }
3093           if (structdef == stagseen)
3094             structdef = snone;
3095           break;
3096         case '[':
3097           if (definedef != dnone)
3098             break;
3099           if (cblev == 0 && typdef == tend)
3100             {
3101               typdef = tignore;
3102               make_C_tag (FALSE);       /* a typedef */
3103               break;
3104             }
3105           switch (fvdef)
3106             {
3107             case foperator:
3108             case finlist:
3109             case fignore:
3110             case vignore:
3111               break;
3112             case fvnameseen:
3113               if ((members && cblev == 1)
3114                   || (globals && cblev == 0 && (!fvextern || declarations)))
3115                 make_C_tag (FALSE); /* a variable */
3116               /* FALLTHRU */
3117             default:
3118               fvdef = fvnone;
3119             }
3120           if (structdef == stagseen)
3121             structdef = snone;
3122           break;
3123         case '(':
3124           if (definedef != dnone)
3125             break;
3126           if (objdef == otagseen && parlev == 0)
3127             objdef = oparenseen;
3128           switch (fvdef)
3129             {
3130             case fvnameseen:
3131               if (typdef == ttypeseen
3132                   && tok.valid
3133                   && *lp != '*'
3134                   && structdef != sinbody)
3135                 {
3136                   /* This handles constructs like:
3137                      typedef void OperatorFun (int fun); */
3138                   make_C_tag (FALSE);
3139                   typdef = tignore;
3140                 }
3141               /* FALLTHRU */
3142             case foperator:
3143               fvdef = fstartlist;
3144               break;
3145             case flistseen:
3146               fvdef = finlist;
3147               break;
3148             }
3149           parlev++;
3150           break;
3151         case ')':
3152           if (definedef != dnone)
3153             break;
3154           if (objdef == ocatseen && parlev == 1)
3155             {
3156               make_C_tag (TRUE); /* an Objective C category */
3157               objdef = oignore;
3158             }
3159           if (--parlev == 0)
3160             {
3161               switch (fvdef)
3162                 {
3163                 case fstartlist:
3164                 case finlist:
3165                   fvdef = flistseen;
3166                   break;
3167                 }
3168               if (cblev == 0 && (typdef == tend))
3169                 {
3170                   typdef = tignore;
3171                   make_C_tag (FALSE); /* a typedef */
3172                 }
3173             }
3174           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3175             parlev = 0;
3176           break;
3177         case '{':
3178           if (definedef != dnone)
3179             break;
3180           if (typdef == ttypeseen)
3181             typdef = tinbody;
3182           switch (structdef)
3183             {
3184             case skeyseen:      /* unnamed struct */
3185               structdef = sinbody;
3186               structtag = "_anonymous_";
3187               break;
3188             case stagseen:
3189             case scolonseen:    /* named struct */
3190               structdef = sinbody;
3191               make_C_tag (FALSE);       /* a struct */
3192               break;
3193             }
3194           switch (fvdef)
3195             {
3196             case flistseen:
3197               make_C_tag (TRUE); /* a function */
3198               /* FALLTHRU */
3199             case fignore:
3200               fvdef = fvnone;
3201               break;
3202             case fvnone:
3203               switch (objdef)
3204                 {
3205                 case otagseen:
3206                   make_C_tag (TRUE); /* an Objective C class */
3207                   objdef = oignore;
3208                   break;
3209                 case omethodtag:
3210                 case omethodparm:
3211                   make_C_tag (TRUE); /* an Objective C method */
3212                   objdef = oinbody;
3213                   break;
3214                 default:
3215                   /* Neutralize `extern "C" {' grot. */
3216                   if (cblev == 0 && structdef == snone && typdef == tnone)
3217                     cblev = -1;
3218                 }
3219             }
3220           cblev++;
3221           break;
3222         case '*':
3223           if (definedef != dnone)
3224             break;
3225           if (fvdef == fstartlist)
3226             fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
3227           break;
3228         case '}':
3229           if (definedef != dnone)
3230             break;
3231           if (!noindentypedefs && lp == newlb.buffer + 1)
3232             {
3233               cblev = 0;        /* reset curly brace level if first column */
3234               parlev = 0;       /* also reset paren level, just in case... */
3235             }
3236           else if (cblev > 0)
3237             cblev--;
3238           if (cblev == 0)
3239             {
3240               if (typdef == tinbody)
3241                 typdef = tend;
3242               /* Memory leakage here: the string pointed by structtag is
3243                  never released, because I fear to miss something and
3244                  break things while freeing the area.  The amount of
3245                  memory leaked here is the sum of the lengths of the
3246                  struct tags.
3247               if (structdef == sinbody)
3248                 free (structtag); */
3249
3250               structdef = snone;
3251               structtag = "<error>";
3252             }
3253           break;
3254         case '=':
3255           if (definedef != dnone)
3256             break;
3257           switch (fvdef)
3258             {
3259             case foperator:
3260             case finlist:
3261             case fignore:
3262             case vignore:
3263               break;
3264             case fvnameseen:
3265               if ((members && cblev == 1)
3266                   || (globals && cblev == 0 && (!fvextern || declarations)))
3267                 make_C_tag (FALSE); /* a variable */
3268               /* FALLTHRU */
3269             default:
3270               fvdef = vignore;
3271             }
3272           break;
3273         case '+':
3274         case '-':
3275           if (objdef == oinbody && cblev == 0)
3276             {
3277               objdef = omethodsign;
3278               break;
3279             }
3280           /* FALLTHRU */
3281         case '#': case '~': case '&': case '%': case '/': case '|':
3282         case '^': case '!': case '<': case '>': case '.': case '?': case ']':
3283           if (definedef != dnone)
3284             break;
3285           /* These surely cannot follow a function tag in C. */
3286           switch (fvdef)
3287             {
3288             case foperator:
3289             case finlist:
3290             case fignore:
3291             case vignore:
3292               break;
3293             default:
3294               fvdef = fvnone;
3295             }
3296           break;
3297         case '\0':
3298           if (objdef == otagseen)
3299             {
3300               make_C_tag (TRUE); /* an Objective C class */
3301               objdef = oignore;
3302             }
3303           /* If a macro spans multiple lines don't reset its state. */
3304           if (quotednl)
3305             CNL_SAVE_DEFINEDEF ();
3306           else
3307             CNL ();
3308           break;
3309         } /* switch (c) */
3310
3311     } /* while not eof */
3312 }
3313
3314 /*
3315  * Process either a C++ file or a C file depending on the setting
3316  * of a global flag.
3317  */
3318 static void
3319 default_C_entries (inf)
3320      FILE *inf;
3321 {
3322   C_entries (cplusplus ? C_PLPL : 0, inf);
3323 }
3324
/* Parse INF as plain ANSI C, whatever the global settings are:
   C_entries is called with no extension flag set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3332
3333 /* Always do C++. */
3334 static void
3335 Cplusplus_entries (inf)
3336      FILE *inf;
3337 {
3338   C_entries (C_PLPL, inf);
3339 }
3340
3341 /* Always do Java. */
3342 static void
3343 Cjava_entries (inf)
3344      FILE *inf;
3345 {
3346   C_entries (C_JAVA, inf);
3347 }
3348
3349 /* Always do C*. */
3350 static void
3351 Cstar_entries (inf)
3352      FILE *inf;
3353 {
3354   C_entries (C_STAR, inf);
3355 }
3356
3357 /* Always do Yacc. */
3358 static void
3359 Yacc_entries (inf)
3360      FILE *inf;
3361 {
3362   C_entries (YACC, inf);
3363 }
3364 \f
/*
 * A useful macro: the head of a `for' loop that runs its body once
 * per line read from FILE_POINTER.
 *
 * The globals `lineno' and `charno' are reset to 0 when the loop
 * starts.  Before each pass, as long as feof (FILE_POINTER) is false,
 * `lineno' is incremented, `linecharno' receives the old `charno',
 * `charno' grows by the value readline returns for LINE_BUFFER, and
 * CHAR_POINTER is set to the start of LINE_BUFFER's text.
 *
 * CHAR_POINTER is taken from LINE_BUFFER itself rather than from the
 * global `lb', so the macro also works when a different line buffer
 * is passed.  Arguments are parenthesized so that expressions can be
 * passed safely.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
       )
3375
3376
3377 /*
3378  * Read a file, but do no processing.  This is used to do regexp
3379  * matching on files that have no language defined.
3380  */
3381 static void
3382 just_read_file (inf)
3383      FILE *inf;
3384 {
3385   register char *dummy;
3386
3387   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3388     continue;
3389 }
3390 \f
3391 /* Fortran parsing */
3392
3393 static bool tail P_((char *));
3394 static void takeprec P_((void));
3395 static void getit P_((FILE *));
3396
3397 static bool
3398 tail (cp)
3399      char *cp;
3400 {
3401   register int len = 0;
3402
3403   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3404     cp++, len++;
3405   if (*cp == '\0' && !intoken (dbp[len]))
3406     {
3407       dbp += len;
3408       return TRUE;
3409     }
3410   return FALSE;
3411 }
3412
3413 static void
3414 takeprec ()
3415 {
3416   dbp = skip_spaces (dbp);
3417   if (*dbp != '*')
3418     return;
3419   dbp++;
3420   dbp = skip_spaces (dbp);
3421   if (strneq (dbp, "(*)", 3))
3422     {
3423       dbp += 3;
3424       return;
3425     }
3426   if (!ISDIGIT (*dbp))
3427     {
3428       --dbp;                    /* force failure */
3429       return;
3430     }
3431   do
3432     dbp++;
3433   while (ISDIGIT (*dbp));
3434 }
3435
3436 static void
3437 getit (inf)
3438      FILE *inf;
3439 {
3440   register char *cp;
3441
3442   dbp = skip_spaces (dbp);
3443   if (*dbp == '\0')
3444     {
3445       lineno++;
3446       linecharno = charno;
3447       charno += readline (&lb, inf);
3448       dbp = lb.buffer;
3449       if (dbp[5] != '&')
3450         return;
3451       dbp += 6;
3452       dbp = skip_spaces (dbp);
3453     }
3454   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3455     return;
3456   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3457     continue;
3458   pfnote (savenstr (dbp, cp-dbp), TRUE,
3459           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3460 }
3461
3462
3463 static void
3464 Fortran_functions (inf)
3465      FILE *inf;
3466 {
3467   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3468     {
3469       if (*dbp == '%')
3470         dbp++;                  /* Ratfor escape to fortran */
3471       dbp = skip_spaces (dbp);
3472       if (*dbp == '\0')
3473         continue;
3474       switch (lowcase (*dbp))
3475         {
3476         case 'i':
3477           if (tail ("integer"))
3478             takeprec ();
3479           break;
3480         case 'r':
3481           if (tail ("real"))
3482             takeprec ();
3483           break;
3484         case 'l':
3485           if (tail ("logical"))
3486             takeprec ();
3487           break;
3488         case 'c':
3489           if (tail ("complex") || tail ("character"))
3490             takeprec ();
3491           break;
3492         case 'd':
3493           if (tail ("double"))
3494             {
3495               dbp = skip_spaces (dbp);
3496               if (*dbp == '\0')
3497                 continue;
3498               if (tail ("precision"))
3499                 break;
3500               continue;
3501             }
3502           break;
3503         }
3504       dbp = skip_spaces (dbp);
3505       if (*dbp == '\0')
3506         continue;
3507       switch (lowcase (*dbp))
3508         {
3509         case 'f':
3510           if (tail ("function"))
3511             getit (inf);
3512           continue;
3513         case 's':
3514           if (tail ("subroutine"))
3515             getit (inf);
3516           continue;
3517         case 'e':
3518           if (tail ("entry"))
3519             getit (inf);
3520           continue;
3521         case 'b':
3522           if (tail ("blockdata") || tail ("block data"))
3523             {
3524               dbp = skip_spaces (dbp);
3525               if (*dbp == '\0') /* assume un-named */
3526                 pfnote (savestr ("blockdata"), TRUE,
3527                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
3528               else
3529                 getit (inf);    /* look for name */
3530             }
3531           continue;
3532         }
3533     }
3534 }
3535 \f
3536 /*
3537  * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be>, 1998-04-24
3538  * Ada parsing
3539  */
3540
3541 static void adagetit P_((FILE *, char *));
3542
3543 /* Once we are positioned after an "interesting" keyword, let's get
3544    the real tag value necessary. */
3545 static void
3546 adagetit (inf, name_qualifier)
3547      FILE *inf;
3548      char *name_qualifier;
3549 {
3550   register char *cp;
3551   char *name;
3552   char c;
3553
3554   while (!feof (inf))
3555     {
3556       dbp = skip_spaces (dbp);
3557       if (*dbp == '\0'
3558           || (dbp[0] == '-' && dbp[1] == '-'))
3559         {
3560           lineno++;
3561           linecharno = charno;
3562           charno += readline (&lb, inf);
3563           dbp = lb.buffer;
3564         }
3565       switch (*dbp)
3566         {
3567         case 'b':
3568         case 'B':
3569           if (tail ("body"))
3570             {
3571               /* Skipping body of   procedure body   or   package body or ....
3572                  resetting qualifier to body instead of spec. */
3573               name_qualifier = "/b";
3574               continue;
3575             }
3576           break;
3577         case 't':
3578         case 'T':
3579           /* Skipping type of   task type   or   protected type ... */
3580           if (tail ("type"))
3581             continue;
3582           break;
3583         }
3584       if (*dbp == '"')
3585         {
3586           dbp += 1;
3587           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3588             continue;
3589         }
3590       else
3591         {
3592           dbp = skip_spaces (dbp);
3593           for (cp = dbp;
3594                (*cp != '\0'
3595                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3596                cp++)
3597             continue;
3598           if (cp == dbp)
3599             return;
3600         }
3601       c = *cp;
3602       *cp = '\0';
3603       name = concat (dbp, name_qualifier, "");
3604       *cp = c;
3605       pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3606       if (c == '"')
3607         dbp = cp + 1;
3608       return;
3609     }
3610 }
3611
3612 static void
3613 Ada_funcs (inf)
3614      FILE *inf;
3615 {
3616   bool inquote = FALSE;
3617
3618   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3619     {
3620       while (*dbp != '\0')
3621         {
3622           /* Skip a string i.e. "abcd". */
3623           if (inquote || (*dbp == '"'))
3624             {
3625               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3626               if (dbp != NULL)
3627                 {
3628                   inquote = FALSE;
3629                   dbp += 1;
3630                   continue;     /* advance char */
3631                 }
3632               else
3633                 {
3634                   inquote = TRUE;
3635                   break;        /* advance line */
3636                 }
3637             }
3638
3639           /* Skip comments. */
3640           if (dbp[0] == '-' && dbp[1] == '-')
3641             break;              /* advance line */
3642
3643           /* Skip character enclosed in single quote i.e. 'a'
3644              and skip single quote starting an attribute i.e. 'Image. */
3645           if (*dbp == '\'')
3646             {
3647               dbp++ ;
3648               if (*dbp != '\0')
3649                 dbp++;
3650               continue;
3651             }
3652
3653           /* Search for beginning of a token.  */
3654           if (!begtoken (*dbp))
3655             {
3656               dbp++;
3657               continue;         /* advance char */
3658             }
3659
3660           /* We are at the beginning of a token. */
3661           switch (*dbp)
3662             {
3663             case 'f':
3664             case 'F':
3665               if (!packages_only && tail ("function"))
3666                 adagetit (inf, "/f");
3667               else
3668                 break;          /* from switch */
3669               continue;         /* advance char */
3670             case 'p':
3671             case 'P':
3672               if (!packages_only && tail ("procedure"))
3673                 adagetit (inf, "/p");
3674               else if (tail ("package"))
3675                 adagetit (inf, "/s");
3676               else if (tail ("protected")) /* protected type */
3677                 adagetit (inf, "/t");
3678               else
3679                 break;          /* from switch */
3680               continue;         /* advance char */
3681             case 't':
3682             case 'T':
3683               if (!packages_only && tail ("task"))
3684                 adagetit (inf, "/k");
3685               else if (typedefs && !packages_only && tail ("type"))
3686                 {
3687                   adagetit (inf, "/t");
3688                   while (*dbp != '\0')
3689                     dbp += 1;
3690                 }
3691               else
3692                 break;          /* from switch */
3693               continue;         /* advance char */
3694             }
3695
3696           /* Look for the end of the token. */
3697           while (!endtoken (*dbp))
3698             dbp++;
3699
3700         } /* advance char */
3701     } /* advance line */
3702 }
3703 \f
3704 /*
3705  * Bob Weiner, Motorola Inc., 4/3/94
3706  * Unix and microcontroller assembly tag handling
3707  * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3708  */
3709 static void
3710 Asm_labels (inf)
3711      FILE *inf;
3712 {
3713   register char *cp;
3714
3715   LOOP_ON_INPUT_LINES (inf, lb, cp)
3716     {
3717       /* If first char is alphabetic or one of [_.$], test for colon
3718          following identifier. */
3719       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3720         {
3721           /* Read past label. */
3722           cp++;
3723           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3724             cp++;
3725           if (*cp == ':' || iswhite (*cp))
3726             {
3727               /* Found end of label, so copy it and add it to the table. */
3728               pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3729                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3730             }
3731         }
3732     }
3733 }
3734 \f
3735 /*
3736  * Perl support by Bart Robinson <lomew@cs.utah.edu>
3737  *              enhanced by Michael Ernst <mernst@alum.mit.edu>
3738  * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3739  * Perl variable names: /^(my|local).../
3740  */
3741 static void
3742 Perl_functions (inf)
3743      FILE *inf;
3744 {
3745   register char *cp;
3746
3747   LOOP_ON_INPUT_LINES (inf, lb, cp)
3748     {
3749       if (*cp++ == 's'
3750           && *cp++ == 'u'
3751           && *cp++ == 'b' && iswhite (*cp++))
3752         {
3753           cp = skip_spaces (cp);
3754           if (*cp != '\0')
3755             {
3756               char *sp = cp;
3757               while (*cp != '\0'
3758                      && !iswhite (*cp) && *cp != '{' && *cp != '(')
3759                 cp++;
3760               pfnote (savenstr (sp, cp-sp), TRUE,
3761                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3762             }
3763         }
3764        else if (globals         /* only if tagging global vars is enabled */
3765                 && ((cp = lb.buffer,
3766                      *cp++ == 'm'
3767                      && *cp++ == 'y')
3768                     || (cp = lb.buffer,
3769                         *cp++ == 'l'
3770                         && *cp++ == 'o'
3771                         && *cp++ == 'c'
3772                         && *cp++ == 'a'
3773                         && *cp++ == 'l'))
3774                 && (*cp == '(' || iswhite (*cp)))
3775         {
3776           /* After "my" or "local", but before any following paren or space. */
3777           char *varname = NULL;
3778
3779           cp = skip_spaces (cp);
3780           if (*cp == '$' || *cp == '@' || *cp == '%')
3781             {
3782               char* varstart = ++cp;
3783               while (ISALNUM (*cp) || *cp == '_')
3784                 cp++;
3785               varname = savenstr (varstart, cp-varstart);
3786             }
3787           else
3788             {
3789               /* Should be examining a variable list at this point;
3790                  could insist on seeing an open parenthesis. */
3791               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
3792                 cp++;
3793             }
3794
3795           /* Perhaps I should back cp up one character, so the TAGS table
3796              doesn't mention (and so depend upon) the following char. */
3797           pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3798                   FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3799         }
3800     }
3801 }
3802 \f
3803 /*
3804  * Python support by Eric S. Raymond <esr@thyrsus.com>
3805  * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
3806  */
3807 static void
3808 Python_functions (inf)
3809      FILE *inf;
3810 {
3811   register char *cp;
3812
3813   LOOP_ON_INPUT_LINES (inf, lb, cp)
3814     {
3815       if (*cp++ == 'd'
3816           && *cp++ == 'e'
3817           && *cp++ == 'f' && iswhite (*cp++))
3818         {
3819           cp = skip_spaces (cp);
3820           while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3821             cp++;
3822           pfnote (NULL, TRUE,
3823                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3824         }
3825
3826       cp = lb.buffer;
3827       if (*cp++ == 'c'
3828           && *cp++ == 'l'
3829           && *cp++ == 'a'
3830           && *cp++ == 's'
3831           && *cp++ == 's' && iswhite (*cp++))
3832         {
3833           cp = skip_spaces (cp);
3834           while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3835             cp++;
3836           pfnote (NULL, TRUE,
3837                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3838         }
3839     }
3840 }
3841 \f
3842 /* Idea by Corny de Souza
3843  * Cobol tag functions
3844  * We could look for anything that could be a paragraph name.
3845  * i.e. anything that starts in column 8 is one word and ends in a full stop.
3846  */
3847 static void
3848 Cobol_paragraphs (inf)
3849      FILE *inf;
3850 {
3851   register char *bp, *ep;
3852
3853   LOOP_ON_INPUT_LINES (inf, lb, bp)
3854     {
3855       if (lb.len < 9)
3856         continue;
3857       bp += 8;
3858
3859       /* If eoln, compiler option or comment ignore whole line. */
3860       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
3861         continue;
3862
3863       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
3864         continue;
3865       if (*ep++ == '.')
3866         pfnote (savenstr (bp, ep-bp), TRUE,
3867                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
3868     }
3869 }
3870 \f
3871 /*
3872  * Makefile support
3873  */
3874 static void
3875 Makefile_targets (inf)
3876      FILE *inf;
3877 {
3878   register char *bp;
3879
3880   LOOP_ON_INPUT_LINES (inf, lb, bp)
3881     {
3882       if (*bp == '\t' || *bp == '#')
3883         continue;
3884       while (*bp != '\0' && *bp != '=' && *bp != ':')
3885         bp++;
3886       if (*bp == ':')
3887         pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
3888                 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
3889     }
3890 }
3891 \f
3892 /* Added by Mosur Mohan, 4/22/88 */
3893 /* Pascal parsing                */
3894
3895 /*
3896  *  Locates tags for procedures & functions.  Doesn't do any type- or
3897  *  var-definitions.  It does look for the keyword "extern" or
3898  *  "forward" immediately following the procedure statement; if found,
3899  *  the tag is skipped.
3900  */
3901 static void
3902 Pascal_functions (inf)
3903      FILE *inf;
3904 {
3905   linebuffer tline;             /* mostly copied from C_entries */
3906   long save_lcno;
3907   int save_lineno, save_len;
3908   char c, *cp, *namebuf;
3909
3910   bool                          /* each of these flags is TRUE iff: */
3911     incomment,                  /* point is inside a comment */
3912     inquote,                    /* point is inside '..' string */
3913     get_tagname,                /* point is after PROCEDURE/FUNCTION
3914                                    keyword, so next item = potential tag */
3915     found_tag,                  /* point is after a potential tag */
3916     inparms,                    /* point is within parameter-list */
3917     verify_tag;                 /* point has passed the parm-list, so the
3918                                    next token will determine whether this
3919                                    is a FORWARD/EXTERN to be ignored, or
3920                                    whether it is a real tag */
3921
3922   save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
3923   namebuf = NULL;               /* keep compiler quiet */
3924   lineno = 0;
3925   charno = 0;
3926   dbp = lb.buffer;
3927   *dbp = '\0';
3928   initbuffer (&tline);
3929
3930   incomment = inquote = FALSE;
3931   found_tag = FALSE;            /* have a proc name; check if extern */
3932   get_tagname = FALSE;          /* have found "procedure" keyword    */
3933   inparms = FALSE;              /* found '(' after "proc"            */
3934   verify_tag = FALSE;           /* check if "extern" is ahead        */
3935
3936
3937   while (!feof (inf))           /* long main loop to get next char */
3938     {
3939       c = *dbp++;
3940       if (c == '\0')            /* if end of line */
3941         {
3942           lineno++;
3943           linecharno = charno;
3944           charno += readline (&lb, inf);
3945           dbp = lb.buffer;
3946           if (*dbp == '\0')
3947             continue;
3948           if (!((found_tag && verify_tag)
3949                 || get_tagname))
3950             c = *dbp++;         /* only if don't need *dbp pointing
3951                                    to the beginning of the name of
3952                                    the procedure or function */
3953         }
3954       if (incomment)
3955         {
3956           if (c == '}')         /* within { } comments */
3957             incomment = FALSE;
3958           else if (c == '*' && *dbp == ')') /* within (* *) comments */
3959             {
3960               dbp++;
3961               incomment = FALSE;
3962             }
3963           continue;
3964         }
3965       else if (inquote)
3966         {
3967           if (c == '\'')
3968             inquote = FALSE;
3969           continue;
3970         }
3971       else
3972         switch (c)
3973           {
3974           case '\'':
3975             inquote = TRUE;     /* found first quote */
3976             continue;
3977           case '{':             /* found open { comment */
3978             incomment = TRUE;
3979             continue;
3980           case '(':
3981             if (*dbp == '*')    /* found open (* comment */
3982               {
3983                 incomment = TRUE;
3984                 dbp++;
3985               }
3986             else if (found_tag) /* found '(' after tag, i.e., parm-list */
3987               inparms = TRUE;
3988             continue;
3989           case ')':             /* end of parms list */
3990             if (inparms)
3991               inparms = FALSE;
3992             continue;
3993           case ';':
3994             if (found_tag && !inparms) /* end of proc or fn stmt */
3995               {
3996                 verify_tag = TRUE;
3997                 break;
3998               }
3999             continue;
4000           }
4001       if (found_tag && verify_tag && (*dbp != ' '))
4002         {
4003           /* check if this is an "extern" declaration */
4004           if (*dbp == '\0')
4005             continue;
4006           if (lowcase (*dbp == 'e'))
4007             {
4008               if (tail ("extern"))      /* superfluous, really! */
4009                 {
4010                   found_tag = FALSE;
4011                   verify_tag = FALSE;
4012                 }
4013             }
4014           else if (lowcase (*dbp) == 'f')
4015             {
4016               if (tail ("forward"))     /*  check for forward reference */
4017                 {
4018                   found_tag = FALSE;
4019                   verify_tag = FALSE;
4020                 }
4021             }
4022           if (found_tag && verify_tag) /* not external proc, so make tag */
4023             {
4024               found_tag = FALSE;
4025               verify_tag = FALSE;
4026               pfnote (namebuf, TRUE,
4027                       tline.buffer, save_len, save_lineno, save_lcno);
4028               continue;
4029             }
4030         }
4031       if (get_tagname)          /* grab name of proc or fn */
4032         {
4033           if (*dbp == '\0')
4034             continue;
4035
4036           /* save all values for later tagging */
4037           grow_linebuffer (&tline, lb.len + 1);
4038           strcpy (tline.buffer, lb.buffer);
4039           save_lineno = lineno;
4040           save_lcno = linecharno;
4041
4042           /* grab block name */
4043           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4044             continue;
4045           namebuf = savenstr (dbp, cp-dbp);
4046           dbp = cp;             /* set dbp to e-o-token */
4047           save_len = dbp - lb.buffer + 1;
4048           get_tagname = FALSE;
4049           found_tag = TRUE;
4050           continue;
4051
4052           /* and proceed to check for "extern" */
4053         }
4054       else if (!incomment && !inquote && !found_tag)
4055         {
4056           /* check for proc/fn keywords */
4057           switch (lowcase (c))
4058             {
4059             case 'p':
4060               if (tail ("rocedure"))    /* c = 'p', dbp has advanced */
4061                 get_tagname = TRUE;
4062               continue;
4063             case 'f':
4064               if (tail ("unction"))
4065                 get_tagname = TRUE;
4066               continue;
4067             }
4068         }
4069     }                           /* while not eof */
4070
4071   free (tline.buffer);
4072 }
4073 \f
4074 /*
4075  * lisp tag functions
4076  *  look for (def or (DEF, quote or QUOTE
4077  */
4078
4079 static int L_isdef P_((char *));
4080 static int L_isquote P_((char *));
4081 static void L_getit P_((void));
4082
/* Return 1 if the three characters FOLLOWING strp[0] spell "def" in any
   mix of upper and lower case, else 0.  strp[0] itself is not looked at
   (callers pass a pointer to the character before the keyword).  */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
4091
/* Return non-zero if the characters following strp[0] spell "quote" in
   any mix of upper and lower case and are followed by a white character.
   strp[0] itself is not looked at.  */
static int
L_isquote (strp)
     register char *strp;
{
  return ((strp[1] == 'q' || strp[1] == 'Q')
          && (strp[2] == 'u' || strp[2] == 'U')
          && (strp[3] == 'o' || strp[3] == 'O')
          && (strp[4] == 't' || strp[4] == 'T')
          && (strp[5] == 'e' || strp[5] == 'E')
          && iswhite (strp[6]));
}
4103
4104 static void
4105 L_getit ()
4106 {
4107   register char *cp;
4108
4109   if (*dbp == '\'')             /* Skip prefix quote */
4110     dbp++;
4111   else if (*dbp == '(')
4112   {
4113     if (L_isquote (dbp))
4114       dbp += 7;                 /* Skip "(quote " */
4115     else
4116       dbp += 1;                 /* Skip "(" before name in (defstruct (foo)) */
4117     dbp = skip_spaces (dbp);
4118   }
4119
4120   for (cp = dbp /*+1*/;
4121        *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4122        cp++)
4123     continue;
4124   if (cp == dbp)
4125     return;
4126
4127   pfnote (savenstr (dbp, cp-dbp), TRUE,
4128           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4129 }
4130
4131 static void
4132 Lisp_functions (inf)
4133      FILE *inf;
4134 {
4135   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4136     {
4137       if (dbp[0] == '(')
4138         {
4139           if (L_isdef (dbp))
4140             {
4141               dbp = skip_non_spaces (dbp);
4142               dbp = skip_spaces (dbp);
4143               L_getit ();
4144             }
4145           else
4146             {
4147               /* Check for (foo::defmumble name-defined ... */
4148               do
4149                 dbp++;
4150               while (*dbp != '\0' && !iswhite (*dbp)
4151                      && *dbp != ':' && *dbp != '(' && *dbp != ')');
4152               if (*dbp == ':')
4153                 {
4154                   do
4155                     dbp++;
4156                   while (*dbp == ':');
4157
4158                   if (L_isdef (dbp - 1))
4159                     {
4160                       dbp = skip_non_spaces (dbp);
4161                       dbp = skip_spaces (dbp);
4162                       L_getit ();
4163                     }
4164                 }
4165             }
4166         }
4167     }
4168 }
4169 \f
4170 /*
4171  * Postscript tag functions
4172  * Just look for lines where the first character is '/'
4173  * Richard Mlynarik <mly@adoc.xerox.com>
4174  * Also look at "defineps" for PSWrap
4175  * suggested by Masatake YAMATO <masata-y@is.aist-nara.ac.jp>
4176  */
4177 static void
4178 Postscript_functions (inf)
4179      FILE *inf;
4180 {
4181   register char *bp, *ep;
4182
4183   LOOP_ON_INPUT_LINES (inf, lb, bp)
4184     {
4185       if (bp[0] == '/')
4186         {
4187           for (ep = bp+1;
4188                *ep != '\0' && *ep != ' ' && *ep != '{';
4189                ep++)
4190             continue;
4191           pfnote (savenstr (bp, ep-bp), TRUE,
4192                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4193         }
4194       else if (strneq (bp, "defineps", 8))
4195         {
4196           bp = skip_non_spaces (bp);
4197           bp = skip_spaces (bp);
4198           get_tag (bp);
4199         }
4200     }
4201 }
4202
4203 \f
4204 /*
4205  * Scheme tag functions
4206  * look for (def... xyzzy
4207  * look for (def... (xyzzy
4208  * look for (def ... ((...(xyzzy ....
4209  * look for (set! xyzzy
4210  */
4211
4212 static void
4213 Scheme_functions (inf)
4214      FILE *inf;
4215 {
4216   register char *bp;
4217
4218   LOOP_ON_INPUT_LINES (inf, lb, bp)
4219     {
4220       if (bp[0] == '('
4221           && (bp[1] == 'D' || bp[1] == 'd')
4222           && (bp[2] == 'E' || bp[2] == 'e')
4223           && (bp[3] == 'F' || bp[3] == 'f'))
4224         {
4225           bp = skip_non_spaces (bp);
4226           /* Skip over open parens and white space */
4227           while (iswhite (*bp) || *bp == '(')
4228             bp++;
4229           get_tag (bp);
4230         }
4231       if (bp[0] == '('
4232           && (bp[1] == 'S' || bp[1] == 's')
4233           && (bp[2] == 'E' || bp[2] == 'e')
4234           && (bp[3] == 'T' || bp[3] == 't')
4235           && (bp[4] == '!' || bp[4] == '!')
4236           && (iswhite (bp[5])))
4237         {
4238           bp = skip_non_spaces (bp);
4239           bp = skip_spaces (bp);
4240           get_tag (bp);
4241         }
4242     }
4243 }
4244 \f
4245 /* Find tags in TeX and LaTeX input files.  */
4246
4247 /* TEX_toktab is a table of TeX control sequences that define tags.
4248    Each TEX_tabent records one such control sequence.
4249    CONVERT THIS TO USE THE Stab TYPE!! */
4250 struct TEX_tabent
4251 {
4252   char *name;                   /* control sequence name; NULL in the end-of-table entry */
4253   int len;                      /* strlen (name); 0 in the end-of-table entry */
4254 };
4255
4256 struct TEX_tabent *TEX_toktab = NULL;   /* Table with tag tokens */
4257
4258 /* Default set of control sequences to put into TEX_toktab.
4259    The value of environment var TEXTAGS is prepended to this.  */
4260
4261 char *TEX_defenv = "\
4262 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4263 :part:appendix:entry:index";
4264
4265 static void TEX_mode P_((FILE *));
4266 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4267 static int TEX_Token P_((char *));
4268
4269 char TEX_esc = '\\';          /* escape char; TEX_mode sets it to '\\' or '!' */
4270 char TEX_opgrp = '{';         /* group opener; TEX_mode sets it to '{' or '<' */
4271 char TEX_clgrp = '}';         /* group closer; TEX_mode sets it to '}' or '>' */
4272
4273 /*
4274  * TeX/LaTeX scanning loop.
4275  */
4276 static void
4277 TeX_commands (inf)
4278      FILE *inf;
4279 {
4280   char *cp, *lasthit;
4281   register int i;
4282
4283   /* Select either \ or ! as escape character.  */
4284   TEX_mode (inf);
4285
4286   /* Initialize token table once from environment. */
4287   if (!TEX_toktab)
4288     TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4289
4290   LOOP_ON_INPUT_LINES (inf, lb, cp)
4291     {
4292       lasthit = cp;
4293       /* Look at each esc in line. */
4294       while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4295         {
4296           if (*++cp == '\0')
4297             break;
4298           linecharno += cp - lasthit;
4299           lasthit = cp;
4300           i = TEX_Token (lasthit);
4301           if (i >= 0)
4302             {
4303               /* We seem to include the TeX command in the tag name.
4304               register char *p;
4305               for (p = lasthit + TEX_toktab[i].len;
4306                    *p != '\0' && *p != TEX_clgrp;
4307                    p++)
4308                 continue; */
4309               pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4310                       lb.buffer, lb.len, lineno, linecharno);
4311               break;            /* We only tag a line once */
4312             }
4313         }
4314     }
4315 }
4316
4317 #define TEX_LESC '\\'
4318 #define TEX_SESC '!'
4319 #define TEX_cmt  '%'
4320
4321 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4322    chars accordingly. */
4323 static void
4324 TEX_mode (inf)
4325      FILE *inf;
4326 {
4327   int c;
4328
4329   while ((c = getc (inf)) != EOF)
4330     {
4331       /* Skip to next line if we hit the TeX comment char. */
4332       if (c == TEX_cmt)
4333         while (c != '\n')
4334           c = getc (inf);
4335       else if (c == TEX_LESC || c == TEX_SESC )
4336         break;
4337     }
4338
4339   if (c == TEX_LESC)
4340     {
4341       TEX_esc = TEX_LESC;
4342       TEX_opgrp = '{';
4343       TEX_clgrp = '}';
4344     }
4345   else
4346     {
4347       TEX_esc = TEX_SESC;
4348       TEX_opgrp = '<';
4349       TEX_clgrp = '>';
4350     }
4351   /* If the input file is compressed, inf is a pipe, and rewind may fail.
4352      No attempt is made to correct the situation. */
4353   rewind (inf);
4354 }
4355
4356 /* Read environment and prepend it to the default string.
4357    Build token table. */
4358 static struct TEX_tabent *
4359 TEX_decode_env (evarname, defenv)
4360      char *evarname;
4361      char *defenv;
4362 {
4363   register char *env, *p;
4364
4365   struct TEX_tabent *tab;
4366   int size, i;
4367
4368   /* Append default string to environment. */
4369   env = getenv (evarname);
4370   if (!env)
4371     env = defenv;
4372   else
4373     {
4374       char *oldenv = env;
4375       env = concat (oldenv, defenv, "");
4376     }
4377
4378   /* Allocate a token table */
4379   for (size = 1, p = env; p;)
4380     if ((p = etags_strchr (p, ':')) && *++p != '\0')
4381       size++;
4382   /* Add 1 to leave room for null terminator.  */
4383   tab = xnew (size + 1, struct TEX_tabent);
4384
4385   /* Unpack environment string into token table. Be careful about */
4386   /* zero-length strings (leading ':', "::" and trailing ':') */
4387   for (i = 0; *env;)
4388     {
4389       p = etags_strchr (env, ':');
4390       if (!p)                   /* End of environment string. */
4391         p = env + strlen (env);
4392       if (p - env > 0)
4393         {                       /* Only non-zero strings. */
4394           tab[i].name = savenstr (env, p - env);
4395           tab[i].len = strlen (tab[i].name);
4396           i++;
4397         }
4398       if (*p)
4399         env = p + 1;
4400       else
4401         {
4402           tab[i].name = NULL;   /* Mark end of table. */
4403           tab[i].len = 0;
4404           break;
4405         }
4406     }
4407   return tab;
4408 }
4409
4410 /* If the text at CP matches one of the tag-defining TeX command names,
4411    return the pointer to the first occurrence of that command in TEX_toktab.
4412    Otherwise return -1.
4413    Keep the capital `T' in `token' for dumb truncating compilers
4414    (this distinguishes it from `TEX_toktab' */
4415 static int
4416 TEX_Token (cp)
4417      char *cp;
4418 {
4419   int i;
4420
4421   for (i = 0; TEX_toktab[i].len > 0; i++)
4422     if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4423       return i;
4424   return -1;
4425 }
4426 \f
4427 /* Texinfo support.  Dave Love, Mar. 2000.  */
4428 static void
4429 Texinfo_nodes (inf)
4430      FILE * inf;
4431 {
4432   char *cp, *start;
4433   LOOP_ON_INPUT_LINES (inf, lb, cp)
4434     {
4435       if ((*cp++ == '@'
4436            && *cp++ == 'n'
4437            && *cp++ == 'o'
4438            && *cp++ == 'd'
4439            && *cp++ == 'e' && iswhite (*cp++)))
4440         {
4441           start = cp = skip_spaces(cp);
4442           while (*cp != '\0' && *cp != ',')
4443             cp++;
4444           pfnote (savenstr (start, cp - start), TRUE,
4445                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4446         }
4447     }
4448 }
4449 \f
4450 /*
4451  * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4452  *
4453  * Assumes that the predicate starts at column 0.
4454  * Only the first clause of a predicate is added.
4455  */
4456 static int prolog_pred P_((char *, char *));
4457 static void prolog_skip_comment P_((linebuffer *, FILE *));
4458 static int prolog_atom P_((char *, int));
4459
4460 static void
4461 Prolog_functions (inf)
4462      FILE *inf;
4463 {
4464   char *cp, *last;
4465   int len;
4466   int allocated;
4467
4468   allocated = 0;
4469   len = 0;
4470   last = NULL;
4471
4472   LOOP_ON_INPUT_LINES (inf, lb, cp)
4473     {
4474       if (cp[0] == '\0')        /* Empty line */
4475         continue;
4476       else if (iswhite (cp[0])) /* Not a predicate */
4477         continue;
4478       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
4479         prolog_skip_comment (&lb, inf);
4480       else if ((len = prolog_pred (cp, last)) > 0)
4481         {
4482           /* Predicate.  Store the function name so that we only
4483              generate a tag for the first clause.  */
4484           if (last == NULL)
4485             last = xnew(len + 1, char);
4486           else if (len + 1 > allocated)
4487             last = xrnew (last, len + 1, char);
4488           allocated = len + 1;
4489           strncpy (last, cp, len);
4490           last[len] = '\0';
4491         }
4492     }
4493 }
4494
4495
4496 static void
4497 prolog_skip_comment (plb, inf)
4498      linebuffer *plb;
4499      FILE *inf;
4500 {
4501   char *cp;
4502
4503   do
4504     {
4505       for (cp = plb->buffer; *cp != '\0'; cp++)
4506         if (cp[0] == '*' && cp[1] == '/')
4507           return;
4508       lineno++;
4509       linecharno += readline (plb, inf);
4510     }
4511   while (!feof(inf));
4512 }
4513
4514 /*
4515  * A predicate definition is added if it matches:
4516  *     <beginning of line><Prolog Atom><whitespace>(
4517  *
4518  * It is added to the tags database if it doesn't match the
4519  * name of the previous clause header.
4520  *
4521  * Return the size of the name of the predicate, or 0 if no header
4522  * was found.
4523  */
4524 static int
4525 prolog_pred (s, last)
4526      char *s;
4527      char *last;                /* Name of last clause. */
4528 {
4529   int pos;
4530   int len;
4531
4532   pos = prolog_atom (s, 0);
4533   if (pos < 1)
4534     return 0;
4535
4536   len = pos;
4537   pos = skip_spaces (s + pos) - s;
4538
4539   if ((s[pos] == '(') || (s[pos] == '.'))
4540     {
4541       if (s[pos] == '(')
4542         pos++;
4543
4544       /* Save only the first clause. */
4545       if (last == NULL
4546           || len != (int)strlen (last)
4547           || !strneq (s, last, len))
4548         {
4549           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4550           return len;
4551         }
4552     }
4553   return 0;
4554 }
4555
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and underscores, starting with a lower
 *   case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.  The count includes both quotes.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        if (p[1] != '\'')
          return p + 1 - start; /* closing quote */
        p += 2;                 /* A double quote */
        break;
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;
      default:
        p++;
        break;
      }
}
4614 \f
4615 /*
4616  * Support for Erlang  --  Anders Lindgren, Feb 1996.
4617  *
4618  * Generates tags for functions, defines, and records.
4619  *
4620  * Assumes that Erlang functions start at column 0.
4621  */
4622 static int erlang_func P_((char *, char *));
4623 static void erlang_attribute P_((char *));
4624 static int erlang_atom P_((char *, int));
4625
4626 static void
4627 Erlang_functions (inf)
4628      FILE *inf;
4629 {
4630   char *cp, *last;
4631   int len;
4632   int allocated;
4633
4634   allocated = 0;
4635   len = 0;
4636   last = NULL;
4637
4638   LOOP_ON_INPUT_LINES (inf, lb, cp)
4639     {
4640       if (cp[0] == '\0')        /* Empty line */
4641         continue;
4642       else if (iswhite (cp[0])) /* Not function nor attribute */
4643         continue;
4644       else if (cp[0] == '%')    /* comment */
4645         continue;
4646       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
4647         continue;
4648       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
4649         {
4650           erlang_attribute (cp);
4651           last = NULL;
4652         }
4653       else if ((len = erlang_func (cp, last)) > 0)
4654         {
4655           /*
4656            * Function.  Store the function name so that we only
4657            * generates a tag for the first clause.
4658            */
4659           if (last == NULL)
4660             last = xnew (len + 1, char);
4661           else if (len + 1 > allocated)
4662             last = xrnew (last, len + 1, char);
4663           allocated = len + 1;
4664           strncpy (last, cp, len);
4665           last[len] = '\0';
4666         }
4667     }
4668 }
4669
4670
4671 /*
4672  * A function definition is added if it matches:
4673  *     <beginning of line><Erlang Atom><whitespace>(
4674  *
4675  * It is added to the tags database if it doesn't match the
4676  * name of the previous clause header.
4677  *
4678  * Return the size of the name of the function, or 0 if no function
4679  * was found.
4680  */
4681 static int
4682 erlang_func (s, last)
4683      char *s;
4684      char *last;                /* Name of last clause. */
4685 {
4686   int pos;
4687   int len;
4688
4689   pos = erlang_atom (s, 0);
4690   if (pos < 1)
4691     return 0;
4692
4693   len = pos;
4694   pos = skip_spaces (s + pos) - s;
4695
4696   /* Save only the first clause. */
4697   if (s[pos++] == '('
4698       && (last == NULL
4699           || len != (int)strlen (last)
4700           || !strneq (s, last, len)))
4701         {
4702           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4703           return len;
4704         }
4705
4706   return 0;
4707 }
4708
4709
4710 /*
4711  * Handle attributes.  Currently, tags are generated for defines
4712  * and records.
4713  *
4714  * They are on the form:
4715  * -define(foo, bar).
4716  * -define(Foo(M, N), M+N).
4717  * -record(graph, {vtab = notable, cyclic = true}).
4718  */
4719 static void
4720 erlang_attribute (s)
4721      char *s;
4722 {
4723   int pos;
4724   int len;
4725
4726   if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4727     {
4728       pos = skip_spaces (s + 7) - s;
4729       if (s[pos++] == '(')
4730         {
4731           pos = skip_spaces (s + pos) - s;
4732           len = erlang_atom (s, pos);
4733           if (len != 0)
4734             pfnote (savenstr (& s[pos], len), TRUE,
4735                     s, pos + len, lineno, linecharno);
4736         }
4737     }
4738   return;
4739 }
4740
4741
/*
 * Consume an Erlang atom (or variable) starting at S[POS].
 *
 * Two forms are accepted: a letter or underscore followed by
 * alphanumerics and underscores, or a single-quoted string in which
 * a backslash quotes the following character.
 *
 * Return the number of bytes consumed, or -1 if there was an error.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *cur = start;

  if (ISALPHA (*cur) || *cur == '_')
    {
      /* Unquoted: take the whole alphanumeric/underscore run. */
      for (cur++; ISALNUM (*cur) || *cur == '_'; cur++)
        continue;
      return (int) (cur - start);
    }

  if (*cur != '\'')
    return -1;

  /* Quoted: look for the closing quote on this line. */
  for (cur++; *cur != '\''; )
    {
      if (*cur == '\0')
        /* Multiline quoted atoms are ignored. */
        return -1;

      if (*cur != '\\')
        {
          cur++;
          continue;
        }

      /* A backslash must be followed by the character it quotes. */
      if (cur[1] == '\0')
        return -1;
      cur += 2;
    }

  cur++;                        /* step over the closing quote */
  return (int) (cur - start);
}
4791 \f
4792 #ifdef ETAGS_REGEXPS
4793
4794 static char *scan_separators P_((char *));
4795 static void analyse_regex P_((char *, bool));
4796 static void add_regex P_((char *, bool, language *));
4797 static char *substitute P_((char *, char *, struct re_registers *));
4798
/* Take a string like "/blah/" and turn it into "blah", handling
   quoted separator characters.  The first character of NAME is the
   separator; scanning stops at the first unquoted occurrence of it.
   A backslash quotes the next character: "\t" becomes a Tab, a quoted
   separator becomes the separator itself, and any other quoted
   character is kept together with its backslash.  A backslash that
   is the last character of the string is dropped.

   Works in place: the result is written from NAME[0] onwards and is
   null terminated.  Returns a pointer to the terminating separator,
   or to the end of the string if no unquoted separator was found.
   An empty NAME is returned unchanged. */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *copyto = name;
  char *src;

  /* Nothing to scan: stepping past the terminator would read
     outside the string. */
  if (sep == '\0')
    return name;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character. */
          src++;
          if (*src == '\0')
            break;              /* dangling backslash at end of string */
          if (*src == 't')
            *copyto++ = '\t';
          else if (*src == sep)
            *copyto++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *copyto++ = '\\';
              *copyto++ = *src;
            }
        }
      else if (*src == sep)
        break;
      else
        *copyto++ = *src;
    }

  /* Terminate copied string. */
  *copyto = '\0';
  return src;
}
4841
4842 /* Look at the argument of --regex or --no-regex and do the right
4843    thing.  Same for each line of a regexp file. */
4844 static void
4845 analyse_regex (regex_arg, ignore_case)
4846      char *regex_arg;
4847      bool ignore_case;
4848 {
4849   if (regex_arg == NULL)
4850     free_patterns ();           /* --no-regex: remove existing regexps */
4851
4852   /* A real --regexp option or a line in a regexp file. */
4853   switch (regex_arg[0])
4854     {
4855       /* Comments in regexp file or null arg to --regex. */
4856     case '\0':
4857     case ' ':
4858     case '\t':
4859       break;
4860
4861       /* Read a regex file.  This is recursive and may result in a
4862          loop, which will stop when the file descriptors are exhausted. */
4863     case '@':
4864       {
4865         FILE *regexfp;
4866         linebuffer regexbuf;
4867         char *regexfile = regex_arg + 1;
4868
4869         /* regexfile is a file containing regexps, one per line. */
4870         regexfp = fopen (regexfile, "r");
4871         if (regexfp == NULL)
4872           {
4873             pfatal (regexfile);
4874             return;
4875           }
4876         initbuffer (&regexbuf);
4877         while (readline_internal (&regexbuf, regexfp) > 0)
4878           analyse_regex (regexbuf.buffer, ignore_case);
4879         free (regexbuf.buffer);
4880         fclose (regexfp);
4881       }
4882       break;
4883
4884       /* Regexp to be used for a specific language only. */
4885     case '{':
4886       {
4887         language *lang;
4888         char *lang_name = regex_arg + 1;
4889         char *cp;
4890
4891         for (cp = lang_name; *cp != '}'; cp++)
4892           if (*cp == '\0')
4893             {
4894               error ("unterminated language name in regex: %s", regex_arg);
4895               return;
4896             }
4897         *cp = '\0';
4898         lang = get_language_from_langname (lang_name);
4899         if (lang == NULL)
4900           return;
4901         add_regex (cp + 1, ignore_case, lang);
4902       }
4903       break;
4904
4905       /* Regexp to be used for any language. */
4906     default:
4907       add_regex (regex_arg, ignore_case, NULL);
4908       break;
4909     }
4910 }
4911
4912 /* Turn a name, which is an ed-style (but Emacs syntax) regular
4913    expression, into a real regular expression by compiling it. */
4914 static void
4915 add_regex (regexp_pattern, ignore_case, lang)
4916      char *regexp_pattern;
4917      bool ignore_case;
4918      language *lang;
4919 {
4920   char *name;
4921   const char *err;
4922   struct re_pattern_buffer *patbuf;
4923   pattern *pp;
4924
4925
4926   if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
4927     {
4928       error ("%s: unterminated regexp", regexp_pattern);
4929       return;
4930     }
4931   name = scan_separators (regexp_pattern);
4932   if (regexp_pattern[0] == '\0')
4933     {
4934       error ("null regexp", (char *)NULL);
4935       return;
4936     }
4937   (void) scan_separators (name);
4938
4939   patbuf = xnew (1, struct re_pattern_buffer);
4940   /* Translation table to fold case if appropriate. */
4941   patbuf->translate = (ignore_case) ? lc_trans : NULL;
4942   patbuf->fastmap = NULL;
4943   patbuf->buffer = NULL;
4944   patbuf->allocated = 0;
4945
4946   err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
4947   if (err != NULL)
4948     {
4949       error ("%s while compiling pattern", err);
4950       return;
4951     }
4952
4953   pp = p_head;
4954   p_head = xnew (1, pattern);
4955   p_head->regex = savestr (regexp_pattern);
4956   p_head->p_next = pp;
4957   p_head->language = lang;
4958   p_head->pattern = patbuf;
4959   p_head->name_pattern = savestr (name);
4960   p_head->error_signaled = FALSE;
4961 }
4962
4963 /*
4964  * Do the substitutions indicated by the regular expression and
4965  * arguments.
4966  */
4967 static char *
4968 substitute (in, out, regs)
4969      char *in, *out;
4970      struct re_registers *regs;
4971 {
4972   char *result, *t;
4973   int size, dig, diglen;
4974
4975   result = NULL;
4976   size = strlen (out);
4977
4978   /* Pass 1: figure out how much to allocate by finding all \N strings. */
4979   if (out[size - 1] == '\\')
4980     fatal ("pattern error in \"%s\"", out);
4981   for (t = etags_strchr (out, '\\');
4982        t != NULL;
4983        t = etags_strchr (t + 2, '\\'))
4984     if (ISDIGIT (t[1]))
4985       {
4986         dig = t[1] - '0';
4987         diglen = regs->end[dig] - regs->start[dig];
4988         size += diglen - 2;
4989       }
4990     else
4991       size -= 1;
4992
4993   /* Allocate space and do the substitutions. */
4994   result = xnew (size + 1, char);
4995
4996   for (t = result; *out != '\0'; out++)
4997     if (*out == '\\' && ISDIGIT (*++out))
4998       {
4999         /* Using "dig2" satisfies my debugger.  Bleah. */
5000         dig = *out - '0';
5001         diglen = regs->end[dig] - regs->start[dig];
5002         strncpy (t, in + regs->start[dig], diglen);
5003         t += diglen;
5004       }
5005     else
5006       *t++ = *out;
5007   *t = '\0';
5008
5009   if (DEBUG && (t > result + size || t - result != (int)strlen (result)))
5010     abort ();
5011
5012   return result;
5013 }
5014
5015 /* Deallocate all patterns. */
5016 static void
5017 free_patterns ()
5018 {
5019   pattern *pp;
5020   while (p_head != NULL)
5021     {
5022       pp = p_head->p_next;
5023       free (p_head->regex);
5024       free (p_head->name_pattern);
5025       free (p_head);
5026       p_head = pp;
5027     }
5028   return;
5029 }
5030 \f
5031 static void
5032 get_tag (bp)
5033      register char *bp;
5034 {
5035   register char *cp;
5036
5037   if (*bp == '\0')
5038     return;
5039   /* Go till you get to white space or a syntactic break */
5040   for (cp = bp + 1;
5041        *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5042        cp++)
5043     continue;
5044   pfnote (savenstr (bp, cp-bp), TRUE,
5045           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5046 }
5047
5048 #endif /* ETAGS_REGEXPS */
5049 /* Initialize a linebuffer for use */
5050 static void
5051 initbuffer (lbp)
5052      linebuffer *lbp;
5053 {
5054   lbp->size = 200;
5055   lbp->buffer = xnew (200, char);
5056 }
5057
5058 /*
5059  * Read a line of text from `stream' into `lbp', excluding the
5060  * newline or CR-NL, if any.  Return the number of characters read from
5061  * `stream', which is the length of the line including the newline.
5062  *
5063  * On DOS or Windows we do not count the CR character, if any, before the
5064  * NL, in the returned length; this mirrors the behavior of emacs on those
5065  * platforms (for text files, it translates CR-NL to NL as it reads in the
5066  * file).
5067  */
5068 static long
5069 readline_internal (lbp, stream)
5070      linebuffer *lbp;
5071      register FILE *stream;
5072 {
5073   char *buffer = lbp->buffer;
5074   register char *p = lbp->buffer;
5075   register char *pend;
5076   int chars_deleted;
5077
5078   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5079
5080   while (1)
5081     {
5082       register int c = getc (stream);
5083       if (p == pend)
5084         {
5085           /* We're at the end of linebuffer: expand it. */
5086           lbp->size *= 2;
5087           buffer = xrnew (buffer, lbp->size, char);
5088           p += buffer - lbp->buffer;
5089           pend = buffer + lbp->size;
5090           lbp->buffer = buffer;
5091         }
5092       if (c == EOF)
5093         {
5094           *p = '\0';
5095           chars_deleted = 0;
5096           break;
5097         }
5098       if (c == '\n')
5099         {
5100           if (p > buffer && p[-1] == '\r')
5101             {
5102               p -= 1;
5103 #ifdef DOS_NT
5104              /* Assume CRLF->LF translation will be performed by Emacs
5105                 when loading this file, so CRs won't appear in the buffer.
5106                 It would be cleaner to compensate within Emacs;
5107                 however, Emacs does not know how many CRs were deleted
5108                 before any given point in the file.  */
5109               chars_deleted = 1;
5110 #else
5111               chars_deleted = 2;
5112 #endif
5113             }
5114           else
5115             {
5116               chars_deleted = 1;
5117             }
5118           *p = '\0';
5119           break;
5120         }
5121       *p++ = c;
5122     }
5123   lbp->len = p - buffer;
5124
5125   return lbp->len + chars_deleted;
5126 }
5127
5128 /*
5129  * Like readline_internal, above, but in addition try to match the
5130  * input line against relevant regular expressions.
5131  */
5132 static long
5133 readline (lbp, stream)
5134      linebuffer *lbp;
5135      FILE *stream;
5136 {
5137   /* Read new line. */
5138   long result = readline_internal (lbp, stream);
5139 #ifdef ETAGS_REGEXPS
5140   int match;
5141   pattern *pp;
5142
5143   /* Match against relevant patterns. */
5144   if (lbp->len > 0)
5145     for (pp = p_head; pp != NULL; pp = pp->p_next)
5146       {
5147         /* Only use generic regexps or those for the current language. */
5148         if (pp->language != NULL && pp->language != curlang)
5149           continue;
5150
5151         match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5152         switch (match)
5153           {
5154           case -2:
5155             /* Some error. */
5156             if (!pp->error_signaled)
5157               {
5158                 error ("error while matching \"%s\"", pp->regex);
5159                 pp->error_signaled = TRUE;
5160               }
5161             break;
5162           case -1:
5163             /* No match. */
5164             break;
5165           default:
5166             /* Match occurred.  Construct a tag. */
5167             if (pp->name_pattern[0] != '\0')
5168               {
5169                 /* Make a named tag. */
5170                 char *name = substitute (lbp->buffer,
5171                                          pp->name_pattern, &pp->regs);
5172                 if (name != NULL)
5173                   pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5174               }
5175             else
5176               {
5177                 /* Make an unnamed tag. */
5178                 pfnote ((char *)NULL, TRUE,
5179                         lbp->buffer, match, lineno, linecharno);
5180               }
5181             break;
5182           }
5183       }
5184 #endif /* ETAGS_REGEXPS */
5185
5186   return result;
5187 }
5188 \f
/*
 * Duplicate the whole string CP: the copy is made by savenstr with
 * the full length of CP.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5199
5200 /*
5201  * Return a pointer to a space of size LEN+1 allocated with xnew where
5202  * the string CP has been copied for at most the first LEN characters.
5203  */
5204 static char *
5205 savenstr (cp, len)
5206      char *cp;
5207      int len;
5208 {
5209   register char *dp;
5210
5211   dp = xnew (len + 1, char);
5212   strncpy (dp, cp, len);
5213   dp[len] = '\0';
5214   return dp;
5215 }
5216
/*
 * Find the last occurrence of the character C in the string SP.
 * The terminating null counts as part of the string.
 * Return a pointer to the occurrence, or NULL if there is none.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        found = sp;             /* remember the latest hit */
      if (*sp == '\0')
        break;
    }
  return (char *)found;
}
5238
5239
/*
 * Find the first occurrence of the character C in the string SP.
 * The terminating null counts as part of the string.
 * Return a pointer to the occurrence, or NULL if there is none.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
5258
/* Return a pointer to the first character at or after CP for which
   iswhite is false. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5268
/* Return a pointer to the first character at or after CP that is
   either white (per iswhite) or the end of the string. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (;;)
    {
      if (*cp == '\0' || iswhite (*cp))
        return cp;
      cp++;
    }
}
5278
5279 /* Print error message and exit.  */
5280 static void
5281 fatal (s1, s2)
5282      char *s1, *s2;
5283 {
5284   error (s1, s2);
5285   exit (BAD);
5286 }
5287
5288 static void
5289 pfatal (s1)
5290      char *s1;
5291 {
5292   perror (s1);
5293   exit (BAD);
5294 }
5295
5296 static void
5297 suggest_asking_for_help ()
5298 {
5299   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5300            progname,
5301 #ifdef LONG_OPTIONS
5302            "--help"
5303 #else
5304            "-h"
5305 #endif
5306            );
5307   exit (BAD);
5308 }
5309
5310 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
5311 static void
5312 error (s1, s2)
5313      const char *s1, *s2;
5314 {
5315   fprintf (stderr, "%s: ", progname);
5316   fprintf (stderr, s1, s2);
5317   fprintf (stderr, "\n");
5318 }
5319
5320 /* Return a newly-allocated string whose contents
5321    concatenate those of s1, s2, s3.  */
5322 static char *
5323 concat (s1, s2, s3)
5324      char *s1, *s2, *s3;
5325 {
5326   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5327   char *result = xnew (len1 + len2 + len3 + 1, char);
5328
5329   strcpy (result, s1);
5330   strcpy (result + len1, s2);
5331   strcpy (result + len1 + len2, s3);
5332   result[len1 + len2 + len3] = '\0';
5333
5334   return result;
5335 }
5336 \f
5337 /* Does the same work as the system V getcwd, but does not need to
5338    guess the buffer size in advance. */
5339 static char *
5340 etags_getcwd ()
5341 {
5342 #ifdef HAVE_GETCWD
5343   int bufsize = 200;
5344   char *path = xnew (bufsize, char);
5345
5346   while (getcwd (path, bufsize) == NULL)
5347     {
5348       if (errno != ERANGE)
5349         pfatal ("getcwd");
5350       bufsize *= 2;
5351       free (path);
5352       path = xnew (bufsize, char);
5353     }
5354
5355   canonicalize_filename (path);
5356   return path;
5357
5358 #else /* not HAVE_GETCWD */
5359 #ifdef MSDOS
5360   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
5361
5362   getwd (path);
5363
5364   for (p = path; *p != '\0'; p++)
5365     if (*p == '\\')
5366       *p = '/';
5367     else
5368       *p = lowcase (*p);
5369
5370   return strdup (path);
5371 #else /* not MSDOS */
5372   linebuffer path;
5373   FILE *pipe;
5374
5375   initbuffer (&path);
5376   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5377   if (pipe == NULL || readline_internal (&path, pipe) == 0)
5378     pfatal ("pwd");
5379   pclose (pipe);
5380
5381   return path.buffer;
5382 #endif /* not MSDOS */
5383 #endif /* not HAVE_GETCWD */
5384 }
5385
5386 /* Return a newly allocated string containing the file name of FILE
5387    relative to the absolute directory DIR (which should end with a slash). */
5388 static char *
5389 relative_filename (file, dir)
5390      char *file, *dir;
5391 {
5392   char *fp, *dp, *afn, *res;
5393   int i;
5394
5395   /* Find the common root of file and dir (with a trailing slash). */
5396   afn = absolute_filename (file, cwd);
5397   fp = afn;
5398   dp = dir;
5399   while (*fp++ == *dp++)
5400     continue;
5401   fp--, dp--;                   /* back to the first differing char */
5402 #ifdef DOS_NT
5403   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5404     return afn;
5405 #endif
5406   do                            /* look at the equal chars until '/' */
5407     fp--, dp--;
5408   while (*fp != '/');
5409
5410   /* Build a sequence of "../" strings for the resulting relative file name. */
5411   i = 0;
5412   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5413     i += 1;
5414   res = xnew (3*i + strlen (fp + 1) + 1, char);
5415   res[0] = '\0';
5416   while (i-- > 0)
5417     strcat (res, "../");
5418
5419   /* Add the file name relative to the common root of file and dir. */
5420   strcat (res, fp + 1);
5421   free (afn);
5422
5423   return res;
5424 }
5425
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute it is appended to DIR.  The
   "/dirname/.." and "/." substrings are then deleted from the name.
   If nothing is left, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *src, *dst;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Search backwards, without ever leaving the buffer,
                 for the start of the component before "/..". */
              cp = NULL;
              for (src = slashp; src > res; )
                {
                  src--;
                  if (filename_is_absolute (src))
                    {
                      cp = src;
                      break;
                    }
                }
              if (cp == NULL)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Close the gap.  Source and destination overlap, so
                 strcpy must not be used; copy forwards by hand. */
              for (dst = cp, src = slashp + 3;
                   (*dst = *src) != '\0';
                   dst++, src++)
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." the same way, again on overlapping text. */
              for (dst = slashp, src = slashp + 2;
                   (*dst = *src) != '\0';
                   dst++, src++)
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      free (res);               /* the empty name is not returned */
      return savestr ("/");
    }
  else
    return res;
}
5486
5487 /* Return a newly allocated string containing the absolute
5488    file name of dir where FILE resides given DIR (which should
5489    end with a slash). */
5490 static char *
5491 absolute_dirname (file, dir)
5492      char *file, *dir;
5493 {
5494   char *slashp, *res;
5495   char save;
5496
5497   canonicalize_filename (file);
5498   slashp = etags_strrchr (file, '/');
5499   if (slashp == NULL)
5500     return savestr (dir);
5501   save = slashp[1];
5502   slashp[1] = '\0';
5503   res = absolute_filename (file, dir);
5504   slashp[1] = save;
5505
5506   return res;
5507 }
5508
5509 /* Whether the argument string is an absolute file name.  The argument
5510    string must have been canonicalized with canonicalize_filename. */
5511 static bool
5512 filename_is_absolute (fn)
5513      char *fn;
5514 {
5515   return (fn[0] == '/'
5516 #ifdef DOS_NT
5517           || (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/')
5518 #endif
5519           );
5520 }
5521
/* Put the file name FN in canonical form, in place.  Under DOS_NT a
   lower case drive letter is made upper case and backslashes become
   slashes; elsewhere nothing is changed. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes, hopping from one to the next.  */
  while ((fn = etags_strchr (fn, '\\')) != NULL)
    *fn = '/';
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5540
5541 /* Increase the size of a linebuffer. */
5542 static void
5543 grow_linebuffer (lbp, toksize)
5544      linebuffer *lbp;
5545      int toksize;
5546 {
5547   while (lbp->size < toksize)
5548     lbp->size *= 2;
5549   lbp->buffer = xrnew (lbp->buffer, lbp->size, char);
5550 }
5551
/* Allocate SIZE bytes with malloc and return them.  A NULL result
   from malloc is reported as a fatal error.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *mem;

  mem = (long *) malloc (size);
  if (!mem)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}
5562
/* Resize the block PTR to SIZE bytes with realloc and return the
   result.  A NULL result from realloc is reported as a fatal error.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *mem;

  mem = (long *) realloc (ptr, size);
  if (!mem)
    fatal ("virtual memory exhausted", (char *)NULL);
  return mem;
}