lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs
   2    Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99, 2000
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5 This file is not considered part of GNU Emacs.
   6
   7 This program is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software Foundation,
  19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  * 199x Sam Kendall added C++.
  28  * 1993 Francesco Potortì reorganised C and C++ based on work by Joe Wells.
  29  * 1994 Regexp tags by Tom Tromey.
  30  * 2001 Nested classes by Francesco Potortì based on work by Mykola Dzyuba.
  31  *
  32  *      Francesco Potortì <pot@gnu.org> has maintained it since 1993.
  33  */
  34
  35 char pot_etags_version[] = "@(#) pot number is $Revision: 13.59 $";
  36
  37 #define TRUE    1
  38 #define FALSE   0
  39
  40 #ifdef DEBUG
  41 #  undef DEBUG
  42 #  define DEBUG TRUE
  43 #else
  44 #  define DEBUG  FALSE
  45 #  define NDEBUG                /* disable assert */
  46 #endif
  47
  48 #if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  49 # define P_(proto) proto
  50 #else
  51 # define P_(proto) ()
  52 #endif
  53
  54 #ifdef HAVE_CONFIG_H
  55 # include <config.h>
  56   /* On some systems, Emacs defines static as nothing for the sake
  57      of unexec.  We don't want that here since we don't use unexec. */
  58 # undef static
  59 # define ETAGS_REGEXPS          /* use the regexp features */
  60 # define LONG_OPTIONS           /* accept long options */
  61 #endif /* HAVE_CONFIG_H */
  62
  63 #ifndef _GNU_SOURCE
  64 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  65 #endif
  66
  67 #ifdef MSDOS
  68 # undef MSDOS
  69 # define MSDOS TRUE
  70 # include <fcntl.h>
  71 # include <sys/param.h>
  72 # include <io.h>
  73 # ifndef HAVE_CONFIG_H
  74 #   define DOS_NT
  75 #   include <sys/config.h>
  76 # endif
  77 #else
  78 # define MSDOS FALSE
  79 #endif /* MSDOS */
  80
  81 #ifdef WINDOWSNT
  82 # include <stdlib.h>
  83 # include <fcntl.h>
  84 # include <string.h>
  85 # include <direct.h>
  86 # include <io.h>
  87 # define MAXPATHLEN _MAX_PATH
  88 # ifdef HAVE_CONFIG_H
  89 #   undef HAVE_NTGUI
  90 # else
  91 #   define DOS_NT
  92 # endif /* not HAVE_CONFIG_H */
  93 # ifndef HAVE_GETCWD
  94 #   define HAVE_GETCWD
  95 # endif /* undef HAVE_GETCWD */
  96 #else /* !WINDOWSNT */
  97 # ifdef STDC_HEADERS
  98 #  include <stdlib.h>
  99 #  include <string.h>
 100 # else
 101     extern char *getenv ();
 102 # endif
 103 #endif /* !WINDOWSNT */
 104
 105 #ifdef HAVE_UNISTD_H
 106 # include <unistd.h>
 107 #else
 108 # if defined (HAVE_GETCWD) && !WINDOWSNT
 109     extern char *getcwd (char *buf, size_t size);
 110 # endif
 111 #endif /* HAVE_UNISTD_H */
 112
 113 #include <stdio.h>
 114 #include <ctype.h>
 115 #include <errno.h>
 116 #ifndef errno
 117   extern int errno;
 118 #endif
 119 #include <assert.h>
 120 #include <sys/types.h>
 121 #include <sys/stat.h>
 122
 123 #if !defined (S_ISREG) && defined (S_IFREG)
 124 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 125 #endif
 126
 127 #ifdef LONG_OPTIONS
 128 # include <getopt.h>
 129 #else
 130 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 131   extern char *optarg;
 132   extern int optind, opterr;
 133 #endif /* LONG_OPTIONS */
 134
 135 #ifdef ETAGS_REGEXPS
 136 # include <regex.h>
 137 #endif /* ETAGS_REGEXPS */
 138
 139 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 140  Leave it undefined to make the program "etags", which makes emacs-style
 141  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 142 #ifdef CTAGS
 143 # undef  CTAGS
 144 # define CTAGS TRUE
 145 #else
 146 # define CTAGS FALSE
 147 #endif
 148
 149 /* Exit codes for success and failure.  */
 150 #ifdef VMS
 151 # define        GOOD    1
 152 # define        BAD     0
 153 #else
 154 # define        GOOD    0
 155 # define        BAD     1
 156 #endif
 157
 158 /* C extensions. */
 159 #define C_PLPL  0x00001         /* C++ */
 160 #define C_STAR  0x00003         /* C* */
 161 #define C_JAVA  0x00005         /* JAVA */
 162 #define YACC    0x10000         /* yacc file */
 163 #define PUREC   (!(c_ext & ~YACC)) /* no extensions (apart from possibly yacc) */
 164
 165 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 166 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 167
 168 #define CHARS 256               /* 2^sizeof(char) */
 169 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 170 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
 171 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
 172 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
 173 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
 174 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
 175
 176 #define ISALNUM(c)      isalnum (CHAR(c))
 177 #define ISALPHA(c)      isalpha (CHAR(c))
 178 #define ISDIGIT(c)      isdigit (CHAR(c))
 179 #define ISLOWER(c)      islower (CHAR(c))
 180
 181 #define lowcase(c)      tolower (CHAR(c))
 182 #define upcase(c)       toupper (CHAR(c))
 183
 184
 185 /*
 186  *      xnew, xrnew -- allocate, reallocate storage
 187  *
 188  * SYNOPSIS:    Type *xnew (int n, Type);
 189  *              void xrnew (OldPointer, int n, Type);
 190  */
 191 #if DEBUG
 192 # include "chkmalloc.h"
 193 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 194                                                   (n) * sizeof (Type)))
 195 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 196                                         (char *) (op), (n) * sizeof (Type)))
 197 #else
 198 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 199 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 200                                         (char *) (op), (n) * sizeof (Type)))
 201 #endif
 202
 203 typedef int bool;
 204
 205 typedef void Lang_function P_((FILE *));
 206
 207 typedef struct
 208 {
 209   char *suffix;
 210   char *command;                /* Takes one arg and decompresses to stdout */
 211 } compressor;
 212
 213 typedef struct
 214 {
 215   char *name;
 216   Lang_function *function;
 217   char **filenames;
 218   char **suffixes;
 219   char **interpreters;
 220 } language;
 221
 222 typedef struct node_st
 223 {                               /* sorting structure            */
 224   char *name;                   /* function or type name        */
 225   char *file;                   /* file name                    */
 226   bool is_func;                 /* use pattern or line no       */
 227   bool been_warned;             /* set if noticed dup           */
 228   int lno;                      /* line number tag is on        */
 229   long cno;                     /* character number line starts on */
 230   char *pat;                    /* search pattern               */
 231   struct node_st *left, *right; /* left and right sons          */
 232 } node;
 233
 234 /*
 235  * A `linebuffer' is a structure which holds a line of text.
 236  * `readline_internal' reads a line from a stream into a linebuffer
 237  * and works regardless of the length of the line.
 238  * SIZE is the size of BUFFER, LEN is the length of the string in
 239  * BUFFER after readline reads it.
 240  */
 241 typedef struct
 242 {
 243   long size;
 244   int len;
 245   char *buffer;
 246 } linebuffer;
 247
 248 /* Many compilers barf on this:
 249         Lang_function Ada_funcs;
 250    so let's write it this way */
 251 static void Ada_funcs P_((FILE *));
 252 static void Asm_labels P_((FILE *));
 253 static void C_entries P_((int c_ext, FILE *));
 254 static void default_C_entries P_((FILE *));
 255 static void plain_C_entries P_((FILE *));
 256 static void Cjava_entries P_((FILE *));
 257 static void Cobol_paragraphs P_((FILE *));
 258 static void Cplusplus_entries P_((FILE *));
 259 static void Cstar_entries P_((FILE *));
 260 static void Erlang_functions P_((FILE *));
 261 static void Fortran_functions P_((FILE *));
 262 static void Yacc_entries P_((FILE *));
 263 static void Lisp_functions P_((FILE *));
 264 static void Makefile_targets P_((FILE *));
 265 static void Pascal_functions P_((FILE *));
 266 static void Perl_functions P_((FILE *));
 267 static void Postscript_functions P_((FILE *));
 268 static void Prolog_functions P_((FILE *));
 269 static void Python_functions P_((FILE *));
 270 static void Scheme_functions P_((FILE *));
 271 static void TeX_commands P_((FILE *));
 272 static void Texinfo_nodes P_((FILE *));
 273 static void just_read_file P_((FILE *));
 274
 275 static void print_language_names P_((void));
 276 static void print_version P_((void));
 277 static void print_help P_((void));
 278 int main P_((int, char **));
 279 static int number_len P_((long));
 280
 281 static compressor *get_compressor_from_suffix P_((char *, char **));
 282 static language *get_language_from_langname P_((char *));
 283 static language *get_language_from_interpreter P_((char *));
 284 static language *get_language_from_filename P_((char *));
 285 static int total_size_of_entries P_((node *));
 286 static long readline P_((linebuffer *, FILE *));
 287 static long readline_internal P_((linebuffer *, FILE *));
 288 static void get_tag P_((char *));
 289
 290 #ifdef ETAGS_REGEXPS
 291 static void analyse_regex P_((char *, bool));
 292 static void add_regex P_((char *, bool, language *));
 293 static void free_patterns P_((void));
 294 #endif /* ETAGS_REGEXPS */
 295 static void error P_((const char *, const char *));
 296 static void suggest_asking_for_help P_((void));
 297 void fatal P_((char *, char *));
 298 static void pfatal P_((char *));
 299 static void add_node P_((node *, node **));
 300
 301 static void init P_((void));
 302 static void initbuffer P_((linebuffer *));
 303 static void find_entries P_((char *, FILE *));
 304 static void free_tree P_((node *));
 305 static void pfnote P_((char *, bool, char *, int, int, long));
 306 static void new_pfnote P_((char *, int, bool, char *, int, int, long));
 307 static void process_file P_((char *));
 308 static void put_entries P_((node *));
 309 static void takeprec P_((void));
 310
 311 static char *concat P_((char *, char *, char *));
 312 static char *skip_spaces P_((char *));
 313 static char *skip_non_spaces P_((char *));
 314 static char *savenstr P_((char *, int));
 315 static char *savestr P_((char *));
 316 static char *etags_strchr P_((const char *, int));
 317 static char *etags_strrchr P_((const char *, int));
 318 static char *etags_getcwd P_((void));
 319 static char *relative_filename P_((char *, char *));
 320 static char *absolute_filename P_((char *, char *));
 321 static char *absolute_dirname P_((char *, char *));
 322 static bool filename_is_absolute P_((char *f));
 323 static void canonicalize_filename P_((char *));
 324 static void linebuffer_setlen P_((linebuffer *, int));
 325 long *xmalloc P_((unsigned int));
 326 long *xrealloc P_((char *, unsigned int));
 327
 328 \f
 329 char searchar = '/';            /* use /.../ searches */
 330
 331 char *tagfile;                  /* output file */
 332 char *progname;                 /* name this program was invoked with */
 333 char *cwd;                      /* current working directory */
 334 char *tagfiledir;               /* directory of tagfile */
 335 FILE *tagf;                     /* ioptr for tags file */
 336
 337 char *curfile;                  /* current input file name */
 338 language *curlang;              /* current language */
 339
 340 int lineno;                     /* line number of current line */
 341 long charno;                    /* current character number */
 342 long linecharno;                /* charno of start of current line */
 343 char *dbp;                      /* pointer to start of current tag */
 344
 345 node *head;                     /* the head of the binary tree of tags */
 346
 347 linebuffer lb;                  /* the current line */
 348
 349 /* boolean "functions" (see init)       */
 350 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 351 char
 352   /* white chars */
 353   *white = " \f\t\n\r\v",
 354   /* not in a name */
 355   *nonam = " \f\t\n\r(=,[;",
 356   /* token ending chars */
 357   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 358   /* token starting chars */
 359   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 360   /* valid in-token chars */
 361   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 362
 363 bool append_to_tagfile;         /* -a: append to tags */
 364 /* The following four default to TRUE for etags, but to FALSE for ctags.  */
 365 bool typedefs;                  /* -t: create tags for C and Ada typedefs */
 366 bool typedefs_or_cplusplus;     /* -T: create tags for C typedefs, level */
 367                                 /* 0 struct/enum/union decls, and C++ */
 368                                 /* member functions. */
 369 bool constantypedefs;           /* -d: create tags for C #define, enum */
 370                                 /* constants and variables. */
 371                                 /* -D: opposite of -d.  Default under ctags. */
 372 bool declarations;              /* --declarations: tag them and extern in C&Co*/
 373 bool globals;                   /* create tags for global variables */
 374 bool members;                   /* create tags for C member variables */
 375 bool update;                    /* -u: update tags */
 376 bool vgrind_style;              /* -v: create vgrind style index output */
 377 bool no_warnings;               /* -w: suppress warnings */
 378 bool cxref_style;               /* -x: create cxref style output */
 379 bool cplusplus;                 /* .[hc] means C++, not C */
 380 bool noindentypedefs;           /* -I: ignore indentation in C */
 381 bool packages_only;             /* --packages-only: in Ada, only tag packages*/
 382
 383 #ifdef LONG_OPTIONS
 384 struct option longopts[] =
 385 {
 386   { "packages-only",      no_argument,       &packages_only, TRUE  },
 387   { "append",             no_argument,       NULL,           'a'   },
 388   { "backward-search",    no_argument,       NULL,           'B'   },
 389   { "c++",                no_argument,       NULL,           'C'   },
 390   { "cxref",              no_argument,       NULL,           'x'   },
 391   { "defines",            no_argument,       NULL,           'd'   },
 392   { "declarations",       no_argument,       &declarations,  TRUE  },
 393   { "no-defines",         no_argument,       NULL,           'D'   },
 394   { "globals",            no_argument,       &globals,       TRUE  },
 395   { "no-globals",         no_argument,       &globals,       FALSE },
 396   { "help",               no_argument,       NULL,           'h'   },
 397   { "help",               no_argument,       NULL,           'H'   },
 398   { "ignore-indentation", no_argument,       NULL,           'I'   },
 399   { "include",            required_argument, NULL,           'i'   },
 400   { "language",           required_argument, NULL,           'l'   },
 401   { "members",            no_argument,       &members,       TRUE  },
 402   { "no-members",         no_argument,       &members,       FALSE },
 403   { "no-warn",            no_argument,       NULL,           'w'   },
 404   { "output",             required_argument, NULL,           'o'   },
 405 #ifdef ETAGS_REGEXPS
 406   { "regex",              required_argument, NULL,           'r'   },
 407   { "no-regex",           no_argument,       NULL,           'R'   },
 408   { "ignore-case-regex",  required_argument, NULL,           'c'   },
 409 #endif /* ETAGS_REGEXPS */
 410   { "typedefs",           no_argument,       NULL,           't'   },
 411   { "typedefs-and-c++",   no_argument,       NULL,           'T'   },
 412   { "update",             no_argument,       NULL,           'u'   },
 413   { "version",            no_argument,       NULL,           'V'   },
 414   { "vgrind",             no_argument,       NULL,           'v'   },
 415   { NULL }
 416 };
 417 #endif /* LONG_OPTIONS */
 418
 419 #ifdef ETAGS_REGEXPS
 420 /* Structure defining a regular expression.  Elements are
 421    the compiled pattern, and the name string. */
 422 typedef struct pattern
 423 {
 424   struct pattern *p_next;
 425   language *language;
 426   char *regex;
 427   struct re_pattern_buffer *pattern;
 428   struct re_registers regs;
 429   char *name_pattern;
 430   bool error_signaled;
 431 } pattern;
 432
 433 /* List of all regexps. */
 434 pattern *p_head = NULL;
 435
 436 /* How many characters in the character set.  (From regex.c.)  */
 437 #define CHAR_SET_SIZE 256
 438 /* Translation table for case-insensitive matching. */
 439 char lc_trans[CHAR_SET_SIZE];
 440 #endif /* ETAGS_REGEXPS */
 441
 442 compressor compressors[] =
 443 {
 444   { "z", "gzip -d -c"},
 445   { "Z", "gzip -d -c"},
 446   { "gz", "gzip -d -c"},
 447   { "GZ", "gzip -d -c"},
 448   { "bz2", "bzip2 -d -c" },
 449   { NULL }
 450 };
 451
 452 /*
 453  * Language stuff.
 454  */
 455
 456 /* Non-NULL if language fixed. */
 457 language *forced_lang = NULL;
 458
 459 /* Ada code */
 460 char *Ada_suffixes [] =
 461   { "ads", "adb", "ada", NULL };
 462
 463 /* Assembly code */
 464 char *Asm_suffixes [] = { "a",  /* Unix assembler */
 465                           "asm", /* Microcontroller assembly */
 466                           "def", /* BSO/Tasking definition includes  */
 467                           "inc", /* Microcontroller include files */
 468                           "ins", /* Microcontroller include files */
 469                           "s", "sa", /* Unix assembler */
 470                           "S",   /* cpp-processed Unix assembler */
 471                           "src", /* BSO/Tasking C compiler output */
 472                           NULL
 473                         };
 474
 475 /* Note that .c and .h can be considered C++, if the --c++ flag was
 476    given.  That is why default_C_entries is called here. */
 477 char *default_C_suffixes [] =
 478   { "c", "h", NULL };
 479
 480 char *Cplusplus_suffixes [] =
 481   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 482     "M",                        /* Objective C++ */
 483     "pdb",                      /* Postscript with C syntax */
 484     NULL };
 485
 486 char *Cjava_suffixes [] =
 487   { "java", NULL };
 488
 489 char *Cobol_suffixes [] =
 490   { "COB", "cob", NULL };
 491
 492 char *Cstar_suffixes [] =
 493   { "cs", "hs", NULL };
 494
 495 char *Erlang_suffixes [] =
 496   { "erl", "hrl", NULL };
 497
 498 char *Fortran_suffixes [] =
 499   { "F", "f", "f90", "for", NULL };
 500
 501 char *Lisp_suffixes [] =
 502   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 503
 504 char *Makefile_filenames [] =
 505   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 506
 507 char *Pascal_suffixes [] =
 508   { "p", "pas", NULL };
 509
 510 char *Perl_suffixes [] =
 511   { "pl", "pm", NULL };
 512 char *Perl_interpreters [] =
 513   { "perl", "@PERL@", NULL };
 514
 515 char *plain_C_suffixes [] =
 516   { "lm",                       /* Objective lex file */
 517     "m",                        /* Objective C file */
 518     "pc",                       /* Pro*C file */
 519      NULL };
 520
 521 char *Postscript_suffixes [] =
 522   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 523
 524 char *Prolog_suffixes [] =
 525   { "prolog", NULL };
 526
 527 char *Python_suffixes [] =
 528   { "py", NULL };
 529
 530 /* Can't do the `SCM' or `scm' prefix with a version number. */
 531 char *Scheme_suffixes [] =
 532   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 533
 534 char *TeX_suffixes [] =
 535   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 536
 537 char *Texinfo_suffixes [] =
 538   { "texi", "texinfo", "txi", NULL };
 539
 540 char *Yacc_suffixes [] =
 541   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 542
 543 /*
 544  * Table of languages.
 545  *
 546  * It is ok for a given function to be listed under more than one
 547  * name.  I just didn't.
 548  */
 549
 550 language lang_names [] =
 551 {
 552   { "ada",        Ada_funcs,            NULL, Ada_suffixes,             NULL },
 553   { "asm",        Asm_labels,           NULL, Asm_suffixes,             NULL },
 554   { "c",          default_C_entries,    NULL, default_C_suffixes,       NULL },
 555   { "c++",        Cplusplus_entries,    NULL, Cplusplus_suffixes,       NULL },
 556   { "c*",         Cstar_entries,        NULL, Cstar_suffixes,           NULL },
 557   { "cobol",      Cobol_paragraphs,     NULL, Cobol_suffixes,           NULL },
 558   { "erlang",     Erlang_functions,     NULL, Erlang_suffixes,          NULL },
 559   { "fortran",    Fortran_functions,    NULL, Fortran_suffixes,         NULL },
 560   { "java",       Cjava_entries,        NULL, Cjava_suffixes,           NULL },
 561   { "lisp",       Lisp_functions,       NULL, Lisp_suffixes,            NULL },
 562   { "makefile",   Makefile_targets,     Makefile_filenames, NULL,       NULL },
 563   { "pascal",     Pascal_functions,     NULL, Pascal_suffixes,          NULL },
 564   { "perl",       Perl_functions,       NULL, Perl_suffixes, Perl_interpreters },
 565   { "postscript", Postscript_functions, NULL, Postscript_suffixes,      NULL },
 566   { "proc",       plain_C_entries,      NULL, plain_C_suffixes,         NULL },
 567   { "prolog",     Prolog_functions,     NULL, Prolog_suffixes,          NULL },
 568   { "python",     Python_functions,     NULL, Python_suffixes,          NULL },
 569   { "scheme",     Scheme_functions,     NULL, Scheme_suffixes,          NULL },
 570   { "tex",        TeX_commands,         NULL, TeX_suffixes,             NULL },
 571   { "texinfo",    Texinfo_nodes,        NULL, Texinfo_suffixes,         NULL },
 572   { "yacc",       Yacc_entries,         NULL, Yacc_suffixes,            NULL },
 573   { "auto", NULL },             /* default guessing scheme */
 574   { "none", just_read_file },   /* regexp matching only */
 575   { NULL, NULL }                /* end of list */
 576 };
 577
 578 \f
 579 static void
 580 print_language_names ()
 581 {
 582   language *lang;
 583   char **name, **ext;
 584
 585   puts ("\nThese are the currently supported languages, along with the\n\
 586 default file names and dot suffixes:");
 587   for (lang = lang_names; lang->name != NULL; lang++)
 588     {
 589       printf ("  %-*s", 10, lang->name);
 590       if (lang->filenames != NULL)
 591         for (name = lang->filenames; *name != NULL; name++)
 592           printf (" %s", *name);
 593       if (lang->suffixes != NULL)
 594         for (ext = lang->suffixes; *ext != NULL; ext++)
 595           printf (" .%s", *ext);
 596       puts ("");
 597     }
 598   puts ("Where `auto' means use default language for files based on file\n\
 599 name suffix, and `none' means only do regexp processing on files.\n\
 600 If no language is specified and no matching suffix is found,\n\
 601 the first line of the file is read for a sharp-bang (#!) sequence\n\
 602 followed by the name of an interpreter.  If no such sequence is found,\n\
 603 Fortran is tried first; if no tags are found, C is tried next.\n\
 604 Compressed files are supported using gzip and bzip2.");
 605 }
 606
 607 #ifndef EMACS_NAME
 608 # define EMACS_NAME "GNU Emacs"
 609 #endif
 610 #ifndef VERSION
 611 # define VERSION "21"
 612 #endif
 613 static void
 614 print_version ()
 615 {
 616   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 617   puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
 618   puts ("This program is distributed under the same terms as Emacs");
 619
 620   exit (GOOD);
 621 }
 622
 623 static void
 624 print_help ()
 625 {
 626   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 627 \n\
 628 These are the options accepted by %s.\n", progname, progname);
 629 #ifdef LONG_OPTIONS
 630   puts ("You may use unambiguous abbreviations for the long option names.");
 631 #else
 632   puts ("Long option names do not work with this executable, as it is not\n\
 633 linked with GNU getopt.");
 634 #endif /* LONG_OPTIONS */
 635   puts ("A - as file name means read names from stdin (one per line).");
 636   if (!CTAGS)
 637     printf ("  Absolute names are stored in the output file as they are.\n\
 638 Relative ones are stored relative to the output file's directory.");
 639   puts ("\n");
 640
 641   puts ("-a, --append\n\
 642         Append tag entries to existing tags file.");
 643
 644   puts ("--packages-only\n\
 645         For Ada files, only generate tags for packages .");
 646
 647   if (CTAGS)
 648     puts ("-B, --backward-search\n\
 649         Write the search commands for the tag entries using '?', the\n\
 650         backward-search command instead of '/', the forward-search command.");
 651
 652   puts ("-C, --c++\n\
 653         Treat files whose name suffix defaults to C language as C++ files.");
 654
 655   puts ("--declarations\n\
 656         In C and derived languages, create tags for function declarations,");
 657   if (CTAGS)
 658     puts ("\tand create tags for extern variables if --globals is used.");
 659   else
 660     puts
 661       ("\tand create tags for extern variables unless --no-globals is used.");
 662
 663   if (CTAGS)
 664     puts ("-d, --defines\n\
 665         Create tag entries for C #define constants and enum constants, too.");
 666   else
 667     puts ("-D, --no-defines\n\
 668         Don't create tag entries for C #define constants and enum constants.\n\
 669         This makes the tags file smaller.");
 670
 671   if (!CTAGS)
 672     {
 673       puts ("-i FILE, --include=FILE\n\
 674         Include a note in tag file indicating that, when searching for\n\
 675         a tag, one should also consult the tags file FILE after\n\
 676         checking the current file.");
 677       puts ("-l LANG, --language=LANG\n\
 678         Force the following files to be considered as written in the\n\
 679         named language up to the next --language=LANG option.");
 680     }
 681
 682   if (CTAGS)
 683     puts ("--globals\n\
 684         Create tag entries for global variables in some languages.");
 685   else
 686     puts ("--no-globals\n\
 687         Do not create tag entries for global variables in some\n\
 688         languages.  This makes the tags file smaller.");
 689   puts ("--members\n\
 690         Create tag entries for member variables in C and derived languages.");
 691
 692 #ifdef ETAGS_REGEXPS
 693   puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
 694         Make a tag for each line matching pattern REGEXP in the following\n\
 695         files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
 696         regexfile is a file containing one REGEXP per line.\n\
 697         REGEXP is anchored (as if preceded by ^).\n\
 698         The form /REGEXP/NAME/ creates a named tag.\n\
 699         For example Tcl named tags can be created with:\n\
 700         --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
 701   puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
 702         Like -r, --regex but ignore case when matching expressions.");
 703   puts ("-R, --no-regex\n\
 704         Don't create tags from regexps for the following files.");
 705 #endif /* ETAGS_REGEXPS */
 706   puts ("-o FILE, --output=FILE\n\
 707         Write the tags to FILE.");
 708   puts ("-I, --ignore-indentation\n\
 709         Don't rely on indentation quite as much as normal.  Currently,\n\
 710         this means not to assume that a closing brace in the first\n\
 711         column is the final brace of a function or structure\n\
 712         definition in C and C++.");
 713
 714   if (CTAGS)
 715     {
 716       puts ("-t, --typedefs\n\
 717         Generate tag entries for C and Ada typedefs.");
 718       puts ("-T, --typedefs-and-c++\n\
 719         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 720         and C++ member functions.");
 721       puts ("-u, --update\n\
 722         Update the tag entries for the given files, leaving tag\n\
 723         entries for other files in place.  Currently, this is\n\
 724         implemented by deleting the existing entries for the given\n\
 725         files and then rewriting the new entries at the end of the\n\
 726         tags file.  It is often faster to simply rebuild the entire\n\
 727         tag file than to use this.");
 728       puts ("-v, --vgrind\n\
 729         Generates an index of items intended for human consumption,\n\
 730         similar to the output of vgrind.  The index is sorted, and\n\
 731         gives the page number of each item.");
 732       puts ("-w, --no-warn\n\
 733         Suppress warning messages about entries defined in multiple\n\
 734         files.");
 735       puts ("-x, --cxref\n\
 736         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 737         The output uses line numbers instead of page numbers, but\n\
 738         beyond that the differences are cosmetic; try both to see\n\
 739         which you like.");
 740     }
 741
 742   puts ("-V, --version\n\
 743         Print the version of the program.\n\
 744 -h, --help\n\
 745         Print this help message.");
 746
 747   print_language_names ();
 748
 749   puts ("");
 750   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 751
 752   exit (GOOD);
 753 }
 754
 755 \f
 756 enum argument_type
 757 {
 758   at_language,
 759   at_regexp,
 760   at_filename,
 761   at_icregexp
 762 };
 763
 764 /* This structure helps us allow mixing of --lang and file names. */
 765 typedef struct
 766 {
 767   enum argument_type arg_type;
 768   char *what;
 769   language *lang;               /* language of the regexp */
 770 } argument;
 771
 772 #ifdef VMS                      /* VMS specific functions */
 773
 774 #define EOS     '\0'
 775
 776 /* This is a BUG!  ANY arbitrary limit is a BUG!
 777    Won't someone please fix this?  */
 778 #define MAX_FILE_SPEC_LEN       255
 779 typedef struct  {
 780   short   curlen;
 781   char    body[MAX_FILE_SPEC_LEN + 1];
 782 } vspec;
 783
 784 /*
 785  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
 786  returning in each successive call the next file name matching the input
 787  spec. The function expects that each in_spec passed
 788  to it will be processed to completion; in particular, up to and
 789  including the call following that in which the last matching name
 790  is returned, the function ignores the value of in_spec, and will
 791  only start processing a new spec with the following call.
 792  If an error occurs, on return out_spec contains the value
 793  of in_spec when the error occurred.
 794
 795  With each successive file name returned in out_spec, the
 796  function's return value is one. When there are no more matching
 797  names the function returns zero. If on the first call no file
 798  matches in_spec, or there is any other error, -1 is returned.
 799 */
 800
 801 #include        <rmsdef.h>
 802 #include        <descrip.h>
 803 #define         OUTSIZE MAX_FILE_SPEC_LEN
 804 static short
 805 fn_exp (out, in)
 806      vspec *out;
 807      char *in;
 808 {
 809   static long context = 0;
 810   static struct dsc$descriptor_s o;
 811   static struct dsc$descriptor_s i;
 812   static bool pass1 = TRUE;
 813   long status;
 814   short retval;
 815
 816   if (pass1)
 817     {
 818       pass1 = FALSE;
 819       o.dsc$a_pointer = (char *) out;
 820       o.dsc$w_length = (short)OUTSIZE;
 821       i.dsc$a_pointer = in;
 822       i.dsc$w_length = (short)strlen(in);
 823       i.dsc$b_dtype = DSC$K_DTYPE_T;
 824       i.dsc$b_class = DSC$K_CLASS_S;
 825       o.dsc$b_dtype = DSC$K_DTYPE_VT;
 826       o.dsc$b_class = DSC$K_CLASS_VS;
 827     }
 828   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
 829     {
 830       out->body[out->curlen] = EOS;
 831       return 1;
 832     }
 833   else if (status == RMS$_NMF)
 834     retval = 0;
 835   else
 836     {
 837       strcpy(out->body, in);
 838       retval = -1;
 839     }
 840   lib$find_file_end(&context);
 841   pass1 = TRUE;
 842   return retval;
 843 }
 844
 845 /*
 846   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
 847   name of each file specified by the provided arg expanding wildcards.
 848 */
 849 static char *
 850 gfnames (arg, p_error)
 851      char *arg;
 852      bool *p_error;
 853 {
 854   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 855
 856   switch (fn_exp (&filename, arg))
 857     {
 858     case 1:
 859       *p_error = FALSE;
 860       return filename.body;
 861     case 0:
 862       *p_error = FALSE;
 863       return NULL;
 864     default:
 865       *p_error = TRUE;
 866       return filename.body;
 867     }
 868 }
 869
 870 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
 871 system (cmd)
 872      char *cmd;
 873 {
 874   error ("%s", "system() function not implemented under VMS");
 875 }
 876 #endif
 877
 878 #define VERSION_DELIM   ';'
 879 char *massage_name (s)
 880      char *s;
 881 {
 882   char *start = s;
 883
 884   for ( ; *s; s++)
 885     if (*s == VERSION_DELIM)
 886       {
 887         *s = EOS;
 888         break;
 889       }
 890     else
 891       *s = lowcase (*s);
 892   return start;
 893 }
 894 #endif /* VMS */
 895
 896 \f
 897 int
 898 main (argc, argv)
 899      int argc;
 900      char *argv[];
 901 {
 902   int i;
 903   unsigned int nincluded_files;
 904   char **included_files;
 905   char *this_file;
 906   argument *argbuffer;
 907   int current_arg, file_count;
 908   linebuffer filename_lb;
 909 #ifdef VMS
 910   bool got_err;
 911 #endif
 912
 913 #ifdef DOS_NT
 914   _fmode = O_BINARY;   /* all of files are treated as binary files */
 915 #endif /* DOS_NT */
 916
 917   progname = argv[0];
 918   nincluded_files = 0;
 919   included_files = xnew (argc, char *);
 920   current_arg = 0;
 921   file_count = 0;
 922
 923   /* Allocate enough no matter what happens.  Overkill, but each one
 924      is small. */
 925   argbuffer = xnew (argc, argument);
 926
 927 #ifdef ETAGS_REGEXPS
 928   /* Set syntax for regular expression routines. */
 929   re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
 930   /* Translation table for case-insensitive search. */
 931   for (i = 0; i < CHAR_SET_SIZE; i++)
 932     lc_trans[i] = lowcase (i);
 933 #endif /* ETAGS_REGEXPS */
 934
 935   /*
 936    * If etags, always find typedefs and structure tags.  Why not?
 937    * Also default is to find macro constants, enum constants and
 938    * global variables.
 939    */
 940   if (!CTAGS)
 941     {
 942       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
 943       globals = TRUE;
 944       members = FALSE;
 945     }
 946
 947   while (1)
 948     {
 949       int opt;
 950       char *optstring;
 951
 952 #ifdef ETAGS_REGEXPS
 953       optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
 954 #else
 955       optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
 956 #endif /* ETAGS_REGEXPS */
 957
 958 #ifndef LONG_OPTIONS
 959       optstring = optstring + 1;
 960 #endif /* LONG_OPTIONS */
 961
 962       opt = getopt_long (argc, argv, optstring, longopts, 0);
 963       if (opt == EOF)
 964         break;
 965
 966       switch (opt)
 967         {
 968         case 0:
 969           /* If getopt returns 0, then it has already processed a
 970              long-named option.  We should do nothing.  */
 971           break;
 972
 973         case 1:
 974           /* This means that a file name has been seen.  Record it. */
 975           argbuffer[current_arg].arg_type = at_filename;
 976           argbuffer[current_arg].what = optarg;
 977           ++current_arg;
 978           ++file_count;
 979           break;
 980
 981           /* Common options. */
 982         case 'a': append_to_tagfile = TRUE;     break;
 983         case 'C': cplusplus = TRUE;             break;
 984         case 'd': constantypedefs = TRUE;       break;
 985         case 'D': constantypedefs = FALSE;      break;
 986         case 'f':               /* for compatibility with old makefiles */
 987         case 'o':
 988           if (tagfile)
 989             {
 990               error ("-o option may only be given once.", (char *)NULL);
 991               suggest_asking_for_help ();
 992             }
 993           tagfile = optarg;
 994           break;
 995         case 'I':
 996         case 'S':               /* for backward compatibility */
 997           noindentypedefs = TRUE;
 998           break;
 999         case 'l':
1000           {
1001             language *lang = get_language_from_langname (optarg);
1002             if (lang != NULL)
1003               {
1004                 argbuffer[current_arg].lang = lang;
1005                 argbuffer[current_arg].arg_type = at_language;
1006                 ++current_arg;
1007               }
1008           }
1009           break;
1010 #ifdef ETAGS_REGEXPS
1011         case 'r':
1012           argbuffer[current_arg].arg_type = at_regexp;
1013           argbuffer[current_arg].what = optarg;
1014           ++current_arg;
1015           break;
1016         case 'R':
1017           argbuffer[current_arg].arg_type = at_regexp;
1018           argbuffer[current_arg].what = NULL;
1019           ++current_arg;
1020           break;
1021         case 'c':
1022           argbuffer[current_arg].arg_type = at_icregexp;
1023           argbuffer[current_arg].what = optarg;
1024           ++current_arg;
1025           break;
1026 #endif /* ETAGS_REGEXPS */
1027         case 'V':
1028           print_version ();
1029           break;
1030         case 'h':
1031         case 'H':
1032           print_help ();
1033           break;
1034         case 't':
1035           typedefs = TRUE;
1036           break;
1037         case 'T':
1038           typedefs = typedefs_or_cplusplus = TRUE;
1039           break;
1040 #if (!CTAGS)
1041           /* Etags options */
1042         case 'i':
1043           included_files[nincluded_files++] = optarg;
1044           break;
1045 #else /* CTAGS */
1046           /* Ctags options. */
1047         case 'B': searchar = '?';       break;
1048         case 'u': update = TRUE;        break;
1049         case 'v': vgrind_style = TRUE;  /*FALLTHRU*/
1050         case 'x': cxref_style = TRUE;   break;
1051         case 'w': no_warnings = TRUE;   break;
1052 #endif /* CTAGS */
1053         default:
1054           suggest_asking_for_help ();
1055         }
1056     }
1057
1058   for (; optind < argc; ++optind)
1059     {
1060       argbuffer[current_arg].arg_type = at_filename;
1061       argbuffer[current_arg].what = argv[optind];
1062       ++current_arg;
1063       ++file_count;
1064     }
1065
1066   if (nincluded_files == 0 && file_count == 0)
1067     {
1068       error ("no input files specified.", (char *)NULL);
1069       suggest_asking_for_help ();
1070     }
1071
1072   if (tagfile == NULL)
1073     tagfile = CTAGS ? "tags" : "TAGS";
1074   cwd = etags_getcwd ();        /* the current working directory */
1075   if (cwd[strlen (cwd) - 1] != '/')
1076     {
1077       char *oldcwd = cwd;
1078       cwd = concat (oldcwd, "/", "");
1079       free (oldcwd);
1080     }
1081   if (streq (tagfile, "-"))
1082     tagfiledir = cwd;
1083   else
1084     tagfiledir = absolute_dirname (tagfile, cwd);
1085
1086   init ();                      /* set up boolean "functions" */
1087
1088   initbuffer (&lb);
1089   initbuffer (&filename_lb);
1090
1091   if (!CTAGS)
1092     {
1093       if (streq (tagfile, "-"))
1094         {
1095           tagf = stdout;
1096 #ifdef DOS_NT
1097           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1098              doesn't take effect until after `stdout' is already open). */
1099           if (!isatty (fileno (stdout)))
1100             setmode (fileno (stdout), O_BINARY);
1101 #endif /* DOS_NT */
1102         }
1103       else
1104         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1105       if (tagf == NULL)
1106         pfatal (tagfile);
1107     }
1108
1109   /*
1110    * Loop through files finding functions.
1111    */
1112   for (i = 0; i < current_arg; ++i)
1113     {
1114       switch (argbuffer[i].arg_type)
1115         {
1116         case at_language:
1117           forced_lang = argbuffer[i].lang;
1118           break;
1119 #ifdef ETAGS_REGEXPS
1120         case at_regexp:
1121           analyse_regex (argbuffer[i].what, FALSE);
1122           break;
1123         case at_icregexp:
1124           analyse_regex (argbuffer[i].what, TRUE);
1125           break;
1126 #endif
1127         case at_filename:
1128 #ifdef VMS
1129           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1130             {
1131               if (got_err)
1132                 {
1133                   error ("can't find file %s\n", this_file);
1134                   argc--, argv++;
1135                 }
1136               else
1137                 {
1138                   this_file = massage_name (this_file);
1139                 }
1140 #else
1141               this_file = argbuffer[i].what;
1142 #endif
1143               /* Input file named "-" means read file names from stdin
1144                  (one per line) and use them. */
1145               if (streq (this_file, "-"))
1146                 while (readline_internal (&filename_lb, stdin) > 0)
1147                   process_file (filename_lb.buffer);
1148               else
1149                 process_file (this_file);
1150 #ifdef VMS
1151             }
1152 #endif
1153           break;
1154         }
1155     }
1156
1157 #ifdef ETAGS_REGEXPS
1158   free_patterns ();
1159 #endif /* ETAGS_REGEXPS */
1160
1161   if (!CTAGS)
1162     {
1163       while (nincluded_files-- > 0)
1164         fprintf (tagf, "\f\n%s,include\n", *included_files++);
1165
1166       fclose (tagf);
1167       exit (GOOD);
1168     }
1169
1170   /* If CTAGS, we are here.  process_file did not write the tags yet,
1171      because we want them ordered.  Let's do it now. */
1172   if (cxref_style)
1173     {
1174       put_entries (head);
1175       free_tree (head);
1176       head = NULL;
1177       exit (GOOD);
1178     }
1179
1180   if (update)
1181     {
1182       char cmd[BUFSIZ];
1183       for (i = 0; i < current_arg; ++i)
1184         {
1185           if (argbuffer[i].arg_type != at_filename)
1186             continue;
1187           sprintf (cmd,
1188                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1189                    tagfile, argbuffer[i].what, tagfile);
1190           if (system (cmd) != GOOD)
1191             fatal ("failed to execute shell command", (char *)NULL);
1192         }
1193       append_to_tagfile = TRUE;
1194     }
1195
1196   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1197   if (tagf == NULL)
1198     pfatal (tagfile);
1199   put_entries (head);
1200   free_tree (head);
1201   head = NULL;
1202   fclose (tagf);
1203
1204   if (update)
1205     {
1206       char cmd[BUFSIZ];
1207       sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1208       exit (system (cmd));
1209     }
1210   return GOOD;
1211 }
1212
1213
1214
1215 /*
1216  * Return a compressor given the file name.  If EXTPTR is non-zero,
1217  * return a pointer into FILE where the compressor-specific
1218  * extension begins.  If no compressor is found, NULL is returned
1219  * and EXTPTR is not significant.
1220  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1221  */
1222 static compressor *
1223 get_compressor_from_suffix (file, extptr)
1224      char *file;
1225      char **extptr;
1226 {
1227   compressor *compr;
1228   char *slash, *suffix;
1229
1230   /* This relies on FN to be after canonicalize_filename,
1231      so we don't need to consider backslashes on DOS_NT.  */
1232   slash = etags_strrchr (file, '/');
1233   suffix = etags_strrchr (file, '.');
1234   if (suffix == NULL || suffix < slash)
1235     return NULL;
1236   if (extptr != NULL)
1237     *extptr = suffix;
1238   suffix += 1;
1239   /* Let those poor souls who live with DOS 8+3 file name limits get
1240      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1241      Only the first do loop is run if not MSDOS */
1242   do
1243     {
1244       for (compr = compressors; compr->suffix != NULL; compr++)
1245         if (streq (compr->suffix, suffix))
1246           return compr;
1247       if (!MSDOS)
1248         break;                  /* do it only once: not really a loop */
1249       if (extptr != NULL)
1250         *extptr = ++suffix;
1251     } while (*suffix != '\0');
1252   return NULL;
1253 }
1254
1255
1256
1257 /*
1258  * Return a language given the name.
1259  */
1260 static language *
1261 get_language_from_langname (name)
1262      char *name;
1263 {
1264   language *lang;
1265
1266   if (name == NULL)
1267     error ("empty language name", (char *)NULL);
1268   else
1269     {
1270       for (lang = lang_names; lang->name != NULL; lang++)
1271         if (streq (name, lang->name))
1272           return lang;
1273       error ("unknown language \"%s\"", name);
1274     }
1275
1276   return NULL;
1277 }
1278
1279
1280 /*
1281  * Return a language given the interpreter name.
1282  */
1283 static language *
1284 get_language_from_interpreter (interpreter)
1285      char *interpreter;
1286 {
1287   language *lang;
1288   char **iname;
1289
1290   if (interpreter == NULL)
1291     return NULL;
1292   for (lang = lang_names; lang->name != NULL; lang++)
1293     if (lang->interpreters != NULL)
1294       for (iname = lang->interpreters; *iname != NULL; iname++)
1295         if (streq (*iname, interpreter))
1296             return lang;
1297
1298   return NULL;
1299 }
1300
1301
1302
1303 /*
1304  * Return a language given the file name.
1305  */
1306 static language *
1307 get_language_from_filename (file)
1308      char *file;
1309 {
1310   language *lang;
1311   char **name, **ext, *suffix;
1312
1313   /* Try whole file name first. */
1314   for (lang = lang_names; lang->name != NULL; lang++)
1315     if (lang->filenames != NULL)
1316       for (name = lang->filenames; *name != NULL; name++)
1317         if (streq (*name, file))
1318           return lang;
1319
1320   /* If not found, try suffix after last dot. */
1321   suffix = etags_strrchr (file, '.');
1322   if (suffix == NULL)
1323     return NULL;
1324   suffix += 1;
1325   for (lang = lang_names; lang->name != NULL; lang++)
1326     if (lang->suffixes != NULL)
1327       for (ext = lang->suffixes; *ext != NULL; ext++)
1328         if (streq (*ext, suffix))
1329           return lang;
1330   return NULL;
1331 }
1332
1333
1334
1335 /*
1336  * This routine is called on each file argument.
1337  */
1338 static void
1339 process_file (file)
1340      char *file;
1341 {
1342   struct stat stat_buf;
1343   FILE *inf;
1344   compressor *compr;
1345   char *compressed_name, *uncompressed_name;
1346   char *ext, *real_name;
1347
1348
1349   canonicalize_filename (file);
1350   if (streq (file, tagfile) && !streq (tagfile, "-"))
1351     {
1352       error ("skipping inclusion of %s in self.", file);
1353       return;
1354     }
1355   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1356     {
1357       compressed_name = NULL;
1358       real_name = uncompressed_name = savestr (file);
1359     }
1360   else
1361     {
1362       real_name = compressed_name = savestr (file);
1363       uncompressed_name = savenstr (file, ext - file);
1364     }
1365
1366   /* If the canonicalised uncompressed name has already be dealt with,
1367      skip it silently, else add it to the list. */
1368   {
1369     typedef struct processed_file
1370     {
1371       char *filename;
1372       struct processed_file *next;
1373     } processed_file;
1374     static processed_file *pf_head = NULL;
1375     register processed_file *fnp;
1376
1377     for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1378       if (streq (uncompressed_name, fnp->filename))
1379         goto exit;
1380     fnp = pf_head;
1381     pf_head = xnew (1, struct processed_file);
1382     pf_head->filename = savestr (uncompressed_name);
1383     pf_head->next = fnp;
1384   }
1385
1386   if (stat (real_name, &stat_buf) != 0)
1387     {
1388       /* Reset real_name and try with a different name. */
1389       real_name = NULL;
1390       if (compressed_name != NULL) /* try with the given suffix */
1391         {
1392           if (stat (uncompressed_name, &stat_buf) == 0)
1393             real_name = uncompressed_name;
1394         }
1395       else                      /* try all possible suffixes */
1396         {
1397           for (compr = compressors; compr->suffix != NULL; compr++)
1398             {
1399               compressed_name = concat (file, ".", compr->suffix);
1400               if (stat (compressed_name, &stat_buf) != 0)
1401                 {
1402                   if (MSDOS)
1403                     {
1404                       char *suf = compressed_name + strlen (file);
1405                       size_t suflen = strlen (compr->suffix) + 1;
1406                       for ( ; suf[1]; suf++, suflen--)
1407                         {
1408                           memmove (suf, suf + 1, suflen);
1409                           if (stat (compressed_name, &stat_buf) == 0)
1410                             {
1411                               real_name = compressed_name;
1412                               break;
1413                             }
1414                         }
1415                       if (real_name != NULL)
1416                         break;
1417                     } /* MSDOS */
1418                   free (compressed_name);
1419                   compressed_name = NULL;
1420                 }
1421               else
1422                 {
1423                   real_name = compressed_name;
1424                   break;
1425                 }
1426             }
1427         }
1428       if (real_name == NULL)
1429         {
1430           perror (file);
1431           goto exit;
1432         }
1433     } /* try with a different name */
1434
1435   if (!S_ISREG (stat_buf.st_mode))
1436     {
1437       error ("skipping %s: it is not a regular file.", real_name);
1438       goto exit;
1439     }
1440   if (real_name == compressed_name)
1441     {
1442       char *cmd = concat (compr->command, " ", real_name);
1443       inf = popen (cmd, "r");
1444       free (cmd);
1445     }
1446   else
1447     inf = fopen (real_name, "r");
1448   if (inf == NULL)
1449     {
1450       perror (real_name);
1451       goto exit;
1452     }
1453
1454   find_entries (uncompressed_name, inf);
1455
1456   if (real_name == compressed_name)
1457     pclose (inf);
1458   else
1459     fclose (inf);
1460
1461   if (!CTAGS)
1462     {
1463       char *filename;
1464
1465       if (filename_is_absolute (uncompressed_name))
1466         {
1467           /* file is an absolute file name.  Canonicalise it. */
1468           filename = absolute_filename (uncompressed_name, cwd);
1469         }
1470       else
1471         {
1472           /* file is a file name relative to cwd.  Make it relative
1473              to the directory of the tags file. */
1474           filename = relative_filename (uncompressed_name, tagfiledir);
1475         }
1476       fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1477       free (filename);
1478       put_entries (head);
1479       free_tree (head);
1480       head = NULL;
1481     }
1482
1483  exit:
1484   if (compressed_name) free(compressed_name);
1485   if (uncompressed_name) free(uncompressed_name);
1486   return;
1487 }
1488
1489 /*
1490  * This routine sets up the boolean pseudo-functions which work
1491  * by setting boolean flags dependent upon the corresponding character.
1492  * Every char which is NOT in that string is not a white char.  Therefore,
1493  * all of the array "_wht" is set to FALSE, and then the elements
1494  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1495  * of a char is TRUE if it is the string "white", else FALSE.
1496  */
1497 static void
1498 init ()
1499 {
1500   register char *sp;
1501   register int i;
1502
1503   for (i = 0; i < CHARS; i++)
1504     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1505   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1506   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1507   notinname('\0') = notinname('\n');
1508   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1509   begtoken('\0') = begtoken('\n');
1510   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1511   intoken('\0') = intoken('\n');
1512   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1513   endtoken('\0') = endtoken('\n');
1514 }
1515
1516 /*
1517  * This routine opens the specified file and calls the function
1518  * which finds the function and type definitions.
1519  */
1520 node *last_node = NULL;
1521
1522 static void
1523 find_entries (file, inf)
1524      char *file;
1525      FILE *inf;
1526 {
1527   char *cp;
1528   language *lang;
1529   node *old_last_node;
1530
1531   /* Memory leakage here: the string pointed by curfile is
1532      never released, because curfile is copied into np->file
1533      for each node, to be used in CTAGS mode.  The amount of
1534      memory leaked here is the sum of the lengths of the
1535      file names. */
1536   curfile = savestr (file);
1537
1538   /* If user specified a language, use it. */
1539   lang = forced_lang;
1540   if (lang != NULL && lang->function != NULL)
1541     {
1542       curlang = lang;
1543       lang->function (inf);
1544       return;
1545     }
1546
1547   /* Try to guess the language given the file name. */
1548   lang = get_language_from_filename (file);
1549   if (lang != NULL && lang->function != NULL)
1550     {
1551       curlang = lang;
1552       lang->function (inf);
1553       return;
1554     }
1555
1556   /* Look for sharp-bang as the first two characters. */
1557   if (readline_internal (&lb, inf) > 0
1558       && lb.len >= 2
1559       && lb.buffer[0] == '#'
1560       && lb.buffer[1] == '!')
1561     {
1562       char *lp;
1563
1564       /* Set lp to point at the first char after the last slash in the
1565          line or, if no slashes, at the first nonblank.  Then set cp to
1566          the first successive blank and terminate the string. */
1567       lp = etags_strrchr (lb.buffer+2, '/');
1568       if (lp != NULL)
1569         lp += 1;
1570       else
1571         lp = skip_spaces (lb.buffer + 2);
1572       cp = skip_non_spaces (lp);
1573       *cp = '\0';
1574
1575       if (strlen (lp) > 0)
1576         {
1577           lang = get_language_from_interpreter (lp);
1578           if (lang != NULL && lang->function != NULL)
1579             {
1580               curlang = lang;
1581               lang->function (inf);
1582               return;
1583             }
1584         }
1585     }
1586   /* We rewind here, even if inf may be a pipe.  We fail if the
1587      length of the first line is longer than the pipe block size,
1588      which is unlikely. */
1589   rewind (inf);
1590
1591   /* Try Fortran. */
1592   old_last_node = last_node;
1593   curlang = get_language_from_langname ("fortran");
1594   Fortran_functions (inf);
1595
1596   /* No Fortran entries found.  Try C. */
1597   if (old_last_node == last_node)
1598     {
1599       /* We do not tag if rewind fails.
1600          Only the file name will be recorded in the tags file. */
1601       rewind (inf);
1602       curlang = get_language_from_langname (cplusplus ? "c++" : "c");
1603       default_C_entries (inf);
1604     }
1605   return;
1606 }
1607
1608 \f
1609 /* Record a tag. */
1610 static void
1611 pfnote (name, is_func, linestart, linelen, lno, cno)
1612      char *name;                /* tag name, or NULL if unnamed */
1613      bool is_func;              /* tag is a function */
1614      char *linestart;           /* start of the line where tag is */
1615      int linelen;               /* length of the line where tag is */
1616      int lno;                   /* line number */
1617      long cno;                  /* character number */
1618 {
1619   register node *np;
1620
1621   if (CTAGS && name == NULL)
1622     return;
1623
1624   np = xnew (1, node);
1625
1626   /* If ctags mode, change name "main" to M<thisfilename>. */
1627   if (CTAGS && !cxref_style && streq (name, "main"))
1628     {
1629       register char *fp = etags_strrchr (curfile, '/');
1630       np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1631       fp = etags_strrchr (np->name, '.');
1632       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1633         fp[0] = '\0';
1634     }
1635   else
1636     np->name = name;
1637   np->been_warned = FALSE;
1638   np->file = curfile;
1639   np->is_func = is_func;
1640   np->lno = lno;
1641   /* Our char numbers are 0-base, because of C language tradition?
1642      ctags compatibility?  old versions compatibility?   I don't know.
1643      Anyway, since emacs's are 1-base we expect etags.el to take care
1644      of the difference.  If we wanted to have 1-based numbers, we would
1645      uncomment the +1 below. */
1646   np->cno = cno /* + 1 */ ;
1647   np->left = np->right = NULL;
1648   if (CTAGS && !cxref_style)
1649     {
1650       if (strlen (linestart) < 50)
1651         np->pat = concat (linestart, "$", "");
1652       else
1653         np->pat = savenstr (linestart, 50);
1654     }
1655   else
1656     np->pat = savenstr (linestart, linelen);
1657
1658   add_node (np, &head);
1659 }
1660
1661 /*
1662  * TAGS format specification
1663  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1664  *
1665  * pfnote should emit the optimized form [unnamed tag] only if:
1666  *  1. name does not contain any of the characters " \t\r\n(),;";
1667  *  2. linestart contains name as either a rightmost, or rightmost but
1668  *     one character, substring;
1669  *  3. the character, if any, immediately before name in linestart must
1670  *     be one of the characters " \t(),;";
1671  *  4. the character, if any, immediately after name in linestart must
1672  *     also be one of the characters " \t(),;".
1673  *
1674  * The real implementation uses the notinname() macro, which recognises
 * characters slightly different from " \t\r\n(),;".  See the variable
1676  * `nonam'.
1677  */
1678 #define traditional_tag_style TRUE
1679 static void
1680 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1681      char *name;                /* tag name, or NULL if unnamed */
1682      int namelen;               /* tag length */
1683      bool is_func;              /* tag is a function */
1684      char *linestart;           /* start of the line where tag is */
1685      int linelen;               /* length of the line where tag is */
1686      int lno;                   /* line number */
1687      long cno;                  /* character number */
1688 {
1689   register char *cp;
1690   bool named;
1691
1692   named = TRUE;
1693   if (!CTAGS)
1694     {
1695       for (cp = name; !notinname (*cp); cp++)
1696         continue;
1697       if (*cp == '\0')                          /* rule #1 */
1698         {
1699           cp = linestart + linelen - namelen;
1700           if (notinname (linestart[linelen-1]))
1701             cp -= 1;                            /* rule #4 */
1702           if (cp >= linestart                   /* rule #2 */
1703               && (cp == linestart
1704                   || notinname (cp[-1]))        /* rule #3 */
1705               && strneq (name, cp, namelen))    /* rule #2 */
1706             named = FALSE;      /* use unnamed tag */
1707         }
1708     }
1709
1710   if (named)
1711     name = savenstr (name, namelen);
1712   else
1713     name = NULL;
1714   pfnote (name, is_func, linestart, linelen, lno, cno);
1715 }
1716
1717 /*
1718  * free_tree ()
1719  *      recurse on left children, iterate on right children.
1720  */
1721 static void
1722 free_tree (np)
1723      register node *np;
1724 {
1725   while (np)
1726     {
1727       register node *node_right = np->right;
1728       free_tree (np->left);
1729       if (np->name != NULL)
1730         free (np->name);
1731       free (np->pat);
1732       free (np);
1733       np = node_right;
1734     }
1735 }
1736
1737 /*
1738  * add_node ()
1739  *      Adds a node to the tree of nodes.  In etags mode, we don't keep
1740  *      it sorted; we just keep a linear list.  In ctags mode, maintain
1741  *      an ordered tree, with no attempt at balancing.
1742  *
1743  *      add_node is the only function allowed to add nodes, so it can
1744  *      maintain state.
1745  */
1746 static void
1747 add_node (np, cur_node_p)
1748      node *np, **cur_node_p;
1749 {
1750   register int dif;
1751   register node *cur_node = *cur_node_p;
1752
1753   if (cur_node == NULL)
1754     {
1755       *cur_node_p = np;
1756       last_node = np;
1757       return;
1758     }
1759
1760   if (!CTAGS)
1761     {
1762       /* Etags Mode */
1763       if (last_node == NULL)
1764         fatal ("internal error in add_node", (char *)NULL);
1765       last_node->right = np;
1766       last_node = np;
1767     }
1768   else
1769     {
1770       /* Ctags Mode */
1771       dif = strcmp (np->name, cur_node->name);
1772
1773       /*
1774        * If this tag name matches an existing one, then
1775        * do not add the node, but maybe print a warning.
1776        */
1777       if (!dif)
1778         {
1779           if (streq (np->file, cur_node->file))
1780             {
1781               if (!no_warnings)
1782                 {
1783                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1784                            np->file, lineno, np->name);
1785                   fprintf (stderr, "Second entry ignored\n");
1786                 }
1787             }
1788           else if (!cur_node->been_warned && !no_warnings)
1789             {
1790               fprintf
1791                 (stderr,
1792                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
1793                  np->file, cur_node->file, np->name);
1794               cur_node->been_warned = TRUE;
1795             }
1796           return;
1797         }
1798
1799       /* Actually add the node */
1800       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1801     }
1802 }
1803
1804 \f
1805 static void
1806 put_entries (np)
1807      register node *np;
1808 {
1809   register char *sp;
1810
1811   if (np == NULL)
1812     return;
1813
1814   /* Output subentries that precede this one */
1815   put_entries (np->left);
1816
1817   /* Output this entry */
1818
1819   if (!CTAGS)
1820     {
1821       if (np->name != NULL)
1822         fprintf (tagf, "%s\177%s\001%d,%ld\n",
1823                  np->pat, np->name, np->lno, np->cno);
1824       else
1825         fprintf (tagf, "%s\177%d,%ld\n",
1826                  np->pat, np->lno, np->cno);
1827     }
1828   else
1829     {
1830       if (np->name == NULL)
1831         error ("internal error: NULL name in ctags mode.", (char *)NULL);
1832
1833       if (cxref_style)
1834         {
1835           if (vgrind_style)
1836             fprintf (stdout, "%s %s %d\n",
1837                      np->name, np->file, (np->lno + 63) / 64);
1838           else
1839             fprintf (stdout, "%-16s %3d %-16s %s\n",
1840                      np->name, np->lno, np->file, np->pat);
1841         }
1842       else
1843         {
1844           fprintf (tagf, "%s\t%s\t", np->name, np->file);
1845
1846           if (np->is_func)
1847             {                   /* a function */
1848               putc (searchar, tagf);
1849               putc ('^', tagf);
1850
1851               for (sp = np->pat; *sp; sp++)
1852                 {
1853                   if (*sp == '\\' || *sp == searchar)
1854                     putc ('\\', tagf);
1855                   putc (*sp, tagf);
1856                 }
1857               putc (searchar, tagf);
1858             }
1859           else
1860             {                   /* a typedef; text pattern inadequate */
1861               fprintf (tagf, "%d", np->lno);
1862             }
1863           putc ('\n', tagf);
1864         }
1865     }
1866
1867   /* Output subentries that follow this one */
1868   put_entries (np->right);
1869 }
1870
/* Length of a number's decimal representation, as printed by %ld:
   the number of digits, plus one for the minus sign when NUM is
   negative. */
static int
number_len (num)
     long num;
{
  unsigned long magnitude;
  int len = 1;                  /* there is always at least one digit */

  if (num < 0)
    {
      len += 1;                 /* the '-' sign */
      /* Negate in unsigned arithmetic so that the most negative long
         does not overflow. */
      magnitude = 0UL - (unsigned long) num;
    }
  else
    magnitude = (unsigned long) num;

  while ((magnitude /= 10) > 0)
    len += 1;
  return len;
}
1881
1882 /*
1883  * Return total number of characters that put_entries will output for
1884  * the nodes in the subtree of the specified node.  Works only if
1885  * we are not ctags, but called only in that case.  This count
1886  * is irrelevant with the new tags.el, but is still supplied for
1887  * backward compatibility.
1888  */
1889 static int
1890 total_size_of_entries (np)
1891      register node *np;
1892 {
1893   register int total;
1894
1895   if (np == NULL)
1896     return 0;
1897
1898   for (total = 0; np != NULL; np = np->right)
1899     {
1900       /* Count left subentries. */
1901       total += total_size_of_entries (np->left);
1902
1903       /* Count this entry */
1904       total += strlen (np->pat) + 1;
1905       total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1906       if (np->name != NULL)
1907         total += 1 + strlen (np->name); /* \001name */
1908     }
1909
1910   return total;
1911 }
1912
1913 \f
1914 /*
1915  * The C symbol tables.
1916  */
/* Classification of a token, as returned by C_symtype.  The keywords
   quoted below are the ones given each type in the gperf input that
   follows. */
1917 enum sym_type
1918 {
1919   st_none,                      /* not a keyword of the current language */
1920   st_C_objprot, st_C_objimpl, st_C_objend, /* @interface and @protocol, @implementation, @end */
1921   st_C_gnumacro,                /* DEFUN, SYSCALL, ENTRY, PSEUDO */
1922   st_C_ignore,                  /* if, for, while, switch, return, import, package, friend */
1923   st_C_javastruct,              /* extends, implements */
1924   st_C_operator,                /* operator */
  /* st_C_struct: interface, class, namespace, domain, union, struct;
     st_C_extern, st_C_enum, st_C_define and st_C_typedef: the keyword
     of the same name; st_C_typespec: type specifiers and qualifiers
     such as int, static, const. */
1925   st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
1926 };
1927 
/* hash and in_word_set are the gperf-generated keyword lookup;
   C_symtype wraps in_word_set and applies the language mask. */
1928 static unsigned int hash P_((const char *, unsigned int));
1929 static struct C_stab_entry * in_word_set P_((const char *, unsigned int));
1930 static enum sym_type C_symtype P_((char *, int, int));
1931
1932 /* Feed stuff between (but not including) %[ and %] lines to:
1933       gperf -c -k 1,3 -o -p -r -t
1934 %[
1935 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1936 %%
1937 if,             0,      st_C_ignore
1938 for,            0,      st_C_ignore
1939 while,          0,      st_C_ignore
1940 switch,         0,      st_C_ignore
1941 return,         0,      st_C_ignore
1942 @interface,     0,      st_C_objprot
1943 @protocol,      0,      st_C_objprot
1944 @implementation,0,      st_C_objimpl
1945 @end,           0,      st_C_objend
1946 import,         C_JAVA, st_C_ignore
1947 package,        C_JAVA, st_C_ignore
1948 friend,         C_PLPL, st_C_ignore
1949 extends,        C_JAVA, st_C_javastruct
1950 implements,     C_JAVA, st_C_javastruct
1951 interface,      C_JAVA, st_C_struct
1952 class,          C_PLPL, st_C_struct
1953 namespace,      C_PLPL, st_C_struct
1954 domain,         C_STAR, st_C_struct
1955 union,          0,      st_C_struct
1956 struct,         0,      st_C_struct
1957 extern,         0,      st_C_extern
1958 enum,           0,      st_C_enum
1959 typedef,        0,      st_C_typedef
1960 define,         0,      st_C_define
1961 operator,       C_PLPL, st_C_operator
1962 bool,           C_PLPL, st_C_typespec
1963 long,           0,      st_C_typespec
1964 short,          0,      st_C_typespec
1965 int,            0,      st_C_typespec
1966 char,           0,      st_C_typespec
1967 float,          0,      st_C_typespec
1968 double,         0,      st_C_typespec
1969 signed,         0,      st_C_typespec
1970 unsigned,       0,      st_C_typespec
1971 auto,           0,      st_C_typespec
1972 void,           0,      st_C_typespec
1973 static,         0,      st_C_typespec
1974 const,          0,      st_C_typespec
1975 volatile,       0,      st_C_typespec
1976 explicit,       C_PLPL, st_C_typespec
1977 mutable,        C_PLPL, st_C_typespec
1978 typename,       C_PLPL, st_C_typespec
1979 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1980 DEFUN,          0,      st_C_gnumacro
1981 SYSCALL,        0,      st_C_gnumacro
1982 ENTRY,          0,      st_C_gnumacro
1983 PSEUDO,         0,      st_C_gnumacro
1984 # These are defined inside C functions, so currently they are not met.
1985 # EXFUN used in glibc, DEFVAR_* in emacs.
1986 #EXFUN,         0,      st_C_gnumacro
1987 #DEFVAR_,       0,      st_C_gnumacro
1988 %]
1989 and replace lines between %< and %> with its output. */
1990 /*%<*/
1991 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
1992 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
/* One keyword of the table searched by in_word_set.  NAME is the
   keyword text, C_EXT the language mask tested by C_symtype (0 means
   no restriction), TYPE the classification C_symtype returns. */
1993 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
1994 
/* Limits of the generated table: in_word_set rejects a token whose
   length is outside MIN_WORD_LENGTH..MAX_WORD_LENGTH or whose hash
   exceeds MAX_HASH_VALUE. */
1995 #define TOTAL_KEYWORDS 46
1996 #define MIN_WORD_LENGTH 2
1997 #define MAX_WORD_LENGTH 15
1998 #define MIN_HASH_VALUE 13
1999 #define MAX_HASH_VALUE 123
2000 /* maximum key range = 111, duplicates = 0 */
2001
/* Hash function generated by gperf for the keyword table.  The value
   is LEN plus the association value of the first character of STR and,
   when LEN is at least 3, of its third character.  Characters that
   start no keyword map to 124, which is larger than MAX_HASH_VALUE. */
2002 #ifdef __GNUC__
2003 __inline
2004 #endif
2005 static unsigned int
2006 hash (str, len)
2007      register const char *str;
2008      register unsigned int len;
2009 {
2010   static unsigned char asso_values[] =
2011     {
2012       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2013       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2014       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2015       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2016       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2017       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2018       124, 124, 124, 124,   3, 124, 124, 124,  43,   6,
2019        11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2020        11, 124, 124,  58,   7, 124, 124, 124, 124, 124,
2021       124, 124, 124, 124, 124, 124, 124,  57,   7,  42,
2022         4,  14,  52,   0, 124,  53, 124, 124,  29,  11,
2023         6,  35,  32, 124,  29,  34,  59,  58,  51,  24,
2024       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2025       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2026       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2027       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2028       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2029       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2030       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2031       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2032       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2033       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2034       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2035       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2036       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
2037       124, 124, 124, 124, 124, 124
2038     };
2039   register int hval = len;
2040 
  /* The cases fall through on purpose: any length of 3 or more adds
     the third character, then every length adds the first one. */
2041   switch (hval)
2042     {
2043       default:
2044       case 3:
2045         hval += asso_values[(unsigned char)str[2]];
2046       case 2:
2047       case 1:
2048         hval += asso_values[(unsigned char)str[0]];
2049         break;
2050     }
2051   return hval;
2052 }
2053
/* Keyword lookup generated by gperf.  Return the address of the
   wordlist entry whose name matches the LEN characters at STR, or 0
   when there is none.  The table is indexed directly by hash (STR,
   LEN); entries whose name is "" fill the slots no keyword hashes to. */
2054 #ifdef __GNUC__
2055 __inline
2056 #endif
2057 static struct C_stab_entry *
2058 in_word_set (str, len)
2059      register const char *str;
2060      register unsigned int len;
2061 {
2062   static struct C_stab_entry wordlist[] =
2063     {
2064       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2065       {""}, {""}, {""}, {""},
2066       {"@end",          0,      st_C_objend},
2067       {""}, {""}, {""}, {""},
2068       {"ENTRY",         0,      st_C_gnumacro},
2069       {"@interface",    0,      st_C_objprot},
2070       {""},
2071       {"domain",        C_STAR, st_C_struct},
2072       {""},
2073       {"PSEUDO",                0,      st_C_gnumacro},
2074       {""}, {""},
2075       {"namespace",     C_PLPL, st_C_struct},
2076       {""}, {""},
2077       {"@implementation",0,     st_C_objimpl},
2078       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2079       {"long",          0,      st_C_typespec},
2080       {"signed",        0,      st_C_typespec},
2081       {"@protocol",     0,      st_C_objprot},
2082       {""}, {""}, {""}, {""},
2083       {"bool",          C_PLPL, st_C_typespec},
2084       {""}, {""}, {""}, {""}, {""}, {""},
2085       {"const",         0,      st_C_typespec},
2086       {"explicit",      C_PLPL, st_C_typespec},
2087       {"if",            0,      st_C_ignore},
2088       {""},
2089       {"operator",      C_PLPL, st_C_operator},
2090       {""},
2091       {"DEFUN",         0,      st_C_gnumacro},
2092       {""}, {""},
2093       {"define",        0,      st_C_define},
2094       {""}, {""}, {""}, {""}, {""},
2095       {"double",        0,      st_C_typespec},
2096       {"struct",        0,      st_C_struct},
2097       {""}, {""}, {""}, {""},
2098       {"short",         0,      st_C_typespec},
2099       {""},
2100       {"enum",          0,      st_C_enum},
2101       {"mutable",       C_PLPL, st_C_typespec},
2102       {""},
2103       {"extern",        0,      st_C_extern},
2104       {"extends",       C_JAVA, st_C_javastruct},
2105       {"package",       C_JAVA, st_C_ignore},
2106       {"while",         0,      st_C_ignore},
2107       {""},
2108       {"for",           0,      st_C_ignore},
2109       {""}, {""}, {""},
2110       {"volatile",      0,      st_C_typespec},
2111       {""}, {""},
2112       {"import",                C_JAVA, st_C_ignore},
2113       {"float",         0,      st_C_typespec},
2114       {"switch",                0,      st_C_ignore},
2115       {"return",                0,      st_C_ignore},
2116       {"implements",    C_JAVA, st_C_javastruct},
2117       {""},
2118       {"static",        0,      st_C_typespec},
2119       {"typedef",       0,      st_C_typedef},
2120       {"typename",      C_PLPL, st_C_typespec},
2121       {"unsigned",      0,      st_C_typespec},
2122       {""}, {""},
2123       {"char",          0,      st_C_typespec},
2124       {"class",         C_PLPL, st_C_struct},
2125       {""}, {""}, {""},
2126       {"void",          0,      st_C_typespec},
2127       {""}, {""},
2128       {"friend",                C_PLPL, st_C_ignore},
2129       {""}, {""}, {""},
2130       {"int",           0,      st_C_typespec},
2131       {"union",         0,      st_C_struct},
2132       {""}, {""}, {""},
2133       {"auto",          0,      st_C_typespec},
2134       {"interface",     C_JAVA, st_C_struct},
2135       {""},
2136       {"SYSCALL",       0,      st_C_gnumacro}
2137     };
2138 
  /* Tokens shorter or longer than every keyword cannot match. */
2139   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2140     {
2141       register int key = hash (str, len);
2142 
2143       if (key <= MAX_HASH_VALUE && key >= 0)
2144         {
2145           register const char *s = wordlist[key].name;
2146 
          /* Compare the first character inline, the rest with strncmp. */
2147           if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2148             return &wordlist[key];
2149         }
2150     }
2151   return 0;
2152 }
2153 /*%>*/
2154
2155 static enum sym_type
2156 C_symtype (str, len, c_ext)
2157      char *str;
2158      int len;
2159      int c_ext;
2160 {
2161   register struct C_stab_entry *se = in_word_set (str, len);
2162
2163   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2164     return st_none;
2165   return se->type;
2166 }
2167
2168 \f
2169 /*
2170  * C functions and variables are recognized using a simple
2171  * finite automaton.  fvdef is its state variable.
2172  */
/* C_entries resets fvdef to fvnone and fvextern to FALSE before it
   scans a file; consider_token and C_entries then advance them. */
2173 enum
2174 {
2175   fvnone,                       /* nothing seen */
2176   fdefunkey,                    /* Emacs DEFUN keyword seen */
2177   fdefunname,                   /* Emacs DEFUN name seen */
2178   foperator,                    /* func: operator keyword seen (cplpl) */
2179   fvnameseen,                   /* function or variable name seen */
2180   fstartlist,                   /* func: just after open parenthesis */
2181   finlist,                      /* func: in parameter list */
2182   flistseen,                    /* func: after parameter list */
2183   fignore,                      /* func: before open brace */
2184   vignore                       /* var-like: ignore until ';' */
2185 } fvdef;
2186 
2187 bool fvextern;                  /* func or var: extern keyword seen; */
2188
2189 /*
2190  * typedefs are recognized using a simple finite automaton.
2191  * typdef is its state variable.
2192  */
/* consider_token leaves tnone for tkeyseen only while `typedefs' is
   nonzero; C_entries resets the state to tnone before scanning. */
2193 enum
2194 {
2195   tnone,                        /* nothing seen */
2196   tkeyseen,                     /* typedef keyword seen */
2197   ttypeseen,                    /* defined type seen */
2198   tinbody,                      /* inside typedef body */
2199   tend,                         /* just before typedef tag */
2200   tignore                       /* junk after typedef tag */
2201 } typdef;
2202
2203 /*
2204  * struct-like structures (enum, struct and union) are recognized
2205  * using another simple finite automaton.  `structdef' is its state
2206  * variable.
2207  */
2208 enum
2209 {
2210   snone,                        /* nothing seen yet */
2211   skeyseen,                     /* struct-like keyword seen */
2212   stagseen,                     /* struct-like tag seen */
2213   scolonseen,                   /* colon seen after struct-like tag */
2214   sinbody                       /* in struct body: recognize member func defs*/
2215 } structdef;
2216 
2217 /*
2218  * When structdef is stagseen, scolonseen, or sinbody, structtag is the
2219  * tag saved by consider_token: the struct-like tag's name, or "<enum>".
2220  */
2221 char *structtag = "<uninited>";
2222 
2223 /*
2224  * When objdef is different from onone, objtag is the name of the class.
2225  */
2226 char *objtag = "<uninited>";
2227
2228 /*
2229  * Yet another little state machine to deal with preprocessor lines.
2230  */
/* The CNL macro resets definedef to dnone each time it reads a line;
   CNL_SAVE_DEFINEDEF reads a line without touching it. */
2231 enum
2232 {
2233   dnone,                        /* nothing seen */
2234   dsharpseen,                   /* '#' seen as first char on line */
2235   ddefineseen,                  /* '#' and 'define' seen */
2236   dignorerest                   /* ignore rest of line */
2237 } definedef;
2238
2239 /*
2240  * State machine for Objective C protocols and implementations.
2241  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2242  */
2243 enum
2244 {
2245   onone,                        /* nothing seen */
2246   oprotocol,                    /* @interface or @protocol seen */
2247   oimplementation,              /* @implementation seen */
2248   otagseen,                     /* class name seen */
2249   oparenseen,                   /* parenthesis before category seen */
2250   ocatseen,                     /* category name seen */
2251   oinbody,                      /* in @implementation body */
2252   omethodsign,                  /* in @implementation body, after +/- */
2253   omethodtag,                   /* after method name */
2254   omethodcolon,                 /* after method colon */
2255   omethodparm,                  /* after method parameter */
2256   oignore                       /* wait for @end */
2257 } objdef;
2258
2259
2260 /*
2261  * Use this structure to keep info about the token read, and how it
2262  * should be tagged.  Used by the make_C_tag function to build a tag.
2263  */
2264 struct tok
2265 {
2266   bool valid;                   /* make_C_tag emits a tag only if set, then clears it */
2267   bool named;                   /* make_C_tag gives pfnote a name only if set (or CTAGS) */
  /* The next four fields are handed unchanged to pfnote or new_pfnote
     by make_C_tag; presumably they are the length, number, file offset
     and text of the line holding the token -- TODO confirm against
     C_entries. */
2268   int linelen;
2269   int lineno;
2270   long linepos;
2271   char *line;
2272 } token;                        /* latest token read */
2273 linebuffer token_name;          /* its name */
2274 
2275 static bool consider_token P_((char *, int, int, int, int, int, bool *));
2276 static void make_C_tag P_((bool));
2277
2278 /*
2279  * consider_token ()
2280  *      checks to see if the current token is at the start of a
2281  *      function or variable, or corresponds to a typedef, or
2282  *      is a struct/union/enum tag, or #define, or an enum constant.
2283  *
2284  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2285  *      with args.  C_EXT is which language we are looking at.
2286  *
2287  * Globals
2288  *      fvdef                   IN OUT
2289  *      structdef               IN OUT
2290  *      definedef               IN OUT
2291  *      typdef                  IN OUT
2292  *      objdef                  IN OUT
2293  */
2294
2295 static bool
2296 consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
2297      register char *str;        /* IN: token pointer */
2298      register int len;          /* IN: token length */
2299      register int c;            /* IN: first char after the token */
2300      int c_ext;                 /* IN: C extensions mask */
2301      int cblev;                 /* IN: curly brace level */
2302      int parlev;                /* IN: parenthesis level */
2303      bool *is_func_or_var;      /* OUT: function or variable found */
2304 {
2305   /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2306      structtype is the type of the preceding struct-like keyword. */
2307   static enum sym_type structtype;
2308   static enum sym_type toktype;
2309
2310
2311   toktype = C_symtype (str, len, c_ext);
2312
2313   /*
2314    * Advance the definedef state machine.
2315    */
2316   switch (definedef)
2317     {
2318     case dnone:
2319       /* We're not on a preprocessor line. */
2320       if (toktype == st_C_gnumacro)
2321         {
2322           fvdef = fdefunkey;
2323           return FALSE;
2324         }
2325       break;
2326     case dsharpseen:
2327       if (toktype == st_C_define)
2328         {
2329           definedef = ddefineseen;
2330         }
2331       else
2332         {
2333           definedef = dignorerest;
2334         }
2335       return FALSE;
2336     case ddefineseen:
2337       /*
2338        * Make a tag for any macro, unless it is a constant
2339        * and constantypedefs is FALSE.
2340        */
2341       definedef = dignorerest;
2342       *is_func_or_var = (c == '(');
2343       if (!*is_func_or_var && !constantypedefs)
2344         return FALSE;
2345       else
2346         return TRUE;
2347     case dignorerest:
2348       return FALSE;
2349     default:
2350       error ("internal error: definedef value.", (char *)NULL);
2351     }
2352
2353   /*
2354    * Now typedefs
2355    */
2356   switch (typdef)
2357     {
2358     case tnone:
2359       if (toktype == st_C_typedef)
2360         {
2361           if (typedefs)
2362             typdef = tkeyseen;
2363           fvextern = FALSE;
2364           fvdef = fvnone;
2365           return FALSE;
2366         }
2367       break;
2368     case tkeyseen:
2369       switch (toktype)
2370         {
2371         case st_none:
2372         case st_C_typespec:
2373         case st_C_struct:
2374         case st_C_enum:
2375           typdef = ttypeseen;
2376           break;
2377         }
2378       /* Do not return here, so the structdef stuff has a chance. */
2379       break;
2380     case tend:
2381       switch (toktype)
2382         {
2383         case st_C_typespec:
2384         case st_C_struct:
2385         case st_C_enum:
2386           return FALSE;
2387         }
2388       return TRUE;
2389     }
2390
2391   /*
2392    * This structdef business is currently only invoked when cblev==0.
2393    * It should be recursively invoked whatever the curly brace level,
2394    * and a stack of states kept, to allow for definitions of structs
2395    * within structs.
2396    *
2397    * This structdef business is NOT invoked when we are ctags and the
2398    * file is plain C.  This is because a struct tag may have the same
2399    * name as another tag, and this loses with ctags.
2400    */
2401   switch (toktype)
2402     {
2403     case st_C_javastruct:
2404       if (structdef == stagseen)
2405         structdef = scolonseen;
2406       return FALSE;
2407     case st_C_struct:
2408     case st_C_enum:
2409       if (typdef == tkeyseen
2410           || (typedefs_or_cplusplus && cblev == 0 && structdef == snone))
2411         {
2412           structdef = skeyseen;
2413           structtype = toktype;
2414         }
2415       return FALSE;
2416     }
2417
2418   if (structdef == skeyseen)
2419     {
2420       /* Save the tag for struct/union/class, for functions and variables
2421          that may be defined inside. */
2422       if (structtype == st_C_struct)
2423         structtag = savenstr (str, len);
2424       else
2425         structtag = "<enum>";
2426       structdef = stagseen;
2427       return TRUE;
2428     }
2429
2430   if (typdef != tnone)
2431     definedef = dnone;
2432
2433   /* Detect Objective C constructs. */
2434   switch (objdef)
2435     {
2436     case onone:
2437       switch (toktype)
2438         {
2439         case st_C_objprot:
2440           objdef = oprotocol;
2441           return FALSE;
2442         case st_C_objimpl:
2443           objdef = oimplementation;
2444           return FALSE;
2445         }
2446       break;
2447     case oimplementation:
2448       /* Save the class tag for functions or variables defined inside. */
2449       objtag = savenstr (str, len);
2450       objdef = oinbody;
2451       return FALSE;
2452     case oprotocol:
2453       /* Save the class tag for categories. */
2454       objtag = savenstr (str, len);
2455       objdef = otagseen;
2456       *is_func_or_var = TRUE;
2457       return TRUE;
2458     case oparenseen:
2459       objdef = ocatseen;
2460       *is_func_or_var = TRUE;
2461       return TRUE;
2462     case oinbody:
2463       break;
2464     case omethodsign:
2465       if (parlev == 0)
2466         {
2467           objdef = omethodtag;
2468           linebuffer_setlen (&token_name, len);
2469           strncpy (token_name.buffer, str, len);
2470           token_name.buffer[len] = '\0';
2471           return TRUE;
2472         }
2473       return FALSE;
2474     case omethodcolon:
2475       if (parlev == 0)
2476         objdef = omethodparm;
2477       return FALSE;
2478     case omethodparm:
2479       if (parlev == 0)
2480         {
2481           objdef = omethodtag;
2482           linebuffer_setlen (&token_name, token_name.len + len);
2483           strncat (token_name.buffer, str, len);
2484           return TRUE;
2485         }
2486       return FALSE;
2487     case oignore:
2488       if (toktype == st_C_objend)
2489         {
2490           /* Memory leakage here: the string pointed by objtag is
2491              never released, because many tests would be needed to
2492              avoid breaking on incorrect input code.  The amount of
2493              memory leaked here is the sum of the lengths of the
2494              class tags.
2495           free (objtag); */
2496           objdef = onone;
2497         }
2498       return FALSE;
2499     }
2500
2501   /* A function, variable or enum constant? */
2502   switch (toktype)
2503     {
2504     case st_C_extern:
2505       fvextern = TRUE;
2506       /* FALLTHRU */
2507     case st_C_typespec:
2508       if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2509         fvdef = fvnone;         /* should be useless */
2510       return FALSE;
2511     case st_C_ignore:
2512       fvextern = FALSE;
2513       fvdef = vignore;
2514       return FALSE;
2515     case st_C_operator:
2516       fvdef = foperator;
2517       *is_func_or_var = TRUE;
2518       return TRUE;
2519     case st_none:
2520       if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
2521         {
2522           fvdef = foperator;
2523           *is_func_or_var = TRUE;
2524           return TRUE;
2525         }
2526       if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
2527         return TRUE;
2528       switch (fvdef)
2529         {
2530         case fdefunkey:
2531           fvdef = fdefunname;   /* GNU macro */
2532           *is_func_or_var = TRUE;
2533           return TRUE;
2534         case fvnone:
2535           fvdef = fvnameseen;   /* function or variable */
2536           *is_func_or_var = TRUE;
2537           return TRUE;
2538         }
2539       break;
2540     }
2541
2542   return FALSE;
2543 }
2544
2545 \f
2546 /*
2547  * C_entries often keeps pointers to tokens or lines which are older than
2548  * the line currently read.  By keeping two line buffers, and switching
2549  * them at end of line, it is possible to use those pointers.
2550  */
2551 struct
2552 {
2553   long linepos;                 /* charno just before the line was read */
2554   linebuffer lb;                /* the text of the line */
2555 } lbs[2];
2556 
/* The macros below are meant for use inside C_entries only: they refer
   to its local variables curndx, newndx, lp, quotednl, savetoken and
   to its parameter inf. */
2557 #define current_lb_is_new (newndx == curndx)
2558 #define switch_line_buffers() (curndx = 1 - curndx)
2559 
2560 #define curlb (lbs[curndx].lb)
2561 #define newlb (lbs[newndx].lb)
2562 #define curlinepos (lbs[curndx].linepos)
2563 #define newlinepos (lbs[newndx].linepos)
2564 
/* Read the next input line into curlb and update lineno, linecharno,
   charno and lp accordingly.  quotednl is cleared and the current
   buffer becomes the new one.  definedef is left alone, so that a
   preprocessor line may continue across the newline. */
2565 #define CNL_SAVE_DEFINEDEF()                                            \
2566 do {                                                                    \
2567   curlinepos = charno;                                                  \
2568   lineno++;                                                             \
2569   linecharno = charno;                                                  \
2570   charno += readline (&curlb, inf);                                     \
2571   lp = curlb.buffer;                                                    \
2572   quotednl = FALSE;                                                     \
2573   newndx = curndx;                                                      \
2574 } while (0)
2575 
/* Like CNL_SAVE_DEFINEDEF, but also end the preprocessor line: restore
   the token saved in savetoken, if any, and reset definedef to dnone. */
2576 #define CNL()                                                           \
2577 do {                                                                    \
2578   CNL_SAVE_DEFINEDEF();                                                 \
2579   if (savetoken.valid)                                                  \
2580     {                                                                   \
2581       token = savetoken;                                                \
2582       savetoken.valid = FALSE;                                          \
2583     }                                                                   \
2584   definedef = dnone;                                                    \
2585 } while (0)
2586
2587
2588 static void
2589 make_C_tag (isfun)
2590      bool isfun;
2591 {
2592   /* This function should never be called when token.valid is FALSE, but
2593      we must protect against invalid input or internal errors. */
2594   if (DEBUG || token.valid)
2595     {
2596       if (traditional_tag_style)
2597         {
2598           /* This was the original code.  Now we call new_pfnote instead,
2599              which uses the new method for naming tags (see new_pfnote). */
2600           char *name = NULL;
2601
2602           if (CTAGS || token.named)
2603             {
2604               name = savestr (token_name.buffer);
2605               if (!token.valid)
2606                 name = concat (name, "##invalid##", "");
2607             }
2608           pfnote (name, isfun,
2609                   token.line, token.linelen, token.lineno, token.linepos);
2610         }
2611       else
2612         new_pfnote (token_name.buffer, token_name.len, isfun,
2613                     token.line, token.linelen, token.lineno, token.linepos);
2614       token.valid = FALSE;
2615     }
2616 }
2617
2618
2619 /*
2620  * C_entries ()
2621  *      This routine finds functions, variables, typedefs,
2622  *      #define's, enum constants and struct/union/enum definitions in
2623  *      C syntax and adds them to the list.
2624  */
2625 static void
2626 C_entries (c_ext, inf)
2627      int c_ext;                 /* extension of C */
2628      FILE *inf;                 /* input file */
2629 {
2630   register char c;              /* latest char read; '\0' for end of line */
2631   register char *lp;            /* pointer one beyond the character `c' */
2632   int curndx, newndx;           /* indices for current and new lb */
2633   register int tokoff;          /* offset in line of start of current token */
2634   register int toklen;          /* length of current token */
2635   char *qualifier;              /* string used to qualify names */
2636   int qlen;                     /* length of qualifier */
2637   int cblev;                    /* current curly brace level */
2638   int parlev;                   /* current parenthesis level */
2639   bool incomm, inquote, inchar, quotednl, midtoken;
2640   bool cplpl, cjava;
2641   bool yacc_rules;              /* in the rules part of a yacc file */
2642   struct tok savetoken;         /* token saved during preprocessor handling */
2643
2644
2645   initbuffer (&token_name);
2646   initbuffer (&lbs[0].lb);
2647   initbuffer (&lbs[1].lb);
2648
2649   tokoff = toklen = 0;          /* keep compiler quiet */
2650   curndx = newndx = 0;
2651   lineno = 0;
2652   charno = 0;
2653   lp = curlb.buffer;
2654   *lp = 0;
2655
2656   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2657   structdef = snone; definedef = dnone; objdef = onone;
2658   yacc_rules = FALSE;
2659   midtoken = inquote = inchar = incomm = quotednl = FALSE;
2660   token.valid = savetoken.valid = FALSE;
2661   cblev = 0;
2662   parlev = 0;
2663   cplpl = (c_ext & C_PLPL) == C_PLPL;
2664   cjava = (c_ext & C_JAVA) == C_JAVA;
2665   if (cjava)
2666     { qualifier = "."; qlen = 1; }
2667   else
2668     { qualifier = "::"; qlen = 2; }
2669
2670
2671   while (!feof (inf))
2672     {
2673       c = *lp++;
2674       if (c == '\\')
2675         {
2676           /* If we're at the end of the line, the next character is a
2677              '\0'; don't skip it, because it's the thing that tells us
2678              to read the next line.  */
2679           if (*lp == '\0')
2680             {
2681               quotednl = TRUE;
2682               continue;
2683             }
2684           lp++;
2685           c = ' ';
2686         }
2687       else if (incomm)
2688         {
2689           switch (c)
2690             {
2691             case '*':
2692               if (*lp == '/')
2693                 {
2694                   c = *lp++;
2695                   incomm = FALSE;
2696                 }
2697               break;
2698             case '\0':
2699               /* Newlines inside comments do not end macro definitions in
2700                  traditional cpp. */
2701               CNL_SAVE_DEFINEDEF ();
2702               break;
2703             }
2704           continue;
2705         }
2706       else if (inquote)
2707         {
2708           switch (c)
2709             {
2710             case '"':
2711               inquote = FALSE;
2712               break;
2713             case '\0':
2714               /* Newlines inside strings do not end macro definitions
2715                  in traditional cpp, even though compilers don't
2716                  usually accept them. */
2717               CNL_SAVE_DEFINEDEF ();
2718               break;
2719             }
2720           continue;
2721         }
2722       else if (inchar)
2723         {
2724           switch (c)
2725             {
2726             case '\0':
2727               /* Hmmm, something went wrong. */
2728               CNL ();
2729               /* FALLTHRU */
2730             case '\'':
2731               inchar = FALSE;
2732               break;
2733             }
2734           continue;
2735         }
2736       else
2737         switch (c)
2738           {
2739           case '"':
2740             inquote = TRUE;
2741             switch (fvdef)
2742               {
2743               case fdefunkey:
2744               case finlist:
2745               case fignore:
2746               case vignore:
2747                 break;
2748               default:
2749                 fvextern = FALSE;
2750                 fvdef = fvnone;
2751               }
2752             continue;
2753           case '\'':
2754             inchar = TRUE;
2755             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2756               {
2757                 fvextern = FALSE;
2758                 fvdef = fvnone;
2759               }
2760             continue;
2761           case '/':
2762             if (*lp == '*')
2763               {
2764                 lp++;
2765                 incomm = TRUE;
2766                 continue;
2767               }
2768             else if (/* cplpl && */ *lp == '/')
2769               {
2770                 c = '\0';
2771                 break;
2772               }
2773             else
2774               break;
2775           case '%':
2776             if ((c_ext & YACC) && *lp == '%')
2777               {
2778                 /* entering or exiting rules section in yacc file */
2779                 lp++;
2780                 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2781                 typdef = tnone; structdef = snone;
2782                 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2783                 cblev = 0;
2784                 yacc_rules = !yacc_rules;
2785                 continue;
2786               }
2787             else
2788               break;
2789           case '#':
2790             if (definedef == dnone)
2791               {
2792                 char *cp;
2793                 bool cpptoken = TRUE;
2794
2795                 /* Look back on this line.  If all blanks, or nonblanks
2796                    followed by an end of comment, this is a preprocessor
2797                    token. */
2798                 for (cp = newlb.buffer; cp < lp-1; cp++)
2799                   if (!iswhite (*cp))
2800                     {
2801                       if (*cp == '*' && *(cp+1) == '/')
2802                         {
2803                           cp++;
2804                           cpptoken = TRUE;
2805                         }
2806                       else
2807                         cpptoken = FALSE;
2808                     }
2809                 if (cpptoken)
2810                   definedef = dsharpseen;
2811               } /* if (definedef == dnone) */
2812
2813             continue;
2814           } /* switch (c) */
2815
2816
2817       /* Consider token only if some complicated conditions are satisfied. */
2818       if (typdef != tignore
2819           && definedef != dignorerest
2820           && fvdef != finlist
2821           && (definedef != dnone
2822               || (cblev == 0 && structdef != scolonseen)
2823               || (cblev == 1 && cplpl && structdef == sinbody)
2824               || (PUREC && structdef == sinbody))
2825           )
2826         {
2827           if (midtoken)
2828             {
2829               if (endtoken (c))
2830                 {
2831                   if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2832                     {
2833                       /*
2834                        * This handles :: in the middle, but not at the
2835                        * beginning of an identifier.  Also, space-separated
2836                        * :: is not recognised.
2837                        */
2838                       lp += 2;
2839                       toklen += 2;
2840                       c = lp[-1];
2841                       goto intoken;
2842                     }
2843                   else
2844                     {
2845                       bool funorvar = FALSE;
2846
2847                       if (yacc_rules
2848                           || consider_token (newlb.buffer + tokoff, toklen, c,
2849                                              c_ext, cblev, parlev, &funorvar))
2850                         {
2851                           if (fvdef == foperator)
2852                             {
2853                               char *oldlp = lp;
2854                               lp = skip_spaces (lp-1);
2855                               if (*lp != '\0')
2856                                 lp += 1;
2857                               while (*lp != '\0'
2858                                      && !iswhite (*lp) && *lp != '(')
2859                                 lp += 1;
2860                               c = *lp++;
2861                               toklen += lp - oldlp;
2862                             }
2863                           token.named = FALSE;
2864                           if (!PUREC
2865                               && funorvar
2866                               && definedef == dnone
2867                               && structdef == sinbody)
2868                             /* function or var defined in C++ class body */
2869                             {
2870                               int len = strlen (structtag) + qlen + toklen;
2871                               linebuffer_setlen (&token_name, len);
2872                               strcpy (token_name.buffer, structtag);
2873                               strcat (token_name.buffer, qualifier);
2874                               strncat (token_name.buffer,
2875                                        newlb.buffer + tokoff, toklen);
2876                               token.named = TRUE;
2877                             }
2878                           else if (objdef == ocatseen)
2879                             /* Objective C category */
2880                             {
2881                               int len = strlen (objtag) + 2 + toklen;
2882                               linebuffer_setlen (&token_name, len);
2883                               strcpy (token_name.buffer, objtag);
2884                               strcat (token_name.buffer, "(");
2885                               strncat (token_name.buffer,
2886                                        newlb.buffer + tokoff, toklen);
2887                               strcat (token_name.buffer, ")");
2888                               token.named = TRUE;
2889                             }
2890                           else if (objdef == omethodtag
2891                                    || objdef == omethodparm)
2892                             /* Objective C method */
2893                             {
2894                               token.named = TRUE;
2895                             }
2896                           else if (fvdef == fdefunname)
2897                             {
2898                               bool defun = (newlb.buffer[tokoff] == 'F');
2899                               int off = tokoff;
2900                               int len = toklen;
2901
2902                               /* Rewrite the tag so that emacs lisp DEFUNs
2903                                  can be found by their elisp name */
2904                               if (defun)
2905                                 {
2906                                   off += 1;
2907                                   len -= 1;
2908                                 }
2909                               len = toklen;
2910                               linebuffer_setlen (&token_name, len);
2911                               strncpy (token_name.buffer,
2912                                        newlb.buffer + off, len);
2913                               token_name.buffer[len] = '\0';
2914                               if (defun)
2915                                 while (--len >= 0)
2916                                   if (token_name.buffer[len] == '_')
2917                                     token_name.buffer[len] = '-';
2918                               token.named = defun;
2919                             }
2920                           else
2921                             {
2922                               linebuffer_setlen (&token_name, toklen);
2923                               strncpy (token_name.buffer,
2924                                        newlb.buffer + tokoff, toklen);
2925                               token_name.buffer[toklen] = '\0';
2926                               /* Name macros and members. */
2927                               token.named = (structdef == stagseen
2928                                              || typdef == ttypeseen
2929                                              || typdef == tend
2930                                              || (funorvar
2931                                                  && definedef == dignorerest)
2932                                              || (funorvar
2933                                                  && definedef == dnone
2934                                                  && structdef == sinbody));
2935                             }
2936                           token.lineno = lineno;
2937                           token.linelen = tokoff + toklen + 1;
2938                           token.line = newlb.buffer;
2939                           token.linepos = newlinepos;
2940                           token.valid = TRUE;
2941
2942                           if (definedef == dnone
2943                               && (fvdef == fvnameseen
2944                                   || fvdef == foperator
2945                                   || structdef == stagseen
2946                                   || typdef == tend
2947                                   || objdef != onone))
2948                             {
2949                               if (current_lb_is_new)
2950                                 switch_line_buffers ();
2951                             }
2952                           else
2953                             make_C_tag (funorvar);
2954                         }
2955                       midtoken = FALSE;
2956                     }
2957                 } /* if (endtoken (c)) */
2958               else if (intoken (c))
2959                 intoken:
2960                 {
2961                   toklen++;
2962                   continue;
2963                 }
2964             } /* if (midtoken) */
2965           else if (begtoken (c))
2966             {
2967               switch (definedef)
2968                 {
2969                 case dnone:
2970                   switch (fvdef)
2971                     {
2972                     case fstartlist:
2973                       fvdef = finlist;
2974                       continue;
2975                     case flistseen:
2976                       make_C_tag (TRUE); /* a function */
2977                       fvdef = fignore;
2978                       break;
2979                     case fvnameseen:
2980                       fvdef = fvnone;
2981                       break;
2982                     }
2983                   if (structdef == stagseen && !cjava)
2984                     structdef = snone;
2985                   break;
2986                 case dsharpseen:
2987                   savetoken = token;
2988                 }
2989               if (!yacc_rules || lp == newlb.buffer + 1)
2990                 {
2991                   tokoff = lp - 1 - newlb.buffer;
2992                   toklen = 1;
2993                   midtoken = TRUE;
2994                 }
2995               continue;
2996             } /* if (begtoken) */
2997         } /* if must look at token */
2998
2999
3000       /* Detect end of line, colon, comma, semicolon and various braces
3001          after having handled a token.*/
3002       switch (c)
3003         {
3004         case ':':
3005           if (definedef != dnone)
3006             break;
3007           switch (objdef)
3008             {
3009             case  otagseen:
3010               objdef = oignore;
3011               make_C_tag (TRUE); /* an Objective C class */
3012               break;
3013             case omethodtag:
3014             case omethodparm:
3015               objdef = omethodcolon;
3016               linebuffer_setlen (&token_name, token_name.len + 1);
3017               strcat (token_name.buffer, ":");
3018               break;
3019             }
3020           if (structdef == stagseen)
3021             structdef = scolonseen;
3022           else
3023             switch (fvdef)
3024               {
3025               case fvnameseen:
3026                 if (yacc_rules)
3027                   {
3028                     make_C_tag (FALSE); /* a yacc function */
3029                     fvdef = fignore;
3030                   }
3031                 break;
3032               case fstartlist:
3033                 fvextern = FALSE;
3034                 fvdef = fvnone;
3035                 break;
3036               }
3037           break;
3038         case ';':
3039           if (definedef != dnone)
3040             break;
3041           switch (fvdef)
3042             {
3043             case fignore:
3044               break;
3045             case fvnameseen:
3046               if ((members && cblev == 1)
3047                   || (globals && cblev == 0 && (!fvextern || declarations)))
3048                 make_C_tag (FALSE); /* a variable */
3049               fvextern = FALSE;
3050               fvdef = fvnone;
3051               token.valid = FALSE;
3052               break;
3053             case flistseen:
3054               if ((declarations && typdef == tnone && cblev == 0)
3055                   || (members && cblev == 1))
3056                 make_C_tag (TRUE); /* a function declaration */
3057               /* FALLTHRU */
3058             default:
3059               fvextern = FALSE;
3060               fvdef = fvnone;
3061               if (typdef != tend)
3062                 /* The following instruction invalidates the token.
3063                    Probably the token should be invalidated in all other
3064                    cases where some state machine is reset prematurely. */
3065                 token.valid = FALSE;
3066             }
3067           if (cblev == 0)
3068             switch (typdef)
3069               {
3070               case tend:
3071                 make_C_tag (FALSE); /* a typedef */
3072                 /* FALLTHRU */
3073               default:
3074                 typdef = tnone;
3075               }
3076           if (structdef == stagseen)
3077             structdef = snone;
3078           break;
3079         case ',':
3080           if (definedef != dnone)
3081             break;
3082           switch (objdef)
3083             {
3084             case omethodtag:
3085             case omethodparm:
3086               make_C_tag (TRUE); /* an Objective C method */
3087               objdef = oinbody;
3088               break;
3089             }
3090           switch (fvdef)
3091             {
3092             case fdefunkey:
3093             case foperator:
3094             case finlist:
3095             case fignore:
3096             case vignore:
3097               break;
3098             case fdefunname:
3099               fvdef = fignore;
3100               break;
3101             case flistseen:     /* a function */
3102               if (!declarations)
3103                 {
3104                   fvdef = fvnone;
3105                   break;
3106                 }
3107               /* FALLTHRU */
3108             case fvnameseen:    /* a variable */
3109               if ((members && structdef == sinbody && cblev == 1)
3110                   || (globals && cblev == 0 && (!fvextern || declarations)))
3111                 make_C_tag (FALSE);
3112               /* FALLTHRU */
3113             default:
3114               fvdef = fvnone;
3115             }
3116           if (structdef == stagseen)
3117             structdef = snone;
3118           break;
3119         case '[':
3120           if (definedef != dnone)
3121             break;
3122           if (cblev == 0 && typdef == tend)
3123             {
3124               typdef = tignore;
3125               make_C_tag (FALSE);       /* a typedef */
3126               break;
3127             }
3128           switch (fvdef)
3129             {
3130             case foperator:
3131             case finlist:
3132             case fignore:
3133             case vignore:
3134               break;
3135             case fvnameseen:
3136               if ((members && cblev == 1)
3137                   || (globals && cblev == 0 && (!fvextern || declarations)))
3138                 make_C_tag (FALSE); /* a variable */
3139               /* FALLTHRU */
3140             default:
3141               fvdef = fvnone;
3142             }
3143           if (structdef == stagseen)
3144             structdef = snone;
3145           break;
3146         case '(':
3147           if (definedef != dnone)
3148             break;
3149           if (objdef == otagseen && parlev == 0)
3150             objdef = oparenseen;
3151           switch (fvdef)
3152             {
3153             case fvnameseen:
3154               if (typdef == ttypeseen
3155                   && token.valid
3156                   && *lp != '*'
3157                   && structdef != sinbody)
3158                 {
3159                   /* This handles constructs like:
3160                      typedef void OperatorFun (int fun); */
3161                   make_C_tag (FALSE);
3162                   typdef = tignore;
3163                 }
3164               /* FALLTHRU */
3165             case foperator:
3166               fvdef = fstartlist;
3167               break;
3168             case flistseen:
3169               fvdef = finlist;
3170               break;
3171             }
3172           parlev++;
3173           break;
3174         case ')':
3175           if (definedef != dnone)
3176             break;
3177           if (objdef == ocatseen && parlev == 1)
3178             {
3179               make_C_tag (TRUE); /* an Objective C category */
3180               objdef = oignore;
3181             }
3182           if (--parlev == 0)
3183             {
3184               switch (fvdef)
3185                 {
3186                 case fstartlist:
3187                 case finlist:
3188                   fvdef = flistseen;
3189                   break;
3190                 }
3191               if (cblev == 0 && (typdef == tend))
3192                 {
3193                   typdef = tignore;
3194                   make_C_tag (FALSE); /* a typedef */
3195                 }
3196             }
3197           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3198             parlev = 0;
3199           break;
3200         case '{':
3201           if (definedef != dnone)
3202             break;
3203           if (typdef == ttypeseen)
3204             typdef = tinbody;
3205           switch (fvdef)
3206             {
3207             case flistseen:
3208               make_C_tag (TRUE);    /* a function */
3209               /* FALLTHRU */
3210             case fignore:
3211               fvdef = fvnone;
3212               break;
3213             case fvnone:
3214               switch (objdef)
3215                 {
3216                 case otagseen:
3217                   make_C_tag (TRUE); /* an Objective C class */
3218                   objdef = oignore;
3219                   break;
3220                 case omethodtag:
3221                 case omethodparm:
3222                   make_C_tag (TRUE); /* an Objective C method */
3223                   objdef = oinbody;
3224                   break;
3225                 default:
3226                   /* Neutralize `extern "C" {' grot. */
3227                   if (cblev == 0 && structdef == snone && typdef == tnone)
3228                     cblev = -1;
3229                 }
3230             }
3231           switch (structdef)
3232             {
3233             case skeyseen:         /* unnamed struct */
3234               structdef = sinbody;
3235               structtag = "_anonymous_";
3236               break;
3237             case stagseen:
3238             case scolonseen:    /* named struct */
3239               structdef = sinbody;
3240               make_C_tag (FALSE);  /* a struct */
3241               break;
3242             }
3243           cblev++;
3244           break;
3245         case '*':
3246           if (definedef != dnone)
3247             break;
3248           if (fvdef == fstartlist)
3249             fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
3250           break;
3251         case '}':
3252           if (definedef != dnone)
3253             break;
3254           if (!noindentypedefs && lp == newlb.buffer + 1)
3255             {
3256               cblev = 0;        /* reset curly brace level if first column */
3257               parlev = 0;       /* also reset paren level, just in case... */
3258             }
3259           else if (cblev > 0)
3260             cblev--;
3261           if (cblev == 0)
3262             {
3263               if (typdef == tinbody)
3264                 typdef = tend;
3265               /* Memory leakage here: the string pointed by structtag is
3266                  never released, because I fear to miss something and
3267                  break things while freeing the area.  The amount of
3268                  memory leaked here is the sum of the lengths of the
3269                  struct tags.
3270               if (structdef == sinbody)
3271                 free (structtag); */
3272
3273               structdef = snone;
3274               structtag = "<error>";
3275             }
3276           break;
3277         case '=':
3278           if (definedef != dnone)
3279             break;
3280           switch (fvdef)
3281             {
3282             case foperator:
3283             case finlist:
3284             case fignore:
3285             case vignore:
3286               break;
3287             case fvnameseen:
3288               if ((members && cblev == 1)
3289                   || (globals && cblev == 0 && (!fvextern || declarations)))
3290                 make_C_tag (FALSE); /* a variable */
3291               /* FALLTHRU */
3292             default:
3293               fvdef = vignore;
3294             }
3295           break;
3296         case '+':
3297         case '-':
3298           if (objdef == oinbody && cblev == 0)
3299             {
3300               objdef = omethodsign;
3301               break;
3302             }
3303           /* FALLTHRU */
3304         case '#': case '~': case '&': case '%': case '/': case '|':
3305         case '^': case '!': case '<': case '>': case '.': case '?': case ']':
3306           if (definedef != dnone)
3307             break;
3308           /* These surely cannot follow a function tag in C. */
3309           switch (fvdef)
3310             {
3311             case foperator:
3312             case finlist:
3313             case fignore:
3314             case vignore:
3315               break;
3316             default:
3317               fvdef = fvnone;
3318             }
3319           break;
3320         case '\0':
3321           if (objdef == otagseen)
3322             {
3323               make_C_tag (TRUE); /* an Objective C class */
3324               objdef = oignore;
3325             }
3326           /* If a macro spans multiple lines don't reset its state. */
3327           if (quotednl)
3328             CNL_SAVE_DEFINEDEF ();
3329           else
3330             CNL ();
3331           break;
3332         } /* switch (c) */
3333
3334     } /* while not eof */
3335
3336   free (token_name.buffer);
3337   free (lbs[0].lb.buffer);
3338   free (lbs[1].lb.buffer);
3339 }
3340
3341 /*
3342  * Process either a C++ file or a C file depending on the setting
3343  * of a global flag.
3344  */
3345 static void
3346 default_C_entries (inf)
3347      FILE *inf;
3348 {
3349   C_entries (cplusplus ? C_PLPL : 0, inf);
3350 }
3351
/* Tag INF as plain ANSI C: no extension bits are passed to C_entries. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3359
3360 /* Always do C++. */
3361 static void
3362 Cplusplus_entries (inf)
3363      FILE *inf;
3364 {
3365   C_entries (C_PLPL, inf);
3366 }
3367
3368 /* Always do Java. */
3369 static void
3370 Cjava_entries (inf)
3371      FILE *inf;
3372 {
3373   C_entries (C_JAVA, inf);
3374 }
3375
3376 /* Always do C*. */
3377 static void
3378 Cstar_entries (inf)
3379      FILE *inf;
3380 {
3381   C_entries (C_STAR, inf);
3382 }
3383
3384 /* Always do Yacc. */
3385 static void
3386 Yacc_entries (inf)
3387      FILE *inf;
3388 {
3389   C_entries (YACC, inf);
3390 }
3391
3392 \f
/*
 * Loop header that runs the following statement once per input line.
 *
 * On entry `lineno' and `charno' are reset to 0.  Before every
 * iteration, and only while FILE_POINTER is not at end of file, the
 * macro increments `lineno', saves the current `charno' in
 * `linecharno', reads the next line into LINE_BUFFER with readline
 * (adding its return value to `charno'), and points CHAR_POINTER at
 * the start of LINE_BUFFER's text.
 *
 * The `for' has no increment clause, so a `continue' in the body
 * simply proceeds to the next line.
 *
 * CHAR_POINTER is taken from the LINE_BUFFER argument itself, not from
 * the global `lb', so the macro also works with other line buffers.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           char_pointer = (line_buffer).buffer,                         \
           TRUE);                                                       \
      )
3403
3404
3405 /*
3406  * Read a file, but do no processing.  This is used to do regexp
3407  * matching on files that have no language defined.
3408  */
3409 static void
3410 just_read_file (inf)
3411      FILE *inf;
3412 {
3413   register char *dummy;
3414
3415   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3416     continue;
3417 }
3418
3419 \f
3420 /* Fortran parsing */
3421
3422 static bool tail P_((char *));
3423 static void takeprec P_((void));
3424 static void getit P_((FILE *));
3425
3426 static bool
3427 tail (cp)
3428      char *cp;
3429 {
3430   register int len = 0;
3431
3432   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3433     cp++, len++;
3434   if (*cp == '\0' && !intoken (dbp[len]))
3435     {
3436       dbp += len;
3437       return TRUE;
3438     }
3439   return FALSE;
3440 }
3441
3442 static void
3443 takeprec ()
3444 {
3445   dbp = skip_spaces (dbp);
3446   if (*dbp != '*')
3447     return;
3448   dbp++;
3449   dbp = skip_spaces (dbp);
3450   if (strneq (dbp, "(*)", 3))
3451     {
3452       dbp += 3;
3453       return;
3454     }
3455   if (!ISDIGIT (*dbp))
3456     {
3457       --dbp;                    /* force failure */
3458       return;
3459     }
3460   do
3461     dbp++;
3462   while (ISDIGIT (*dbp));
3463 }
3464
3465 static void
3466 getit (inf)
3467      FILE *inf;
3468 {
3469   register char *cp;
3470
3471   dbp = skip_spaces (dbp);
3472   if (*dbp == '\0')
3473     {
3474       lineno++;
3475       linecharno = charno;
3476       charno += readline (&lb, inf);
3477       dbp = lb.buffer;
3478       if (dbp[5] != '&')
3479         return;
3480       dbp += 6;
3481       dbp = skip_spaces (dbp);
3482     }
3483   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3484     return;
3485   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3486     continue;
3487   pfnote (savenstr (dbp, cp-dbp), TRUE,
3488           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3489 }
3490
3491
3492 static void
3493 Fortran_functions (inf)
3494      FILE *inf;
3495 {
3496   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3497     {
3498       if (*dbp == '%')
3499         dbp++;                  /* Ratfor escape to fortran */
3500       dbp = skip_spaces (dbp);
3501       if (*dbp == '\0')
3502         continue;
3503       switch (lowcase (*dbp))
3504         {
3505         case 'i':
3506           if (tail ("integer"))
3507             takeprec ();
3508           break;
3509         case 'r':
3510           if (tail ("real"))
3511             takeprec ();
3512           break;
3513         case 'l':
3514           if (tail ("logical"))
3515             takeprec ();
3516           break;
3517         case 'c':
3518           if (tail ("complex") || tail ("character"))
3519             takeprec ();
3520           break;
3521         case 'd':
3522           if (tail ("double"))
3523             {
3524               dbp = skip_spaces (dbp);
3525               if (*dbp == '\0')
3526                 continue;
3527               if (tail ("precision"))
3528                 break;
3529               continue;
3530             }
3531           break;
3532         }
3533       dbp = skip_spaces (dbp);
3534       if (*dbp == '\0')
3535         continue;
3536       switch (lowcase (*dbp))
3537         {
3538         case 'f':
3539           if (tail ("function"))
3540             getit (inf);
3541           continue;
3542         case 's':
3543           if (tail ("subroutine"))
3544             getit (inf);
3545           continue;
3546         case 'e':
3547           if (tail ("entry"))
3548             getit (inf);
3549           continue;
3550         case 'b':
3551           if (tail ("blockdata") || tail ("block data"))
3552             {
3553               dbp = skip_spaces (dbp);
3554               if (*dbp == '\0') /* assume un-named */
3555                 pfnote (savestr ("blockdata"), TRUE,
3556                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
3557               else
3558                 getit (inf);    /* look for name */
3559             }
3560           continue;
3561         }
3562     }
3563 }
3564
3565 \f
3566 /*
3567  * Ada parsing
3568  * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3569  */
3570
3571 static void adagetit P_((FILE *, char *));
3572
3573 /* Once we are positioned after an "interesting" keyword, let's get
3574    the real tag value necessary. */
3575 static void
3576 adagetit (inf, name_qualifier)
3577      FILE *inf;
3578      char *name_qualifier;
3579 {
3580   register char *cp;
3581   char *name;
3582   char c;
3583
3584   while (!feof (inf))
3585     {
3586       dbp = skip_spaces (dbp);
3587       if (*dbp == '\0'
3588           || (dbp[0] == '-' && dbp[1] == '-'))
3589         {
3590           lineno++;
3591           linecharno = charno;
3592           charno += readline (&lb, inf);
3593           dbp = lb.buffer;
3594         }
3595       switch (*dbp)
3596         {
3597         case 'b':
3598         case 'B':
3599           if (tail ("body"))
3600             {
3601               /* Skipping body of   procedure body   or   package body or ....
3602                  resetting qualifier to body instead of spec. */
3603               name_qualifier = "/b";
3604               continue;
3605             }
3606           break;
3607         case 't':
3608         case 'T':
3609           /* Skipping type of   task type   or   protected type ... */
3610           if (tail ("type"))
3611             continue;
3612           break;
3613         }
3614       if (*dbp == '"')
3615         {
3616           dbp += 1;
3617           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3618             continue;
3619         }
3620       else
3621         {
3622           dbp = skip_spaces (dbp);
3623           for (cp = dbp;
3624                (*cp != '\0'
3625                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
3626                cp++)
3627             continue;
3628           if (cp == dbp)
3629             return;
3630         }
3631       c = *cp;
3632       *cp = '\0';
3633       name = concat (dbp, name_qualifier, "");
3634       *cp = c;
3635       pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3636       if (c == '"')
3637         dbp = cp + 1;
3638       return;
3639     }
3640 }
3641
3642 static void
3643 Ada_funcs (inf)
3644      FILE *inf;
3645 {
3646   bool inquote = FALSE;
3647
3648   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3649     {
3650       while (*dbp != '\0')
3651         {
3652           /* Skip a string i.e. "abcd". */
3653           if (inquote || (*dbp == '"'))
3654             {
3655               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3656               if (dbp != NULL)
3657                 {
3658                   inquote = FALSE;
3659                   dbp += 1;
3660                   continue;     /* advance char */
3661                 }
3662               else
3663                 {
3664                   inquote = TRUE;
3665                   break;        /* advance line */
3666                 }
3667             }
3668
3669           /* Skip comments. */
3670           if (dbp[0] == '-' && dbp[1] == '-')
3671             break;              /* advance line */
3672
3673           /* Skip character enclosed in single quote i.e. 'a'
3674              and skip single quote starting an attribute i.e. 'Image. */
3675           if (*dbp == '\'')
3676             {
3677               dbp++ ;
3678               if (*dbp != '\0')
3679                 dbp++;
3680               continue;
3681             }
3682
3683           /* Search for beginning of a token.  */
3684           if (!begtoken (*dbp))
3685             {
3686               dbp++;
3687               continue;         /* advance char */
3688             }
3689
3690           /* We are at the beginning of a token. */
3691           switch (*dbp)
3692             {
3693             case 'f':
3694             case 'F':
3695               if (!packages_only && tail ("function"))
3696                 adagetit (inf, "/f");
3697               else
3698                 break;          /* from switch */
3699               continue;         /* advance char */
3700             case 'p':
3701             case 'P':
3702               if (!packages_only && tail ("procedure"))
3703                 adagetit (inf, "/p");
3704               else if (tail ("package"))
3705                 adagetit (inf, "/s");
3706               else if (tail ("protected")) /* protected type */
3707                 adagetit (inf, "/t");
3708               else
3709                 break;          /* from switch */
3710               continue;         /* advance char */
3711             case 't':
3712             case 'T':
3713               if (!packages_only && tail ("task"))
3714                 adagetit (inf, "/k");
3715               else if (typedefs && !packages_only && tail ("type"))
3716                 {
3717                   adagetit (inf, "/t");
3718                   while (*dbp != '\0')
3719                     dbp += 1;
3720                 }
3721               else
3722                 break;          /* from switch */
3723               continue;         /* advance char */
3724             }
3725
3726           /* Look for the end of the token. */
3727           while (!endtoken (*dbp))
3728             dbp++;
3729
3730         } /* advance char */
3731     } /* advance line */
3732 }
3733
3734 \f
3735 /*
3736  * Bob Weiner, Motorola Inc., 4/3/94
3737  * Unix and microcontroller assembly tag handling
3738  * look for '^[a-zA-Z_.$][a-zA-Z0-9_.$]*[: ^I^J]'
3739  */
3740 static void
3741 Asm_labels (inf)
3742      FILE *inf;
3743 {
3744   register char *cp;
3745
3746   LOOP_ON_INPUT_LINES (inf, lb, cp)
3747     {
3748       /* If first char is alphabetic or one of [_.$], test for colon
3749          following identifier. */
3750       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3751         {
3752           /* Read past label. */
3753           cp++;
3754           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3755             cp++;
3756           if (*cp == ':' || iswhite (*cp))
3757             {
3758               /* Found end of label, so copy it and add it to the table. */
3759               pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3760                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3761             }
3762         }
3763     }
3764 }
3765
3766 \f
3767 /*
3768  * Perl support
3769  * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3770  * Perl variable names: /^(my|local).../
3771  * Bart Robinson <lomew@cs.utah.edu> (1995)
3772  * Michael Ernst <mernst@alum.mit.edu> (1997)
3773  */
3774 static void
3775 Perl_functions (inf)
3776      FILE *inf;
3777 {
3778   register char *cp;
3779
3780   LOOP_ON_INPUT_LINES (inf, lb, cp)
3781     {
3782       if (*cp++ == 's'
3783           && *cp++ == 'u'
3784           && *cp++ == 'b' && iswhite (*cp++))
3785         {
3786           cp = skip_spaces (cp);
3787           if (*cp != '\0')
3788             {
3789               char *sp = cp;
3790               while (*cp != '\0'
3791                      && !iswhite (*cp) && *cp != '{' && *cp != '(')
3792                 cp++;
3793               pfnote (savenstr (sp, cp-sp), TRUE,
3794                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3795             }
3796         }
3797        else if (globals         /* only if tagging global vars is enabled */
3798                 && ((cp = lb.buffer,
3799                      *cp++ == 'm'
3800                      && *cp++ == 'y')
3801                     || (cp = lb.buffer,
3802                         *cp++ == 'l'
3803                         && *cp++ == 'o'
3804                         && *cp++ == 'c'
3805                         && *cp++ == 'a'
3806                         && *cp++ == 'l'))
3807                 && (*cp == '(' || iswhite (*cp)))
3808         {
3809           /* After "my" or "local", but before any following paren or space. */
3810           char *varname = NULL;
3811
3812           cp = skip_spaces (cp);
3813           if (*cp == '$' || *cp == '@' || *cp == '%')
3814             {
3815               char* varstart = ++cp;
3816               while (ISALNUM (*cp) || *cp == '_')
3817                 cp++;
3818               varname = savenstr (varstart, cp-varstart);
3819             }
3820           else
3821             {
3822               /* Should be examining a variable list at this point;
3823                  could insist on seeing an open parenthesis. */
3824               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
3825                 cp++;
3826             }
3827
3828           /* Perhaps I should back cp up one character, so the TAGS table
3829              doesn't mention (and so depend upon) the following char. */
3830           pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3831                   FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3832         }
3833     }
3834 }
3835
3836 \f
3837 /*
3838  * Python support
3839  * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
3840  * Eric S. Raymond <esr@thyrsus.com> (1997)
3841  */
3842 static void
3843 Python_functions (inf)
3844      FILE *inf;
3845 {
3846   register char *cp;
3847
3848   LOOP_ON_INPUT_LINES (inf, lb, cp)
3849     {
3850       if (*cp++ == 'd'
3851           && *cp++ == 'e'
3852           && *cp++ == 'f' && iswhite (*cp++))
3853         {
3854           cp = skip_spaces (cp);
3855           while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3856             cp++;
3857           pfnote (NULL, TRUE,
3858                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3859         }
3860
3861       cp = lb.buffer;
3862       if (*cp++ == 'c'
3863           && *cp++ == 'l'
3864           && *cp++ == 'a'
3865           && *cp++ == 's'
3866           && *cp++ == 's' && iswhite (*cp++))
3867         {
3868           cp = skip_spaces (cp);
3869           while (*cp != '\0' && !iswhite (*cp) && *cp != '(' && *cp != ':')
3870             cp++;
3871           pfnote (NULL, TRUE,
3872                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3873         }
3874     }
3875 }
3876
3877 \f
3878 /* Idea by Corny de Souza
3879  * Cobol tag functions
3880  * We could look for anything that could be a paragraph name.
3881  * i.e. anything that starts in column 8 is one word and ends in a full stop.
3882  */
3883 static void
3884 Cobol_paragraphs (inf)
3885      FILE *inf;
3886 {
3887   register char *bp, *ep;
3888
3889   LOOP_ON_INPUT_LINES (inf, lb, bp)
3890     {
3891       if (lb.len < 9)
3892         continue;
3893       bp += 8;
3894
3895       /* If eoln, compiler option or comment ignore whole line. */
3896       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
3897         continue;
3898
3899       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
3900         continue;
3901       if (*ep++ == '.')
3902         pfnote (savenstr (bp, ep-bp), TRUE,
3903                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
3904     }
3905 }
3906
3907 \f
3908 /*
3909  * Makefile support
3910  * Idea by Assar Westerlund <assar@sics.se> (2001)
3911  */
3912 static void
3913 Makefile_targets (inf)
3914      FILE *inf;
3915 {
3916   register char *bp;
3917
3918   LOOP_ON_INPUT_LINES (inf, lb, bp)
3919     {
3920       if (*bp == '\t' || *bp == '#')
3921         continue;
3922       while (*bp != '\0' && *bp != '=' && *bp != ':')
3923         bp++;
3924       if (*bp == ':')
3925         pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
3926                 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
3927     }
3928 }
3929
3930 \f
3931 /* Added by Mosur Mohan, 4/22/88 */
3932 /* Pascal parsing                */
3933
3934 /*
3935  *  Locates tags for procedures & functions.  Doesn't do any type- or
3936  *  var-definitions.  It does look for the keyword "extern" or
3937  *  "forward" immediately following the procedure statement; if found,
3938  *  the tag is skipped.
3939  */
3940 static void
3941 Pascal_functions (inf)
3942      FILE *inf;
3943 {
3944   linebuffer tline;             /* mostly copied from C_entries */
3945   long save_lcno;
3946   int save_lineno, save_len;
3947   char c, *cp, *namebuf;
3948
3949   bool                          /* each of these flags is TRUE iff: */
3950     incomment,                  /* point is inside a comment */
3951     inquote,                    /* point is inside '..' string */
3952     get_tagname,                /* point is after PROCEDURE/FUNCTION
3953                                    keyword, so next item = potential tag */
3954     found_tag,                  /* point is after a potential tag */
3955     inparms,                    /* point is within parameter-list */
3956     verify_tag;                 /* point has passed the parm-list, so the
3957                                    next token will determine whether this
3958                                    is a FORWARD/EXTERN to be ignored, or
3959                                    whether it is a real tag */
3960
3961   save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
3962   namebuf = NULL;               /* keep compiler quiet */
3963   lineno = 0;
3964   charno = 0;
3965   dbp = lb.buffer;
3966   *dbp = '\0';
3967   initbuffer (&tline);
3968
3969   incomment = inquote = FALSE;
3970   found_tag = FALSE;            /* have a proc name; check if extern */
3971   get_tagname = FALSE;          /* have found "procedure" keyword    */
3972   inparms = FALSE;              /* found '(' after "proc"            */
3973   verify_tag = FALSE;           /* check if "extern" is ahead        */
3974
3975
3976   while (!feof (inf))           /* long main loop to get next char */
3977     {
3978       c = *dbp++;
3979       if (c == '\0')            /* if end of line */
3980         {
3981           lineno++;
3982           linecharno = charno;
3983           charno += readline (&lb, inf);
3984           dbp = lb.buffer;
3985           if (*dbp == '\0')
3986             continue;
3987           if (!((found_tag && verify_tag)
3988                 || get_tagname))
3989             c = *dbp++;         /* only if don't need *dbp pointing
3990                                    to the beginning of the name of
3991                                    the procedure or function */
3992         }
3993       if (incomment)
3994         {
3995           if (c == '}')         /* within { } comments */
3996             incomment = FALSE;
3997           else if (c == '*' && *dbp == ')') /* within (* *) comments */
3998             {
3999               dbp++;
4000               incomment = FALSE;
4001             }
4002           continue;
4003         }
4004       else if (inquote)
4005         {
4006           if (c == '\'')
4007             inquote = FALSE;
4008           continue;
4009         }
4010       else
4011         switch (c)
4012           {
4013           case '\'':
4014             inquote = TRUE;     /* found first quote */
4015             continue;
4016           case '{':             /* found open { comment */
4017             incomment = TRUE;
4018             continue;
4019           case '(':
4020             if (*dbp == '*')    /* found open (* comment */
4021               {
4022                 incomment = TRUE;
4023                 dbp++;
4024               }
4025             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4026               inparms = TRUE;
4027             continue;
4028           case ')':             /* end of parms list */
4029             if (inparms)
4030               inparms = FALSE;
4031             continue;
4032           case ';':
4033             if (found_tag && !inparms) /* end of proc or fn stmt */
4034               {
4035                 verify_tag = TRUE;
4036                 break;
4037               }
4038             continue;
4039           }
4040       if (found_tag && verify_tag && (*dbp != ' '))
4041         {
4042           /* check if this is an "extern" declaration */
4043           if (*dbp == '\0')
4044             continue;
4045           if (lowcase (*dbp == 'e'))
4046             {
4047               if (tail ("extern"))      /* superfluous, really! */
4048                 {
4049                   found_tag = FALSE;
4050                   verify_tag = FALSE;
4051                 }
4052             }
4053           else if (lowcase (*dbp) == 'f')
4054             {
4055               if (tail ("forward"))     /*  check for forward reference */
4056                 {
4057                   found_tag = FALSE;
4058                   verify_tag = FALSE;
4059                 }
4060             }
4061           if (found_tag && verify_tag) /* not external proc, so make tag */
4062             {
4063               found_tag = FALSE;
4064               verify_tag = FALSE;
4065               pfnote (namebuf, TRUE,
4066                       tline.buffer, save_len, save_lineno, save_lcno);
4067               continue;
4068             }
4069         }
4070       if (get_tagname)          /* grab name of proc or fn */
4071         {
4072           if (*dbp == '\0')
4073             continue;
4074
4075           /* save all values for later tagging */
4076           linebuffer_setlen (&tline, lb.len);
4077           strcpy (tline.buffer, lb.buffer);
4078           save_lineno = lineno;
4079           save_lcno = linecharno;
4080
4081           /* grab block name */
4082           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4083             continue;
4084           namebuf = savenstr (dbp, cp-dbp);
4085           dbp = cp;             /* set dbp to e-o-token */
4086           save_len = dbp - lb.buffer + 1;
4087           get_tagname = FALSE;
4088           found_tag = TRUE;
4089           continue;
4090
4091           /* and proceed to check for "extern" */
4092         }
4093       else if (!incomment && !inquote && !found_tag)
4094         {
4095           /* check for proc/fn keywords */
4096           switch (lowcase (c))
4097             {
4098             case 'p':
4099               if (tail ("rocedure"))    /* c = 'p', dbp has advanced */
4100                 get_tagname = TRUE;
4101               continue;
4102             case 'f':
4103               if (tail ("unction"))
4104                 get_tagname = TRUE;
4105               continue;
4106             }
4107         }
4108     }                           /* while not eof */
4109
4110   free (tline.buffer);
4111 }
4112
4113 \f
4114 /*
4115  * Lisp tag functions
4116  *  look for (def or (DEF, quote or QUOTE
4117  */
4118
4119 static int L_isdef P_((char *));
4120 static int L_isquote P_((char *));
4121 static void L_getit P_((void));
4122
/* Return 1 if the three characters after STRP[0] spell "def" in any
   mixture of upper and lower case, 0 otherwise.  STRP[0] itself is not
   looked at, and no character past the first mismatch is read. */
static int
L_isdef (strp)
     register char *strp;
{
  if (strp[1] != 'd' && strp[1] != 'D')
    return 0;
  if (strp[2] != 'e' && strp[2] != 'E')
    return 0;
  return strp[3] == 'f' || strp[3] == 'F';
}
4131
/* Return 1 if the five characters after STRP[0] spell "quote" in any
   mixture of upper and lower case and the character after them is
   accepted by iswhite, 0 otherwise.  STRP[0] itself is not looked at,
   and no character past the first mismatch is read. */
static int
L_isquote (strp)
     register char *strp;
{
  if (strp[1] != 'q' && strp[1] != 'Q')
    return 0;
  if (strp[2] != 'u' && strp[2] != 'U')
    return 0;
  if (strp[3] != 'o' && strp[3] != 'O')
    return 0;
  if (strp[4] != 't' && strp[4] != 'T')
    return 0;
  if (strp[5] != 'e' && strp[5] != 'E')
    return 0;
  return iswhite (strp[6]) ? 1 : 0;
}
4143
4144 static void
4145 L_getit ()
4146 {
4147   register char *cp;
4148
4149   if (*dbp == '\'')             /* Skip prefix quote */
4150     dbp++;
4151   else if (*dbp == '(')
4152   {
4153     if (L_isquote (dbp))
4154       dbp += 7;                 /* Skip "(quote " */
4155     else
4156       dbp += 1;                 /* Skip "(" before name in (defstruct (foo)) */
4157     dbp = skip_spaces (dbp);
4158   }
4159
4160   for (cp = dbp /*+1*/;
4161        *cp != '\0' && *cp != '(' && !iswhite(*cp) && *cp != ')';
4162        cp++)
4163     continue;
4164   if (cp == dbp)
4165     return;
4166
4167   pfnote (savenstr (dbp, cp-dbp), TRUE,
4168           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4169 }
4170
4171 static void
4172 Lisp_functions (inf)
4173      FILE *inf;
4174 {
4175   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4176     {
4177       if (dbp[0] == '(')
4178         {
4179           if (L_isdef (dbp))
4180             {
4181               dbp = skip_non_spaces (dbp);
4182               dbp = skip_spaces (dbp);
4183               L_getit ();
4184             }
4185           else
4186             {
4187               /* Check for (foo::defmumble name-defined ... */
4188               do
4189                 dbp++;
4190               while (*dbp != '\0' && !iswhite (*dbp)
4191                      && *dbp != ':' && *dbp != '(' && *dbp != ')');
4192               if (*dbp == ':')
4193                 {
4194                   do
4195                     dbp++;
4196                   while (*dbp == ':');
4197
4198                   if (L_isdef (dbp - 1))
4199                     {
4200                       dbp = skip_non_spaces (dbp);
4201                       dbp = skip_spaces (dbp);
4202                       L_getit ();
4203                     }
4204                 }
4205             }
4206         }
4207     }
4208 }
4209
4210 \f
4211 /*
4212  * Postscript tag functions
4213  * Just look for lines where the first character is '/'
4214  * Also look at "defineps" for PSWrap
4215  * Richard Mlynarik <mly@adoc.xerox.com> (1997)
4216  * Ideas by Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4217  */
4218 static void
4219 Postscript_functions (inf)
4220      FILE *inf;
4221 {
4222   register char *bp, *ep;
4223
4224   LOOP_ON_INPUT_LINES (inf, lb, bp)
4225     {
4226       if (bp[0] == '/')
4227         {
4228           for (ep = bp+1;
4229                *ep != '\0' && *ep != ' ' && *ep != '{';
4230                ep++)
4231             continue;
4232           pfnote (savenstr (bp, ep-bp), TRUE,
4233                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4234         }
4235       else if (strneq (bp, "defineps", 8))
4236         {
4237           bp = skip_non_spaces (bp);
4238           bp = skip_spaces (bp);
4239           get_tag (bp);
4240         }
4241     }
4242 }
4243
4244 \f
4245 /*
4246  * Scheme tag functions
4247  * look for (def... xyzzy
4248  * look for (def... (xyzzy
4249  * look for (def ... ((...(xyzzy ....
4250  * look for (set! xyzzy
4251  */
4252
4253 static void
4254 Scheme_functions (inf)
4255      FILE *inf;
4256 {
4257   register char *bp;
4258
4259   LOOP_ON_INPUT_LINES (inf, lb, bp)
4260     {
4261       if (bp[0] == '('
4262           && (bp[1] == 'D' || bp[1] == 'd')
4263           && (bp[2] == 'E' || bp[2] == 'e')
4264           && (bp[3] == 'F' || bp[3] == 'f'))
4265         {
4266           bp = skip_non_spaces (bp);
4267           /* Skip over open parens and white space */
4268           while (iswhite (*bp) || *bp == '(')
4269             bp++;
4270           get_tag (bp);
4271         }
4272       if (bp[0] == '('
4273           && (bp[1] == 'S' || bp[1] == 's')
4274           && (bp[2] == 'E' || bp[2] == 'e')
4275           && (bp[3] == 'T' || bp[3] == 't')
4276           && (bp[4] == '!' || bp[4] == '!')
4277           && (iswhite (bp[5])))
4278         {
4279           bp = skip_non_spaces (bp);
4280           bp = skip_spaces (bp);
4281           get_tag (bp);
4282         }
4283     }
4284 }
4285
4286 \f
4287 /* Find tags in TeX and LaTeX input files.  */
4288
4289 /* TEX_toktab is a table of TeX control sequences that define tags.
4290    Each TEX_tabent records one such control sequence.
4291    CONVERT THIS TO USE THE Stab TYPE!! */
4292 struct TEX_tabent
4293 {
4294   char *name;                   /* control sequence name; NULL in the entry marking the end of a table */
4295   int len;                      /* length of NAME; 0 in the entry marking the end of a table */
4296 };
4297
4298 struct TEX_tabent *TEX_toktab = NULL;   /* Table with tag tokens */
4299
4300 /* Default set of control sequences to put into TEX_toktab.
4301    The value of environment var TEXTAGS is prepended to this.  */
4302
4303 char *TEX_defenv = "\
4304 :chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
4305 :part:appendix:entry:index";
4306
4307 static void TEX_mode P_((FILE *));
4308 static struct TEX_tabent *TEX_decode_env P_((char *, char *));
4309 static int TEX_Token P_((char *));
4310
4311 char TEX_esc = '\\';
4312 char TEX_opgrp = '{';
4313 char TEX_clgrp = '}';
4314
4315 /*
4316  * TeX/LaTeX scanning loop.
4317  */
4318 static void
4319 TeX_commands (inf)
4320      FILE *inf;
4321 {
4322   char *cp, *lasthit;
4323   register int i;
4324
4325   /* Select either \ or ! as escape character.  */
4326   TEX_mode (inf);
4327
4328   /* Initialize token table once from environment. */
4329   if (!TEX_toktab)
4330     TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4331
4332   LOOP_ON_INPUT_LINES (inf, lb, cp)
4333     {
4334       lasthit = cp;
4335       /* Look at each esc in line. */
4336       while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4337         {
4338           if (*++cp == '\0')
4339             break;
4340           linecharno += cp - lasthit;
4341           lasthit = cp;
4342           i = TEX_Token (lasthit);
4343           if (i >= 0)
4344             {
4345               /* We seem to include the TeX command in the tag name.
4346               register char *p;
4347               for (p = lasthit + TEX_toktab[i].len;
4348                    *p != '\0' && *p != TEX_clgrp;
4349                    p++)
4350                 continue; */
4351               pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4352                       lb.buffer, lb.len, lineno, linecharno);
4353               break;            /* We only tag a line once */
4354             }
4355         }
4356     }
4357 }
4358
4359 #define TEX_LESC '\\'
4360 #define TEX_SESC '!'
4361 #define TEX_cmt  '%'
4362
4363 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4364    chars accordingly. */
4365 static void
4366 TEX_mode (inf)
4367      FILE *inf;
4368 {
4369   int c;
4370
4371   while ((c = getc (inf)) != EOF)
4372     {
4373       /* Skip to next line if we hit the TeX comment char. */
4374       if (c == TEX_cmt)
4375         while (c != '\n')
4376           c = getc (inf);
4377       else if (c == TEX_LESC || c == TEX_SESC )
4378         break;
4379     }
4380
4381   if (c == TEX_LESC)
4382     {
4383       TEX_esc = TEX_LESC;
4384       TEX_opgrp = '{';
4385       TEX_clgrp = '}';
4386     }
4387   else
4388     {
4389       TEX_esc = TEX_SESC;
4390       TEX_opgrp = '<';
4391       TEX_clgrp = '>';
4392     }
4393   /* If the input file is compressed, inf is a pipe, and rewind may fail.
4394      No attempt is made to correct the situation. */
4395   rewind (inf);
4396 }
4397
4398 /* Read environment and prepend it to the default string.
4399    Build token table. */
4400 static struct TEX_tabent *
4401 TEX_decode_env (evarname, defenv)
4402      char *evarname;
4403      char *defenv;
4404 {
4405   register char *env, *p;
4406
4407   struct TEX_tabent *tab;
4408   int size, i;
4409
4410   /* Append default string to environment. */
4411   env = getenv (evarname);
4412   if (!env)
4413     env = defenv;
4414   else
4415     {
4416       char *oldenv = env;
4417       env = concat (oldenv, defenv, "");
4418     }
4419
4420   /* Allocate a token table */
4421   for (size = 1, p = env; p;)
4422     if ((p = etags_strchr (p, ':')) && *++p != '\0')
4423       size++;
4424   /* Add 1 to leave room for null terminator.  */
4425   tab = xnew (size + 1, struct TEX_tabent);
4426
4427   /* Unpack environment string into token table. Be careful about */
4428   /* zero-length strings (leading ':', "::" and trailing ':') */
4429   for (i = 0; *env;)
4430     {
4431       p = etags_strchr (env, ':');
4432       if (!p)                   /* End of environment string. */
4433         p = env + strlen (env);
4434       if (p - env > 0)
4435         {                       /* Only non-zero strings. */
4436           tab[i].name = savenstr (env, p - env);
4437           tab[i].len = strlen (tab[i].name);
4438           i++;
4439         }
4440       if (*p)
4441         env = p + 1;
4442       else
4443         {
4444           tab[i].name = NULL;   /* Mark end of table. */
4445           tab[i].len = 0;
4446           break;
4447         }
4448     }
4449   return tab;
4450 }
4451
4452 /* If the text at CP matches one of the tag-defining TeX command names,
4453    return the pointer to the first occurrence of that command in TEX_toktab.
4454    Otherwise return -1.
4455    Keep the capital `T' in `token' for dumb truncating compilers
4456    (this distinguishes it from `TEX_toktab' */
4457 static int
4458 TEX_Token (cp)
4459      char *cp;
4460 {
4461   int i;
4462
4463   for (i = 0; TEX_toktab[i].len > 0; i++)
4464     if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4465       return i;
4466   return -1;
4467 }
4468
4469 \f
4470 /* Texinfo support.  Dave Love, Mar. 2000.  */
4471 static void
4472 Texinfo_nodes (inf)
4473      FILE * inf;
4474 {
4475   char *cp, *start;
4476   LOOP_ON_INPUT_LINES (inf, lb, cp)
4477     {
4478       if ((*cp++ == '@'
4479            && *cp++ == 'n'
4480            && *cp++ == 'o'
4481            && *cp++ == 'd'
4482            && *cp++ == 'e' && iswhite (*cp++)))
4483         {
4484           start = cp = skip_spaces(cp);
4485           while (*cp != '\0' && *cp != ',')
4486             cp++;
4487           pfnote (savenstr (start, cp - start), TRUE,
4488                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4489         }
4490     }
4491 }
4492
4493 \f
4494 /*
4495  * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4496  *
4497  * Assumes that the predicate starts at column 0.
4498  * Only the first clause of a predicate is added.
4499  */
4500 static int prolog_pred P_((char *, char *));
4501 static void prolog_skip_comment P_((linebuffer *, FILE *));
4502 static int prolog_atom P_((char *, int));
4503
4504 static void
4505 Prolog_functions (inf)
4506      FILE *inf;
4507 {
4508   char *cp, *last;
4509   int len;
4510   int allocated;
4511
4512   allocated = 0;
4513   len = 0;
4514   last = NULL;
4515
4516   LOOP_ON_INPUT_LINES (inf, lb, cp)
4517     {
4518       if (cp[0] == '\0')        /* Empty line */
4519         continue;
4520       else if (iswhite (cp[0])) /* Not a predicate */
4521         continue;
4522       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
4523         prolog_skip_comment (&lb, inf);
4524       else if ((len = prolog_pred (cp, last)) > 0)
4525         {
4526           /* Predicate.  Store the function name so that we only
4527              generate a tag for the first clause.  */
4528           if (last == NULL)
4529             last = xnew(len + 1, char);
4530           else if (len + 1 > allocated)
4531             xrnew (last, len + 1, char);
4532           allocated = len + 1;
4533           strncpy (last, cp, len);
4534           last[len] = '\0';
4535         }
4536     }
4537 }
4538
4539
4540 static void
4541 prolog_skip_comment (plb, inf)
4542      linebuffer *plb;
4543      FILE *inf;
4544 {
4545   char *cp;
4546
4547   do
4548     {
4549       for (cp = plb->buffer; *cp != '\0'; cp++)
4550         if (cp[0] == '*' && cp[1] == '/')
4551           return;
4552       lineno++;
4553       linecharno += readline (plb, inf);
4554     }
4555   while (!feof(inf));
4556 }
4557
4558 /*
4559  * A predicate definition is added if it matches:
4560  *     <beginning of line><Prolog Atom><whitespace>(
4561  *
4562  * It is added to the tags database if it doesn't match the
4563  * name of the previous clause header.
4564  *
4565  * Return the size of the name of the predicate, or 0 if no header
4566  * was found.
4567  */
4568 static int
4569 prolog_pred (s, last)
4570      char *s;
4571      char *last;                /* Name of last clause. */
4572 {
4573   int pos;
4574   int len;
4575
4576   pos = prolog_atom (s, 0);
4577   if (pos < 1)
4578     return 0;
4579
4580   len = pos;
4581   pos = skip_spaces (s + pos) - s;
4582
4583   if ((s[pos] == '(') || (s[pos] == '.'))
4584     {
4585       if (s[pos] == '(')
4586         pos++;
4587
4588       /* Save only the first clause. */
4589       if (last == NULL
4590           || len != (int)strlen (last)
4591           || !strneq (s, last, len))
4592         {
4593           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4594           return len;
4595         }
4596     }
4597   return 0;
4598 }
4599
/*
 * Consume the Prolog atom that starts at offset POS of S.
 * Return the number of bytes consumed, or -1 if there is no atom there.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and underscores, starting with a lower
 *   case letter or an underscore.
 * - A quoted arbitrary string, whose count includes both quotes.
 *   Single quotes can escape themselves.  Backslash quotes everything.
 *   A quoted atom not closed before the end of S gives -1.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER(*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM(*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* The atom is quoted: look for the closing quote. */
  p++;
  for (;;)
    {
      switch (*p)
        {
        case '\0':
          /* Multiline quoted atoms are ignored. */
          return -1;
        case '\\':
          if (p[1] == '\0')
            return -1;
          p += 2;               /* the backslash and what it quotes */
          break;
        case '\'':
          if (p[1] != '\'')
            return (p + 1) - start;     /* closing quote */
          p += 2;               /* A double quote */
          break;
        default:
          p++;
          break;
        }
    }
}
4658
4659 \f
4660 /*
4661  * Support for Erlang  --  Anders Lindgren, Feb 1996.
4662  *
4663  * Generates tags for functions, defines, and records.
4664  *
4665  * Assumes that Erlang functions start at column 0.
4666  */
4667 static int erlang_func P_((char *, char *));
4668 static void erlang_attribute P_((char *));
4669 static int erlang_atom P_((char *, int));
4670
4671 static void
4672 Erlang_functions (inf)
4673      FILE *inf;
4674 {
4675   char *cp, *last;
4676   int len;
4677   int allocated;
4678
4679   allocated = 0;
4680   len = 0;
4681   last = NULL;
4682
4683   LOOP_ON_INPUT_LINES (inf, lb, cp)
4684     {
4685       if (cp[0] == '\0')        /* Empty line */
4686         continue;
4687       else if (iswhite (cp[0])) /* Not function nor attribute */
4688         continue;
4689       else if (cp[0] == '%')    /* comment */
4690         continue;
4691       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
4692         continue;
4693       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
4694         {
4695           erlang_attribute (cp);
4696           last = NULL;
4697         }
4698       else if ((len = erlang_func (cp, last)) > 0)
4699         {
4700           /*
4701            * Function.  Store the function name so that we only
4702            * generates a tag for the first clause.
4703            */
4704           if (last == NULL)
4705             last = xnew (len + 1, char);
4706           else if (len + 1 > allocated)
4707             xrnew (last, len + 1, char);
4708           allocated = len + 1;
4709           strncpy (last, cp, len);
4710           last[len] = '\0';
4711         }
4712     }
4713 }
4714
4715
4716 /*
4717  * A function definition is added if it matches:
4718  *     <beginning of line><Erlang Atom><whitespace>(
4719  *
4720  * It is added to the tags database if it doesn't match the
4721  * name of the previous clause header.
4722  *
4723  * Return the size of the name of the function, or 0 if no function
4724  * was found.
4725  */
4726 static int
4727 erlang_func (s, last)
4728      char *s;
4729      char *last;                /* Name of last clause. */
4730 {
4731   int pos;
4732   int len;
4733
4734   pos = erlang_atom (s, 0);
4735   if (pos < 1)
4736     return 0;
4737
4738   len = pos;
4739   pos = skip_spaces (s + pos) - s;
4740
4741   /* Save only the first clause. */
4742   if (s[pos++] == '('
4743       && (last == NULL
4744           || len != (int)strlen (last)
4745           || !strneq (s, last, len)))
4746         {
4747           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4748           return len;
4749         }
4750
4751   return 0;
4752 }
4753
4754
4755 /*
4756  * Handle attributes.  Currently, tags are generated for defines
4757  * and records.
4758  *
4759  * They are on the form:
4760  * -define(foo, bar).
4761  * -define(Foo(M, N), M+N).
4762  * -record(graph, {vtab = notable, cyclic = true}).
4763  */
4764 static void
4765 erlang_attribute (s)
4766      char *s;
4767 {
4768   int pos;
4769   int len;
4770
4771   if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4772     {
4773       pos = skip_spaces (s + 7) - s;
4774       if (s[pos++] == '(')
4775         {
4776           pos = skip_spaces (s + pos) - s;
4777           len = erlang_atom (s, pos);
4778           if (len != 0)
4779             pfnote (savenstr (& s[pos], len), TRUE,
4780                     s, pos + len, lineno, linecharno);
4781         }
4782     }
4783   return;
4784 }
4785
4786
/*
 * Measure the Erlang atom (or variable) that starts at S[POS].
 * Return its length in bytes, or -1 if nothing suitable starts
 * there or a quoted atom is not closed on this line.
 *
 * Two forms are recognised:
 * - A letter or an underscore followed by any number of
 *   alphanumeric characters and underscores.
 * - A string in single quotes, in which a backslash protects the
 *   character that follows it.  The returned length includes both
 *   quotes.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (*p == '\'')
    {
      for (p++; *p != '\''; p++)
        {
          if (*p == '\\')
            p++;                /* step onto the protected character */
          if (*p == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
        }
      return p + 1 - start;     /* count the closing quote too */
    }

  if (!ISALPHA (*p) && *p != '_')
    return -1;

  /* The atom is unquoted. */
  do
    p++;
  while (ISALNUM (*p) || *p == '_');

  return p - start;
}
4836
4837 \f
4838 #ifdef ETAGS_REGEXPS
4839
4840 static char *scan_separators P_((char *));
4841 static void analyse_regex P_((char *, bool));
4842 static void add_regex P_((char *, bool, language *));
4843 static char *substitute P_((char *, char *, struct re_registers *));
4844
/* NAME starts with a separator character, as in "/blah/".  Copy the
   text that follows down to the start of NAME, stopping at the first
   separator that is not preceded by a backslash or at the end of the
   string, and null-terminate the copy.  While copying, "\t" becomes
   a Tab, a backslash followed by the separator becomes the separator
   alone, and any other backslash pair is kept unchanged.  A lone
   backslash at the very end is dropped.
   Return a pointer to the separator where the scan stopped, or to
   the terminating null if none was found.  Works in place. */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;
  char *src;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the character the backslash applies to. */
          if (*++src == '\0')
            break;
          if (*src == 't')
            *dst++ = '\t';
          else if (*src == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = *src;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
4887
4888 /* Look at the argument of --regex or --no-regex and do the right
4889    thing.  Same for each line of a regexp file. */
4890 static void
4891 analyse_regex (regex_arg, ignore_case)
4892      char *regex_arg;
4893      bool ignore_case;
4894 {
4895   if (regex_arg == NULL)
4896     free_patterns ();           /* --no-regex: remove existing regexps */
4897
4898   /* A real --regexp option or a line in a regexp file. */
4899   switch (regex_arg[0])
4900     {
4901       /* Comments in regexp file or null arg to --regex. */
4902     case '\0':
4903     case ' ':
4904     case '\t':
4905       break;
4906
4907       /* Read a regex file.  This is recursive and may result in a
4908          loop, which will stop when the file descriptors are exhausted. */
4909     case '@':
4910       {
4911         FILE *regexfp;
4912         linebuffer regexbuf;
4913         char *regexfile = regex_arg + 1;
4914
4915         /* regexfile is a file containing regexps, one per line. */
4916         regexfp = fopen (regexfile, "r");
4917         if (regexfp == NULL)
4918           {
4919             pfatal (regexfile);
4920             return;
4921           }
4922         initbuffer (&regexbuf);
4923         while (readline_internal (&regexbuf, regexfp) > 0)
4924           analyse_regex (regexbuf.buffer, ignore_case);
4925         free (regexbuf.buffer);
4926         fclose (regexfp);
4927       }
4928       break;
4929
4930       /* Regexp to be used for a specific language only. */
4931     case '{':
4932       {
4933         language *lang;
4934         char *lang_name = regex_arg + 1;
4935         char *cp;
4936
4937         for (cp = lang_name; *cp != '}'; cp++)
4938           if (*cp == '\0')
4939             {
4940               error ("unterminated language name in regex: %s", regex_arg);
4941               return;
4942             }
4943         *cp = '\0';
4944         lang = get_language_from_langname (lang_name);
4945         if (lang == NULL)
4946           return;
4947         add_regex (cp + 1, ignore_case, lang);
4948       }
4949       break;
4950
4951       /* Regexp to be used for any language. */
4952     default:
4953       add_regex (regex_arg, ignore_case, NULL);
4954       break;
4955     }
4956 }
4957
4958 /* Turn a name, which is an ed-style (but Emacs syntax) regular
4959    expression, into a real regular expression by compiling it. */
4960 static void
4961 add_regex (regexp_pattern, ignore_case, lang)
4962      char *regexp_pattern;
4963      bool ignore_case;
4964      language *lang;
4965 {
4966   char *name;
4967   const char *err;
4968   struct re_pattern_buffer *patbuf;
4969   pattern *pp;
4970
4971
4972   if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
4973     {
4974       error ("%s: unterminated regexp", regexp_pattern);
4975       return;
4976     }
4977   name = scan_separators (regexp_pattern);
4978   if (regexp_pattern[0] == '\0')
4979     {
4980       error ("null regexp", (char *)NULL);
4981       return;
4982     }
4983   (void) scan_separators (name);
4984
4985   patbuf = xnew (1, struct re_pattern_buffer);
4986   /* Translation table to fold case if appropriate. */
4987   patbuf->translate = (ignore_case) ? lc_trans : NULL;
4988   patbuf->fastmap = NULL;
4989   patbuf->buffer = NULL;
4990   patbuf->allocated = 0;
4991
4992   err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
4993   if (err != NULL)
4994     {
4995       error ("%s while compiling pattern", err);
4996       return;
4997     }
4998
4999   pp = p_head;
5000   p_head = xnew (1, pattern);
5001   p_head->regex = savestr (regexp_pattern);
5002   p_head->p_next = pp;
5003   p_head->language = lang;
5004   p_head->pattern = patbuf;
5005   p_head->name_pattern = savestr (name);
5006   p_head->error_signaled = FALSE;
5007 }
5008
5009 /*
5010  * Do the substitutions indicated by the regular expression and
5011  * arguments.
5012  */
5013 static char *
5014 substitute (in, out, regs)
5015      char *in, *out;
5016      struct re_registers *regs;
5017 {
5018   char *result, *t;
5019   int size, dig, diglen;
5020
5021   result = NULL;
5022   size = strlen (out);
5023
5024   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5025   if (out[size - 1] == '\\')
5026     fatal ("pattern error in \"%s\"", out);
5027   for (t = etags_strchr (out, '\\');
5028        t != NULL;
5029        t = etags_strchr (t + 2, '\\'))
5030     if (ISDIGIT (t[1]))
5031       {
5032         dig = t[1] - '0';
5033         diglen = regs->end[dig] - regs->start[dig];
5034         size += diglen - 2;
5035       }
5036     else
5037       size -= 1;
5038
5039   /* Allocate space and do the substitutions. */
5040   result = xnew (size + 1, char);
5041
5042   for (t = result; *out != '\0'; out++)
5043     if (*out == '\\' && ISDIGIT (*++out))
5044       {
5045         dig = *out - '0';
5046         diglen = regs->end[dig] - regs->start[dig];
5047         strncpy (t, in + regs->start[dig], diglen);
5048         t += diglen;
5049       }
5050     else
5051       *t++ = *out;
5052   *t = '\0';
5053
5054   assert (t <= result + size && t - result == (int)strlen (result));
5055
5056   return result;
5057 }
5058
5059 /* Deallocate all patterns. */
5060 static void
5061 free_patterns ()
5062 {
5063   pattern *pp;
5064   while (p_head != NULL)
5065     {
5066       pp = p_head->p_next;
5067       free (p_head->regex);
5068       free (p_head->name_pattern);
5069       free (p_head);
5070       p_head = pp;
5071     }
5072   return;
5073 }
5074
5075 \f
5076 static void
5077 get_tag (bp)
5078      register char *bp;
5079 {
5080   register char *cp;
5081
5082   if (*bp == '\0')
5083     return;
5084   /* Go till you get to white space or a syntactic break */
5085   for (cp = bp + 1;
5086        *cp != '\0' && *cp != '(' && *cp != ')' && !iswhite (*cp);
5087        cp++)
5088     continue;
5089   pfnote (savenstr (bp, cp-bp), TRUE,
5090           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5091 }
5092
5093 #endif /* ETAGS_REGEXPS */
5094 /* Initialize a linebuffer for use */
5095 static void
5096 initbuffer (lbp)
5097      linebuffer *lbp;
5098 {
5099   lbp->size = (DEBUG) ? 3 : 200;
5100   lbp->buffer = xnew (lbp->size, char);
5101   lbp->buffer[0] = '\0';
5102   lbp->len = 0;
5103 }
5104
5105 /*
5106  * Read a line of text from `stream' into `lbp', excluding the
5107  * newline or CR-NL, if any.  Return the number of characters read from
5108  * `stream', which is the length of the line including the newline.
5109  *
5110  * On DOS or Windows we do not count the CR character, if any, before the
5111  * NL, in the returned length; this mirrors the behavior of emacs on those
5112  * platforms (for text files, it translates CR-NL to NL as it reads in the
5113  * file).
5114  */
5115 static long
5116 readline_internal (lbp, stream)
5117      linebuffer *lbp;
5118      register FILE *stream;
5119 {
5120   char *buffer = lbp->buffer;
5121   register char *p = lbp->buffer;
5122   register char *pend;
5123   int chars_deleted;
5124
5125   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5126
5127   while (1)
5128     {
5129       register int c = getc (stream);
5130       if (p == pend)
5131         {
5132           /* We're at the end of linebuffer: expand it. */
5133           lbp->size *= 2;
5134           xrnew (buffer, lbp->size, char);
5135           p += buffer - lbp->buffer;
5136           pend = buffer + lbp->size;
5137           lbp->buffer = buffer;
5138         }
5139       if (c == EOF)
5140         {
5141           *p = '\0';
5142           chars_deleted = 0;
5143           break;
5144         }
5145       if (c == '\n')
5146         {
5147           if (p > buffer && p[-1] == '\r')
5148             {
5149               p -= 1;
5150 #ifdef DOS_NT
5151              /* Assume CRLF->LF translation will be performed by Emacs
5152                 when loading this file, so CRs won't appear in the buffer.
5153                 It would be cleaner to compensate within Emacs;
5154                 however, Emacs does not know how many CRs were deleted
5155                 before any given point in the file.  */
5156               chars_deleted = 1;
5157 #else
5158               chars_deleted = 2;
5159 #endif
5160             }
5161           else
5162             {
5163               chars_deleted = 1;
5164             }
5165           *p = '\0';
5166           break;
5167         }
5168       *p++ = c;
5169     }
5170   lbp->len = p - buffer;
5171
5172   return lbp->len + chars_deleted;
5173 }
5174
5175 /*
5176  * Like readline_internal, above, but in addition try to match the
5177  * input line against relevant regular expressions.
5178  */
5179 static long
5180 readline (lbp, stream)
5181      linebuffer *lbp;
5182      FILE *stream;
5183 {
5184   /* Read new line. */
5185   long result = readline_internal (lbp, stream);
5186 #ifdef ETAGS_REGEXPS
5187   int match;
5188   pattern *pp;
5189
5190   /* Match against relevant patterns. */
5191   if (lbp->len > 0)
5192     for (pp = p_head; pp != NULL; pp = pp->p_next)
5193       {
5194         /* Only use generic regexps or those for the current language. */
5195         if (pp->language != NULL && pp->language != curlang)
5196           continue;
5197
5198         match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5199         switch (match)
5200           {
5201           case -2:
5202             /* Some error. */
5203             if (!pp->error_signaled)
5204               {
5205                 error ("error while matching \"%s\"", pp->regex);
5206                 pp->error_signaled = TRUE;
5207               }
5208             break;
5209           case -1:
5210             /* No match. */
5211             break;
5212           default:
5213             /* Match occurred.  Construct a tag. */
5214             if (pp->name_pattern[0] != '\0')
5215               {
5216                 /* Make a named tag. */
5217                 char *name = substitute (lbp->buffer,
5218                                          pp->name_pattern, &pp->regs);
5219                 if (name != NULL)
5220                   pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5221               }
5222             else
5223               {
5224                 /* Make an unnamed tag. */
5225                 pfnote ((char *)NULL, TRUE,
5226                         lbp->buffer, match, lineno, linecharno);
5227               }
5228             break;
5229           }
5230       }
5231 #endif /* ETAGS_REGEXPS */
5232
5233   return result;
5234 }
5235
5236 \f
/*
 * Return a newly allocated copy of the whole string CP, made by
 * savenstr.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5247
5248 /*
5249  * Return a pointer to a space of size LEN+1 allocated with xnew where
5250  * the string CP has been copied for at most the first LEN characters.
5251  */
5252 static char *
5253 savenstr (cp, len)
5254      char *cp;
5255      int len;
5256 {
5257   register char *dp;
5258
5259   dp = xnew (len + 1, char);
5260   strncpy (dp, cp, len);
5261   dp[len] = '\0';
5262   return dp;
5263 }
5264
/*
 * Return a pointer to the last place in the string SP where the
 * character C appears, or NULL if it does not appear.  The
 * terminating null counts as part of the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *found = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        found = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)found;
}
5286
5287
/*
 * Return a pointer to the first place in the string SP where the
 * character C appears, or NULL if it does not appear.  The
 * terminating null counts as part of the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
5306
/* Return a pointer to the first character at or after CP for which
   iswhite is false. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5316
/* Return a pointer to the first character at or after CP that is
   either the terminating null or one for which iswhite is true. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0' && !iswhite (*cp); cp++)
    continue;
  return cp;
}
5326
5327 /* Print error message and exit.  */
5328 void
5329 fatal (s1, s2)
5330      char *s1, *s2;
5331 {
5332   error (s1, s2);
5333   exit (BAD);
5334 }
5335
5336 static void
5337 pfatal (s1)
5338      char *s1;
5339 {
5340   perror (s1);
5341   exit (BAD);
5342 }
5343
5344 static void
5345 suggest_asking_for_help ()
5346 {
5347   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5348            progname,
5349 #ifdef LONG_OPTIONS
5350            "--help"
5351 #else
5352            "-h"
5353 #endif
5354            );
5355   exit (BAD);
5356 }
5357
5358 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
5359 static void
5360 error (s1, s2)
5361      const char *s1, *s2;
5362 {
5363   fprintf (stderr, "%s: ", progname);
5364   fprintf (stderr, s1, s2);
5365   fprintf (stderr, "\n");
5366 }
5367
5368 /* Return a newly-allocated string whose contents
5369    concatenate those of s1, s2, s3.  */
5370 static char *
5371 concat (s1, s2, s3)
5372      char *s1, *s2, *s3;
5373 {
5374   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5375   char *result = xnew (len1 + len2 + len3 + 1, char);
5376
5377   strcpy (result, s1);
5378   strcpy (result + len1, s2);
5379   strcpy (result + len1 + len2, s3);
5380   result[len1 + len2 + len3] = '\0';
5381
5382   return result;
5383 }
5384
5385 \f
5386 /* Does the same work as the system V getcwd, but does not need to
5387    guess the buffer size in advance. */
5388 static char *
5389 etags_getcwd ()
5390 {
5391 #ifdef HAVE_GETCWD
5392   int bufsize = 200;
5393   char *path = xnew (bufsize, char);
5394
5395   while (getcwd (path, bufsize) == NULL)
5396     {
5397       if (errno != ERANGE)
5398         pfatal ("getcwd");
5399       bufsize *= 2;
5400       free (path);
5401       path = xnew (bufsize, char);
5402     }
5403
5404   canonicalize_filename (path);
5405   return path;
5406
5407 #else /* not HAVE_GETCWD */
5408 #ifdef MSDOS
5409
5410   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
5411
5412   getwd (path);
5413
5414   for (p = path; *p != '\0'; p++)
5415     if (*p == '\\')
5416       *p = '/';
5417     else
5418       *p = lowcase (*p);
5419
5420   return strdup (path);
5421 #else /* not MSDOS */
5422   linebuffer path;
5423   FILE *pipe;
5424
5425   initbuffer (&path);
5426   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5427   if (pipe == NULL || readline_internal (&path, pipe) == 0)
5428     pfatal ("pwd");
5429   pclose (pipe);
5430
5431   return path.buffer;
5432 #endif /* not MSDOS */
5433 #endif /* not HAVE_GETCWD */
5434 }
5435
5436 /* Return a newly allocated string containing the file name of FILE
5437    relative to the absolute directory DIR (which should end with a slash). */
5438 static char *
5439 relative_filename (file, dir)
5440      char *file, *dir;
5441 {
5442   char *fp, *dp, *afn, *res;
5443   int i;
5444
5445   /* Find the common root of file and dir (with a trailing slash). */
5446   afn = absolute_filename (file, cwd);
5447   fp = afn;
5448   dp = dir;
5449   while (*fp++ == *dp++)
5450     continue;
5451   fp--, dp--;                   /* back to the first differing char */
5452 #ifdef DOS_NT
5453   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5454     return afn;
5455 #endif
5456   do                            /* look at the equal chars until '/' */
5457     fp--, dp--;
5458   while (*fp != '/');
5459
5460   /* Build a sequence of "../" strings for the resulting relative file name. */
5461   i = 0;
5462   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5463     i += 1;
5464   res = xnew (3*i + strlen (fp + 1) + 1, char);
5465   res[0] = '\0';
5466   while (i-- > 0)
5467     strcat (res, "../");
5468
5469   /* Add the file name relative to the common root of file and dir. */
5470   strcat (res, fp + 1);
5471   free (afn);
5472
5473   return res;
5474 }
5475
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as it is, any other is appended to DIR.
   Then every "/." component is removed, and every "/.." component is
   removed together with the component before it, if there is one.
   If nothing is left, "/" is returned.  Under DOS_NT a relative
   FILE with a drive letter is a fatal error. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *from, *to;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the
     string is moved down with an explicit forward copy, because
     source and destination overlap and strcpy is undefined then. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Back up to the slash that starts the previous
                 component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              from = slashp + 3;
              to = cp;
              while ((*to++ = *from++) != '\0')
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              from = slashp + 2;
              to = slashp;
              while ((*to++ = *from++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the result is the root.  Release
         the empty string, which is not returned. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5536
/* Return a newly allocated string containing the absolute file name
   of the directory where FILE resides, given DIR (which should end
   with a slash).  FILE is canonicalized in place.  If it contains
   no slash, a copy of DIR is returned; otherwise FILE is cut short
   just after its last slash while absolute_filename works on it,
   and restored afterwards. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *cut, *res;
  char save;

  canonicalize_filename (file);
  cut = etags_strrchr (file, '/');
  if (cut == NULL)
    return savestr (dir);

  /* Temporarily end the string after the last slash. */
  cut++;
  save = *cut;
  *cut = '\0';
  res = absolute_filename (file, dir);
  *cut = save;

  return res;
}
5558
/* Return whether the string FN is an absolute file name, that is,
   whether it starts with a slash or, under DOS_NT, with a drive
   letter followed by a colon and a slash.  The argument string
   must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
5571
/* Bring the file name FN into canonical form.  Works in place.
   Under DOS_NT a lower case drive letter is made upper case and
   every backslash is turned into a slash.  Elsewhere the name is
   left as it is. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
5590
5591 /* Set the minimum size of a string contained in a linebuffer. */
5592 static void
5593 linebuffer_setlen (lbp, toksize)
5594      linebuffer *lbp;
5595      int toksize;
5596 {
5597   while (lbp->size <= toksize)
5598     {
5599       lbp->size *= 2;
5600       xrnew (lbp->buffer, lbp->size, char);
5601     }
5602   lbp->len = toksize;
5603 }
5604
/* Allocate SIZE bytes with malloc and return the result.  A null
   result from malloc is reported as exhausted memory through
   `fatal'.  */
long *
xmalloc (size)
     unsigned int size;
{
  long *block;

  block = (long *) malloc (size);
  if (block == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
5615
/* Resize the block PTR to SIZE bytes with realloc and return the
   result.  A null result from realloc is reported as exhausted
   memory through `fatal'.  */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *block;

  block = (long *) realloc (ptr, size);
  if (block == NULL)
    fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}