lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5 This file is not considered part of GNU Emacs.
   6
   7 This program is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software Foundation,
  19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  * 1989 Sam Kendall added C++.
  28  * 1992 Joseph B. Wells improved C and C++ parsing.
  29  * 1993 Francesco Potortì reorganised C and C++.
  30  * 1994 Regexp tags by Tom Tromey.
  31  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  32  * 2002 #line directives by Francesco Potortì.
  33  *
  34  *      Francesco Potortì <pot@gnu.org> has maintained it since 1993.
  35  */
  36
  37 char pot_etags_version[] = "@(#) pot revision number is 16.10";
  38
  39 #define TRUE    1
  40 #define FALSE   0
  41
  42 #ifdef DEBUG
  43 #  undef DEBUG
  44 #  define DEBUG TRUE
  45 #else
  46 #  define DEBUG  FALSE
  47 #  define NDEBUG                /* disable assert */
  48 #endif
  49
  50 #ifdef HAVE_CONFIG_H
  51 # include <config.h>
  52   /* On some systems, Emacs defines static as nothing for the sake
  53      of unexec.  We don't want that here since we don't use unexec. */
  54 # undef static
  55 # define ETAGS_REGEXPS          /* use the regexp features */
  56 # define LONG_OPTIONS           /* accept long options */
  57 # ifndef PTR                    /* for Xemacs */
  58 #   define PTR void *
  59 # endif
  60 # ifndef __P                    /* for Xemacs */
  61 #   define __P(args) args
  62 # endif
  63 #else
  64 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  65 #   define __P(args) args       /* use prototypes */
  66 #   define PTR void *           /* for generic pointers */
  67 # else
  68 #   define __P(args) ()         /* no prototypes */
  69 #   define const                /* remove const for old compilers' sake */
  70 #   define PTR long *           /* don't use void* */
  71 # endif
  72 #endif /* !HAVE_CONFIG_H */
  73
  74 #ifndef _GNU_SOURCE
  75 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  76 #endif
  77
  78 /* WIN32_NATIVE is for Xemacs.
  79    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  80 #ifdef WIN32_NATIVE
  81 # undef MSDOS
  82 # undef  WINDOWSNT
  83 # define WINDOWSNT
  84 #endif /* WIN32_NATIVE */
  85
  86 #ifdef MSDOS
  87 # undef MSDOS
  88 # define MSDOS TRUE
  89 # include <fcntl.h>
  90 # include <sys/param.h>
  91 # include <io.h>
  92 # ifndef HAVE_CONFIG_H
  93 #   define DOS_NT
  94 #   include <sys/config.h>
  95 # endif
  96 #else
  97 # define MSDOS FALSE
  98 #endif /* MSDOS */
  99
 100 #ifdef WINDOWSNT
 101 # include <stdlib.h>
 102 # include <fcntl.h>
 103 # include <string.h>
 104 # include <direct.h>
 105 # include <io.h>
 106 # define MAXPATHLEN _MAX_PATH
 107 # undef HAVE_NTGUI
 108 # undef  DOS_NT
 109 # define DOS_NT
 110 # ifndef HAVE_GETCWD
 111 #   define HAVE_GETCWD
 112 # endif /* undef HAVE_GETCWD */
 113 #else /* !WINDOWSNT */
 114 # ifdef STDC_HEADERS
 115 #  include <stdlib.h>
 116 #  include <string.h>
 117 # else
 118     extern char *getenv ();
 119 # endif
 120 #endif /* !WINDOWSNT */
 121
 122 #ifdef HAVE_UNISTD_H
 123 # include <unistd.h>
 124 #else
 125 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 126     extern char *getcwd (char *buf, size_t size);
 127 # endif
 128 #endif /* HAVE_UNISTD_H */
 129
 130 #include <stdio.h>
 131 #include <ctype.h>
 132 #include <errno.h>
 133 #ifndef errno
 134   extern int errno;
 135 #endif
 136 #include <sys/types.h>
 137 #include <sys/stat.h>
 138
 139 #include <assert.h>
 140 #ifdef NDEBUG
 141 # undef  assert                 /* some systems have a buggy assert.h */
 142 # define assert(x) ((void) 0)
 143 #endif
 144
 145 #if !defined (S_ISREG) && defined (S_IFREG)
 146 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 147 #endif
 148
 149 #ifdef LONG_OPTIONS
 150 # include <getopt.h>
 151 #else
 152 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 153   extern char *optarg;
 154   extern int optind, opterr;
 155 #endif /* LONG_OPTIONS */
 156
 157 #ifdef ETAGS_REGEXPS
 158 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 159 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 160                              !!! NOTICE !!!
 161  the regex.h distributed with Cygwin is not compatible with etags, alas!
 162 If you want regular expression support, you should delete this notice and
 163               arrange to use the GNU regex.h and regex.c.
 164 #   endif
 165 # endif
 166 # include <regex.h>
 167 #endif /* ETAGS_REGEXPS */
 168
 169 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 170  Leave it undefined to make the program "etags", which makes emacs-style
 171  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 172 #ifdef CTAGS
 173 # undef  CTAGS
 174 # define CTAGS TRUE
 175 #else
 176 # define CTAGS FALSE
 177 #endif
 178
 179 /* Exit codes for success and failure.  */
 180 #ifdef VMS
 181 # define        GOOD    1
 182 # define        BAD     0
 183 #else
 184 # define        GOOD    0
 185 # define        BAD     1
 186 #endif
 187
 188 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 189 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 190
 191 #define CHARS 256               /* 2^sizeof(char) */
 192 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 193 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
 194 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
 195 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
 196 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
 197 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
 198
 199 #define ISALNUM(c)      isalnum (CHAR(c))
 200 #define ISALPHA(c)      isalpha (CHAR(c))
 201 #define ISDIGIT(c)      isdigit (CHAR(c))
 202 #define ISLOWER(c)      islower (CHAR(c))
 203
 204 #define lowcase(c)      tolower (CHAR(c))
 205 #define upcase(c)       toupper (CHAR(c))
 206
 207
 208 /*
 209  *      xnew, xrnew -- allocate, reallocate storage
 210  *
 211  * SYNOPSIS:    Type *xnew (int n, Type);
 212  *              void xrnew (OldPointer, int n, Type);
 213  */
 214 #if DEBUG
 215 # include "chkmalloc.h"
 216 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 217                                                   (n) * sizeof (Type)))
 218 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 219                                         (char *) (op), (n) * sizeof (Type)))
 220 #else
 221 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 222 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 223                                         (char *) (op), (n) * sizeof (Type)))
 224 #endif
 225
 226 #define bool int
 227
 228 typedef void Lang_function __P((FILE *));
 229
 230 typedef struct
 231 {
 232   char *suffix;                 /* file name suffix for this compressor */
 233   char *command;                /* takes one arg and decompresses to stdout */
 234 } compressor;
 235
 236 typedef struct
 237 {
 238   char *name;                   /* language name */
 239   bool metasource;              /* source used to generate other sources */
 240   Lang_function *function;      /* parse function */
 241   char **filenames;             /* names of this language's files */
 242   char **suffixes;              /* name suffixes of this language's files */
 243   char **interpreters;          /* interpreters for this language */
 244 } language;
 245
 246 typedef struct fdesc
 247 {
 248   struct fdesc *next;           /* for the linked list */
 249   char *infname;                /* uncompressed input file name */
 250   char *infabsname;             /* absolute uncompressed input file name */
 251   char *infabsdir;              /* absolute dir of input file */
 252   char *taggedfname;            /* file name to write in tagfile */
 253   language *lang;               /* language of file */
 254   char *prop;                   /* file properties to write in tagfile */
 255   bool usecharno;               /* etags tags shall contain char number */
 256 } fdesc;
 257
 258 typedef struct node_st
 259 {                               /* sorting structure */
 260   struct node_st *left, *right; /* left and right sons */
 261   fdesc *fdp;                   /* description of file to whom tag belongs */
 262   char *name;                   /* tag name */
 263   char *pat;                    /* search pattern */
 264   bool valid;                   /* write this tag on the tag file */
 265   bool is_func;                 /* function tag: use pattern in CTAGS mode */
 266   bool been_warned;             /* warning already given for duplicated tag */
 267   int lno;                      /* line number tag is on */
 268   long cno;                     /* character number line starts on */
 269 } node;
 270
 271 /*
 272  * A `linebuffer' is a structure which holds a line of text.
 273  * `readline_internal' reads a line from a stream into a linebuffer
 274  * and works regardless of the length of the line.
 275  * SIZE is the size of BUFFER, LEN is the length of the string in
 276  * BUFFER after readline reads it.
 277  */
 278 typedef struct
 279 {
 280   long size;
 281   int len;
 282   char *buffer;
 283 } linebuffer;
 284
 285 /* Used to support mixing of --lang and file names. */
 286 typedef struct
 287 {
 288   enum {
 289     at_language,                /* a language specification */
 290     at_regexp,                  /* a regular expression */
 291     at_icregexp,                /* same, but with case ignored */
 292     at_filename,                /* a file name */
 293     at_stdin                    /* read from stdin here */
 294   } arg_type;                   /* argument type */
 295   language *lang;               /* language associated with the argument */
 296   char *what;                   /* the argument itself */
 297 } argument;
 298
 299 #ifdef ETAGS_REGEXPS
 300 /* Structure defining a regular expression. */
 301 typedef struct pattern
 302 {
 303   struct pattern *p_next;
 304   language *lang;
 305   char *regex;
 306   struct re_pattern_buffer *pat;
 307   struct re_registers regs;
 308   char *name_pattern;
 309   bool error_signaled;
 310   bool ignore_case;
 311 } pattern;
 312 #endif /* ETAGS_REGEXPS */
 313
 314
 315 /* Many compilers barf on this:
 316         Lang_function Ada_funcs;
 317    so let's write it this way */
 318 static void Ada_funcs __P((FILE *));
 319 static void Asm_labels __P((FILE *));
 320 static void C_entries __P((int c_ext, FILE *));
 321 static void default_C_entries __P((FILE *));
 322 static void plain_C_entries __P((FILE *));
 323 static void Cjava_entries __P((FILE *));
 324 static void Cobol_paragraphs __P((FILE *));
 325 static void Cplusplus_entries __P((FILE *));
 326 static void Cstar_entries __P((FILE *));
 327 static void Erlang_functions __P((FILE *));
 328 static void Fortran_functions __P((FILE *));
 329 static void Yacc_entries __P((FILE *));
 330 static void Lisp_functions __P((FILE *));
 331 static void Makefile_targets __P((FILE *));
 332 static void Pascal_functions __P((FILE *));
 333 static void Perl_functions __P((FILE *));
 334 static void PHP_functions __P((FILE *));
 335 static void Postscript_functions __P((FILE *));
 336 static void Prolog_functions __P((FILE *));
 337 static void Python_functions __P((FILE *));
 338 static void Scheme_functions __P((FILE *));
 339 static void TeX_commands __P((FILE *));
 340 static void Texinfo_nodes __P((FILE *));
 341 static void just_read_file __P((FILE *));
 342
 343 static void print_language_names __P((void));
 344 static void print_version __P((void));
 345 static void print_help __P((void));
 346 int main __P((int, char **));
 347
 348 static compressor *get_compressor_from_suffix __P((char *, char **));
 349 static language *get_language_from_langname __P((const char *));
 350 static language *get_language_from_interpreter __P((char *));
 351 static language *get_language_from_filename __P((char *, bool));
 352 static void readline __P((linebuffer *, FILE *));
 353 static long readline_internal __P((linebuffer *, FILE *));
 354 static bool nocase_tail __P((char *));
 355 static char *get_tag __P((char *));
 356
 357 #ifdef ETAGS_REGEXPS
 358 static void analyse_regex __P((char *, bool));
 359 static void add_regex __P((char *, bool, language *));
 360 static void free_patterns __P((void));
 361 #endif /* ETAGS_REGEXPS */
 362 static void error __P((const char *, const char *));
 363 static void suggest_asking_for_help __P((void));
 364 void fatal __P((char *, char *));
 365 static void pfatal __P((char *));
 366 static void add_node __P((node *, node **));
 367
 368 static void init __P((void));
 369 static void initbuffer __P((linebuffer *));
 370 static void process_file_name __P((char *, language *));
 371 static void process_file __P((FILE *, char *, language *));
 372 static void find_entries __P((FILE *));
 373 static void free_tree __P((node *));
 374 static void free_fdesc __P((fdesc *));
 375 static void pfnote __P((char *, bool, char *, int, int, long));
 376 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
 377 static void invalidate_nodes __P((fdesc *, node **));
 378 static void put_entries __P((node *));
 379
 380 static char *concat __P((char *, char *, char *));
 381 static char *skip_spaces __P((char *));
 382 static char *skip_non_spaces __P((char *));
 383 static char *savenstr __P((char *, int));
 384 static char *savestr __P((char *));
 385 static char *etags_strchr __P((const char *, int));
 386 static char *etags_strrchr __P((const char *, int));
 387 static bool strcaseeq __P((const char *, const char *));
 388 static char *etags_getcwd __P((void));
 389 static char *relative_filename __P((char *, char *));
 390 static char *absolute_filename __P((char *, char *));
 391 static char *absolute_dirname __P((char *, char *));
 392 static bool filename_is_absolute __P((char *f));
 393 static void canonicalize_filename __P((char *));
 394 static void linebuffer_setlen __P((linebuffer *, int));
 395 static PTR xmalloc __P((unsigned int));
 396 static PTR xrealloc __P((char *, unsigned int));
 397
 398 \f
 399 static char searchar = '/';     /* use /.../ searches */
 400
 401 static char *tagfile;           /* output file */
 402 static char *progname;          /* name this program was invoked with */
 403 static char *cwd;               /* current working directory */
 404 static char *tagfiledir;        /* directory of tagfile */
 405 static FILE *tagf;              /* ioptr for tags file */
 406
 407 static fdesc *fdhead;           /* head of file description list */
 408 static fdesc *curfdp;           /* current file description */
 409 static int lineno;              /* line number of current line */
 410 static long charno;             /* current character number */
 411 static long linecharno;         /* charno of start of current line */
 412 static char *dbp;               /* pointer to start of current tag */
 413
 414 static const int invalidcharno = -1;
 415
 416 static node *nodehead;          /* the head of the binary tree of tags */
 417 static node *last_node;         /* the last node created */
 418
 419 static linebuffer lb;           /* the current line */
 420
 421 /* boolean "functions" (see init)       */
 422 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 423 static char
 424   /* white chars */
 425   *white = " \f\t\n\r\v",
 426   /* not in a name */
 427   *nonam = " \f\t\n\r()=,;",
 428   /* token ending chars */
 429   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 430   /* token starting chars */
 431   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 432   /* valid in-token chars */
 433   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 434
 435 static bool append_to_tagfile;  /* -a: append to tags */
 436 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 437 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 438 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 439                                 /* 0 struct/enum/union decls, and C++ */
 440                                 /* member functions. */
 441 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 442                                 /* constants and variables. */
 443                                 /* -D: opposite of -d.  Default under ctags. */
 444 static bool globals;            /* create tags for global variables */
 445 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 446 static bool members;            /* create tags for C member variables */
 447 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 448 static bool update;             /* -u: update tags */
 449 static bool vgrind_style;       /* -v: create vgrind style index output */
 450 static bool no_warnings;        /* -w: suppress warnings */
 451 static bool cxref_style;        /* -x: create cxref style output */
 452 static bool cplusplus;          /* .[hc] means C++, not C */
 453 static bool noindentypedefs;    /* -I: ignore indentation in C */
 454 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 455
 456 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 457 static bool parsing_stdin;      /* --parse-stdin used */
 458
 459 #ifdef ETAGS_REGEXPS
 460 /* List of all regexps. */
 461 static pattern *p_head;
 462
 463 /* How many characters in the character set.  (From regex.c.)  */
 464 #define CHAR_SET_SIZE 256
 465 /* Translation table for case-insensitive matching. */
 466 static char lc_trans[CHAR_SET_SIZE];
 467 #endif /* ETAGS_REGEXPS */
 468
 469 #ifdef LONG_OPTIONS
 470 static struct option longopts[] =
 471 {
 472   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 473   { "c++",                no_argument,       NULL,               'C'   },
 474   { "declarations",       no_argument,       &declarations,      TRUE  },
 475   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 476   { "help",               no_argument,       NULL,               'h'   },
 477   { "help",               no_argument,       NULL,               'H'   },
 478   { "ignore-indentation", no_argument,       NULL,               'I'   },
 479   { "language",           required_argument, NULL,               'l'   },
 480   { "members",            no_argument,       &members,           TRUE  },
 481   { "no-members",         no_argument,       &members,           FALSE },
 482   { "output",             required_argument, NULL,               'o'   },
 483 #ifdef ETAGS_REGEXPS
 484   { "regex",              required_argument, NULL,               'r'   },
 485   { "no-regex",           no_argument,       NULL,               'R'   },
 486   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 487 #endif /* ETAGS_REGEXPS */
 488   { "parse-stdin",        required_argument, NULL,               STDIN },
 489   { "version",            no_argument,       NULL,               'V'   },
 490
 491 #if CTAGS /* Etags options */
 492   { "backward-search",    no_argument,       NULL,               'B'   },
 493   { "cxref",              no_argument,       NULL,               'x'   },
 494   { "defines",            no_argument,       NULL,               'd'   },
 495   { "globals",            no_argument,       &globals,           TRUE  },
 496   { "typedefs",           no_argument,       NULL,               't'   },
 497   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 498   { "update",             no_argument,       NULL,               'u'   },
 499   { "vgrind",             no_argument,       NULL,               'v'   },
 500   { "no-warn",            no_argument,       NULL,               'w'   },
 501
 502 #else /* Ctags options */
 503   { "append",             no_argument,       NULL,               'a'   },
 504   { "no-defines",         no_argument,       NULL,               'D'   },
 505   { "no-globals",         no_argument,       &globals,           FALSE },
 506   { "include",            required_argument, NULL,               'i'   },
 507 #endif
 508   { NULL }
 509 };
 510 #endif /* LONG_OPTIONS */
 511
 512 static compressor compressors[] =
 513 {
 514   { "z", "gzip -d -c"},
 515   { "Z", "gzip -d -c"},
 516   { "gz", "gzip -d -c"},
 517   { "GZ", "gzip -d -c"},
 518   { "bz2", "bzip2 -d -c" },
 519   { NULL }
 520 };
 521
 522 /*
 523  * Language stuff.
 524  */
 525
 526 /* Ada code */
 527 static char *Ada_suffixes [] =
 528   { "ads", "adb", "ada", NULL };
 529
 530 /* Assembly code */
 531 static char *Asm_suffixes [] =
 532   { "a",        /* Unix assembler */
 533     "asm", /* Microcontroller assembly */
 534     "def", /* BSO/Tasking definition includes  */
 535     "inc", /* Microcontroller include files */
 536     "ins", /* Microcontroller include files */
 537     "s", "sa", /* Unix assembler */
 538     "S",   /* cpp-processed Unix assembler */
 539     "src", /* BSO/Tasking C compiler output */
 540     NULL
 541   };
 542
 543 /* Note that .c and .h can be considered C++, if the --c++ flag was
 544    given, or if the `class' keyowrd is met inside the file.
 545    That is why default_C_entries is called for these. */
 546 static char *default_C_suffixes [] =
 547   { "c", "h", NULL };
 548
 549 static char *Cplusplus_suffixes [] =
 550   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 551     "M",                        /* Objective C++ */
 552     "pdb",                      /* Postscript with C syntax */
 553     NULL };
 554
 555 static char *Cjava_suffixes [] =
 556   { "java", NULL };
 557
 558 static char *Cobol_suffixes [] =
 559   { "COB", "cob", NULL };
 560
 561 static char *Cstar_suffixes [] =
 562   { "cs", "hs", NULL };
 563
 564 static char *Erlang_suffixes [] =
 565   { "erl", "hrl", NULL };
 566
 567 static char *Fortran_suffixes [] =
 568   { "F", "f", "f90", "for", NULL };
 569
 570 static char *Lisp_suffixes [] =
 571   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 572
 573 static char *Makefile_filenames [] =
 574   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 575
 576 static char *Pascal_suffixes [] =
 577   { "p", "pas", NULL };
 578
 579 static char *Perl_suffixes [] =
 580   { "pl", "pm", NULL };
 581
 582 static char *Perl_interpreters [] =
 583   { "perl", "@PERL@", NULL };
 584
 585 static char *PHP_suffixes [] =
 586   { "php", "php3", "php4", NULL };
 587
 588 static char *plain_C_suffixes [] =
 589   { "lm",                       /* Objective lex file */
 590     "m",                        /* Objective C file */
 591     "pc",                       /* Pro*C file */
 592      NULL };
 593
 594 static char *Postscript_suffixes [] =
 595   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 596
 597 static char *Prolog_suffixes [] =
 598   { "prolog", NULL };
 599
 600 static char *Python_suffixes [] =
 601   { "py", NULL };
 602
 603 /* Can't do the `SCM' or `scm' prefix with a version number. */
 604 static char *Scheme_suffixes [] =
 605   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 606
 607 static char *TeX_suffixes [] =
 608   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 609
 610 static char *Texinfo_suffixes [] =
 611   { "texi", "texinfo", "txi", NULL };
 612
 613 static char *Yacc_suffixes [] =
 614   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 615
 616 /*
 617  * Table of languages.
 618  *
 619  * It is ok for a given function to be listed under more than one
 620  * name.  I just didn't.
 621  */
 622
 623 static language lang_names [] =
 624 {
 625   { "ada",      FALSE, Ada_funcs,            NULL, Ada_suffixes,        NULL },
 626   { "asm",      FALSE, Asm_labels,           NULL, Asm_suffixes,        NULL },
 627   { "c",        FALSE, default_C_entries,    NULL, default_C_suffixes,  NULL },
 628   { "c++",      FALSE, Cplusplus_entries,    NULL, Cplusplus_suffixes,  NULL },
 629   { "c*",       FALSE, Cstar_entries,        NULL, Cstar_suffixes,      NULL },
 630   { "cobol",    FALSE, Cobol_paragraphs,     NULL, Cobol_suffixes,      NULL },
 631   { "erlang",   FALSE, Erlang_functions,     NULL, Erlang_suffixes,     NULL },
 632   { "fortran",  FALSE, Fortran_functions,    NULL, Fortran_suffixes,    NULL },
 633   { "java",     FALSE, Cjava_entries,        NULL, Cjava_suffixes,      NULL },
 634   { "lisp",     FALSE, Lisp_functions,       NULL, Lisp_suffixes,       NULL },
 635   { "makefile", FALSE, Makefile_targets,     Makefile_filenames, NULL,  NULL },
 636   { "pascal",   FALSE, Pascal_functions,     NULL, Pascal_suffixes,     NULL },
 637   { "perl",     FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
 638   { "php",      FALSE, PHP_functions,        NULL, PHP_suffixes,        NULL },
 639   { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
 640   { "proc",     FALSE, plain_C_entries,      NULL, plain_C_suffixes,    NULL },
 641   { "prolog",   FALSE, Prolog_functions,     NULL, Prolog_suffixes,     NULL },
 642   { "python",   FALSE, Python_functions,     NULL, Python_suffixes,     NULL },
 643   { "scheme",   FALSE, Scheme_functions,     NULL, Scheme_suffixes,     NULL },
 644   { "tex",      FALSE, TeX_commands,         NULL, TeX_suffixes,        NULL },
 645   { "texinfo",  FALSE, Texinfo_nodes,        NULL, Texinfo_suffixes,    NULL },
 646   { "yacc",      TRUE, Yacc_entries,         NULL, Yacc_suffixes,       NULL },
 647   { "auto", FALSE, NULL },             /* default guessing scheme */
 648   { "none", FALSE, just_read_file },   /* regexp matching only */
 649   { NULL, FALSE, NULL }                /* end of list */
 650 };
 651
 652 \f
 653 static void
 654 print_language_names ()
 655 {
 656   language *lang;
 657   char **name, **ext;
 658
 659   puts ("\nThese are the currently supported languages, along with the\n\
 660 default file names and dot suffixes:");
 661   for (lang = lang_names; lang->name != NULL; lang++)
 662     {
 663       printf ("  %-*s", 10, lang->name);
 664       if (lang->filenames != NULL)
 665         for (name = lang->filenames; *name != NULL; name++)
 666           printf (" %s", *name);
 667       if (lang->suffixes != NULL)
 668         for (ext = lang->suffixes; *ext != NULL; ext++)
 669           printf (" .%s", *ext);
 670       puts ("");
 671     }
 672   puts ("Where `auto' means use default language for files based on file\n\
 673 name suffix, and `none' means only do regexp processing on files.\n\
 674 If no language is specified and no matching suffix is found,\n\
 675 the first line of the file is read for a sharp-bang (#!) sequence\n\
 676 followed by the name of an interpreter.  If no such sequence is found,\n\
 677 Fortran is tried first; if no tags are found, C is tried next.\n\
 678 When parsing any C file, a \"class\" keyword switches to C++.\n\
 679 Compressed files are supported using gzip and bzip2.");
 680 }
 681
 682 #ifndef EMACS_NAME
 683 # define EMACS_NAME "GNU Emacs"
 684 #endif
 685 #ifndef VERSION
 686 # define VERSION "21"
 687 #endif
 688 static void
 689 print_version ()
 690 {
 691   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 692   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 693   puts ("This program is distributed under the same terms as Emacs");
 694
 695   exit (GOOD);
 696 }
 697
 698 static void
 699 print_help ()
 700 {
 701   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 702 \n\
 703 These are the options accepted by %s.\n", progname, progname);
 704 #ifdef LONG_OPTIONS
 705   puts ("You may use unambiguous abbreviations for the long option names.");
 706 #else
 707   puts ("Long option names do not work with this executable, as it is not\n\
 708 linked with GNU getopt.");
 709 #endif /* LONG_OPTIONS */
 710   puts ("  A - as file name means read names from stdin (one per line).\n\
 711 Absolute names are stored in the output file as they are.\n\
 712 Relative ones are stored relative to the output file's directory.\n");
 713
 714   if (!CTAGS)
 715     puts ("-a, --append\n\
 716         Append tag entries to existing tags file.");
 717
 718   puts ("--packages-only\n\
 719         For Ada files, only generate tags for packages.");
 720
 721   if (CTAGS)
 722     puts ("-B, --backward-search\n\
 723         Write the search commands for the tag entries using '?', the\n\
 724         backward-search command instead of '/', the forward-search command.");
 725
 726   /* This option is mostly obsolete, because etags can now automatically
 727      detect C++.  Retained for backward compatibility and for debugging and
 728      experimentation.  In principle, we could want to tag as C++ even
 729      before any "class" keyword.
 730   puts ("-C, --c++\n\
 731         Treat files whose name suffix defaults to C language as C++ files.");
 732   */
 733
 734   puts ("--declarations\n\
 735         In C and derived languages, create tags for function declarations,");
 736   if (CTAGS)
 737     puts ("\tand create tags for extern variables if --globals is used.");
 738   else
 739     puts
 740       ("\tand create tags for extern variables unless --no-globals is used.");
 741
 742   if (CTAGS)
 743     puts ("-d, --defines\n\
 744         Create tag entries for C #define constants and enum constants, too.");
 745   else
 746     puts ("-D, --no-defines\n\
 747         Don't create tag entries for C #define constants and enum constants.\n\
 748         This makes the tags file smaller.");
 749
 750   if (!CTAGS)
 751     puts ("-i FILE, --include=FILE\n\
 752         Include a note in tag file indicating that, when searching for\n\
 753         a tag, one should also consult the tags file FILE after\n\
 754         checking the current file.");
 755
 756   puts ("-l LANG, --language=LANG\n\
 757         Force the following files to be considered as written in the\n\
 758         named language up to the next --language=LANG option.");
 759
 760   if (CTAGS)
 761     puts ("--globals\n\
 762         Create tag entries for global variables in some languages.");
 763   else
 764     puts ("--no-globals\n\
 765         Do not create tag entries for global variables in some\n\
 766         languages.  This makes the tags file smaller.");
 767   puts ("--members\n\
 768         Create tag entries for member variables in C and derived languages.");
 769
 770 #ifdef ETAGS_REGEXPS
 771   puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
 772         Make a tag for each line matching pattern REGEXP in the following\n\
 773         files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
 774         regexfile is a file containing one REGEXP per line.\n\
 775         REGEXP is anchored (as if preceded by ^).\n\
 776         The form /REGEXP/NAME/ creates a named tag.\n\
 777         For example Tcl named tags can be created with:\n\
 778         --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
 779   puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
 780         Like -r, --regex but ignore case when matching expressions.");
 781   puts ("-R, --no-regex\n\
 782         Don't create tags from regexps for the following files.");
 783 #endif /* ETAGS_REGEXPS */
 784   puts ("-I, --ignore-indentation\n\
 785         Don't rely on indentation quite as much as normal.  Currently,\n\
 786         this means not to assume that a closing brace in the first\n\
 787         column is the final brace of a function or structure\n\
 788         definition in C and C++.");
 789   puts ("-o FILE, --output=FILE\n\
 790         Write the tags to FILE.");
 791   puts ("--parse-stdin=NAME\n\
 792         Read from standard input and record tags as belonging to file NAME.");
 793
 794   if (CTAGS)
 795     {
 796       puts ("-t, --typedefs\n\
 797         Generate tag entries for C and Ada typedefs.");
 798       puts ("-T, --typedefs-and-c++\n\
 799         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 800         and C++ member functions.");
 801     }
 802
 803   if (CTAGS)
 804     puts ("-u, --update\n\
 805         Update the tag entries for the given files, leaving tag\n\
 806         entries for other files in place.  Currently, this is\n\
 807         implemented by deleting the existing entries for the given\n\
 808         files and then rewriting the new entries at the end of the\n\
 809         tags file.  It is often faster to simply rebuild the entire\n\
 810         tag file than to use this.");
 811
 812   if (CTAGS)
 813     {
 814       puts ("-v, --vgrind\n\
 815         Generates an index of items intended for human consumption,\n\
 816         similar to the output of vgrind.  The index is sorted, and\n\
 817         gives the page number of each item.");
 818       puts ("-w, --no-warn\n\
 819         Suppress warning messages about entries defined in multiple\n\
 820         files.");
 821       puts ("-x, --cxref\n\
 822         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 823         The output uses line numbers instead of page numbers, but\n\
 824         beyond that the differences are cosmetic; try both to see\n\
 825         which you like.");
 826     }
 827
 828   puts ("-V, --version\n\
 829         Print the version of the program.\n\
 830 -h, --help\n\
 831         Print this help message.");
 832
 833   print_language_names ();
 834
 835   puts ("");
 836   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 837
 838   exit (GOOD);
 839 }
 840
 841 \f
 842 #ifdef VMS                      /* VMS specific functions */
 843
 844 #define EOS     '\0'
 845
 846 /* This is a BUG!  ANY arbitrary limit is a BUG!
 847    Won't someone please fix this?  */
 848 #define MAX_FILE_SPEC_LEN       255
 849 typedef struct  {
 850   short   curlen;
 851   char    body[MAX_FILE_SPEC_LEN + 1];
 852 } vspec;
 853
 854 /*
 855  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
 856  returning in each successive call the next file name matching the input
 857  spec. The function expects that each in_spec passed
 858  to it will be processed to completion; in particular, up to and
 859  including the call following that in which the last matching name
 860  is returned, the function ignores the value of in_spec, and will
 861  only start processing a new spec with the following call.
 862  If an error occurs, on return out_spec contains the value
 863  of in_spec when the error occurred.
 864
 865  With each successive file name returned in out_spec, the
 866  function's return value is one. When there are no more matching
 867  names the function returns zero. If on the first call no file
 868  matches in_spec, or there is any other error, -1 is returned.
 869 */
 870
 871 #include        <rmsdef.h>
 872 #include        <descrip.h>
 873 #define         OUTSIZE MAX_FILE_SPEC_LEN
 874 static short
 875 fn_exp (out, in)
 876      vspec *out;
 877      char *in;
 878 {
 879   static long context = 0;
 880   static struct dsc$descriptor_s o;
 881   static struct dsc$descriptor_s i;
 882   static bool pass1 = TRUE;
 883   long status;
 884   short retval;
 885
 886   if (pass1)
 887     {
 888       pass1 = FALSE;
 889       o.dsc$a_pointer = (char *) out;
 890       o.dsc$w_length = (short)OUTSIZE;
 891       i.dsc$a_pointer = in;
 892       i.dsc$w_length = (short)strlen(in);
 893       i.dsc$b_dtype = DSC$K_DTYPE_T;
 894       i.dsc$b_class = DSC$K_CLASS_S;
 895       o.dsc$b_dtype = DSC$K_DTYPE_VT;
 896       o.dsc$b_class = DSC$K_CLASS_VS;
 897     }
 898   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
 899     {
 900       out->body[out->curlen] = EOS;
 901       return 1;
 902     }
 903   else if (status == RMS$_NMF)
 904     retval = 0;
 905   else
 906     {
 907       strcpy(out->body, in);
 908       retval = -1;
 909     }
 910   lib$find_file_end(&context);
 911   pass1 = TRUE;
 912   return retval;
 913 }
 914
 915 /*
 916   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
 917   name of each file specified by the provided arg expanding wildcards.
 918 */
 919 static char *
 920 gfnames (arg, p_error)
 921      char *arg;
 922      bool *p_error;
 923 {
 924   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 925
 926   switch (fn_exp (&filename, arg))
 927     {
 928     case 1:
 929       *p_error = FALSE;
 930       return filename.body;
 931     case 0:
 932       *p_error = FALSE;
 933       return NULL;
 934     default:
 935       *p_error = TRUE;
 936       return filename.body;
 937     }
 938 }
 939
 940 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
 941 system (cmd)
 942      char *cmd;
 943 {
 944   error ("%s", "system() function not implemented under VMS");
 945 }
 946 #endif
 947
 948 #define VERSION_DELIM   ';'
 949 char *massage_name (s)
 950      char *s;
 951 {
 952   char *start = s;
 953
 954   for ( ; *s; s++)
 955     if (*s == VERSION_DELIM)
 956       {
 957         *s = EOS;
 958         break;
 959       }
 960     else
 961       *s = lowcase (*s);
 962   return start;
 963 }
 964 #endif /* VMS */
 965
 966 \f
 967 int
 968 main (argc, argv)
 969      int argc;
 970      char *argv[];
 971 {
 972   int i;
 973   unsigned int nincluded_files;
 974   char **included_files;
 975   argument *argbuffer;
 976   int current_arg, file_count;
 977   linebuffer filename_lb;
 978 #ifdef VMS
 979   bool got_err;
 980 #endif
 981  char *optstring;
 982  int opt;
 983
 984
 985 #ifdef DOS_NT
 986   _fmode = O_BINARY;   /* all of files are treated as binary files */
 987 #endif /* DOS_NT */
 988
 989   progname = argv[0];
 990   nincluded_files = 0;
 991   included_files = xnew (argc, char *);
 992   current_arg = 0;
 993   file_count = 0;
 994
 995   /* Allocate enough no matter what happens.  Overkill, but each one
 996      is small. */
 997   argbuffer = xnew (argc, argument);
 998
 999 #ifdef ETAGS_REGEXPS
1000   /* Set syntax for regular expression routines. */
1001   re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
1002   /* Translation table for case-insensitive search. */
1003   for (i = 0; i < CHAR_SET_SIZE; i++)
1004     lc_trans[i] = lowcase (i);
1005 #endif /* ETAGS_REGEXPS */
1006
1007   /*
1008    * If etags, always find typedefs and structure tags.  Why not?
1009    * Also default to find macro constants, enum constants and
1010    * global variables.
1011    */
1012   if (!CTAGS)
1013     {
1014       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1015       globals = TRUE;
1016     }
1017
1018   optstring = "-";
1019 #ifdef ETAGS_REGEXPS
1020   optstring = "-r:Rc:";
1021 #endif /* ETAGS_REGEXPS */
1022 #ifndef LONG_OPTIONS
1023   optstring = optstring + 1;
1024 #endif /* LONG_OPTIONS */
1025   optstring = concat (optstring,
1026                       "Cf:Il:o:SVhH",
1027                       (CTAGS) ? "BxdtTuvw" : "aDi:");
1028
1029   while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1030     switch (opt)
1031       {
1032       case 0:
1033         /* If getopt returns 0, then it has already processed a
1034            long-named option.  We should do nothing.  */
1035         break;
1036
1037       case 1:
1038         /* This means that a file name has been seen.  Record it. */
1039         argbuffer[current_arg].arg_type = at_filename;
1040         argbuffer[current_arg].what     = optarg;
1041         ++current_arg;
1042         ++file_count;
1043         break;
1044
1045       case STDIN:
1046         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1047         argbuffer[current_arg].arg_type = at_stdin;
1048         argbuffer[current_arg].what     = optarg;
1049         ++current_arg;
1050         ++file_count;
1051         if (parsing_stdin)
1052           fatal ("cannot parse standard input more than once", (char *)NULL);
1053         parsing_stdin = TRUE;
1054         break;
1055
1056         /* Common options. */
1057       case 'C': cplusplus = TRUE;               break;
1058       case 'f':         /* for compatibility with old makefiles */
1059       case 'o':
1060         if (tagfile)
1061           {
1062             error ("-o option may only be given once.", (char *)NULL);
1063             suggest_asking_for_help ();
1064           }
1065         tagfile = optarg;
1066         break;
1067       case 'I':
1068       case 'S':         /* for backward compatibility */
1069         noindentypedefs = TRUE;
1070         break;
1071       case 'l':
1072         {
1073           language *lang = get_language_from_langname (optarg);
1074           if (lang != NULL)
1075             {
1076               argbuffer[current_arg].lang = lang;
1077               argbuffer[current_arg].arg_type = at_language;
1078               ++current_arg;
1079             }
1080         }
1081         break;
1082       case 'r':
1083         argbuffer[current_arg].arg_type = at_regexp;
1084         argbuffer[current_arg].what = optarg;
1085         ++current_arg;
1086         break;
1087       case 'R':
1088         argbuffer[current_arg].arg_type = at_regexp;
1089         argbuffer[current_arg].what = NULL;
1090         ++current_arg;
1091         break;
1092       case 'c':
1093         argbuffer[current_arg].arg_type = at_icregexp;
1094         argbuffer[current_arg].what = optarg;
1095         ++current_arg;
1096         break;
1097       case 'V':
1098         print_version ();
1099         break;
1100       case 'h':
1101       case 'H':
1102         print_help ();
1103         break;
1104
1105         /* Etags options */
1106       case 'a': append_to_tagfile = TRUE;                       break;
1107       case 'D': constantypedefs = FALSE;                        break;
1108       case 'i': included_files[nincluded_files++] = optarg;     break;
1109
1110         /* Ctags options. */
1111       case 'B': searchar = '?';                                 break;
1112       case 'd': constantypedefs = TRUE;                         break;
1113       case 't': typedefs = TRUE;                                break;
1114       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1115       case 'u': update = TRUE;                                  break;
1116       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1117       case 'x': cxref_style = TRUE;                             break;
1118       case 'w': no_warnings = TRUE;                             break;
1119       default:
1120         suggest_asking_for_help ();
1121       }
1122
1123   for (; optind < argc; ++optind)
1124     {
1125       argbuffer[current_arg].arg_type = at_filename;
1126       argbuffer[current_arg].what = argv[optind];
1127       ++current_arg;
1128       ++file_count;
1129     }
1130
1131   if (nincluded_files == 0 && file_count == 0)
1132     {
1133       error ("no input files specified.", (char *)NULL);
1134       suggest_asking_for_help ();
1135     }
1136
1137   if (tagfile == NULL)
1138     tagfile = CTAGS ? "tags" : "TAGS";
1139   cwd = etags_getcwd ();        /* the current working directory */
1140   if (cwd[strlen (cwd) - 1] != '/')
1141     {
1142       char *oldcwd = cwd;
1143       cwd = concat (oldcwd, "/", "");
1144       free (oldcwd);
1145     }
1146   if (streq (tagfile, "-"))
1147     tagfiledir = cwd;
1148   else
1149     tagfiledir = absolute_dirname (tagfile, cwd);
1150
1151   init ();                      /* set up boolean "functions" */
1152
1153   initbuffer (&lb);
1154   initbuffer (&filename_lb);
1155
1156   if (!CTAGS)
1157     {
1158       if (streq (tagfile, "-"))
1159         {
1160           tagf = stdout;
1161 #ifdef DOS_NT
1162           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1163              doesn't take effect until after `stdout' is already open). */
1164           if (!isatty (fileno (stdout)))
1165             setmode (fileno (stdout), O_BINARY);
1166 #endif /* DOS_NT */
1167         }
1168       else
1169         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1170       if (tagf == NULL)
1171         pfatal (tagfile);
1172     }
1173
1174   /*
1175    * Loop through files finding functions.
1176    */
1177   for (i = 0; i < current_arg; ++i)
1178     {
1179       static language *lang;    /* non-NULL if language is forced */
1180       char *this_file;
1181
1182       switch (argbuffer[i].arg_type)
1183         {
1184         case at_language:
1185           lang = argbuffer[i].lang;
1186           break;
1187 #ifdef ETAGS_REGEXPS
1188         case at_regexp:
1189           analyse_regex (argbuffer[i].what, FALSE);
1190           break;
1191         case at_icregexp:
1192           analyse_regex (argbuffer[i].what, TRUE);
1193           break;
1194 #endif
1195         case at_filename:
1196 #ifdef VMS
1197           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1198             {
1199               if (got_err)
1200                 {
1201                   error ("can't find file %s\n", this_file);
1202                   argc--, argv++;
1203                 }
1204               else
1205                 {
1206                   this_file = massage_name (this_file);
1207                 }
1208 #else
1209               this_file = argbuffer[i].what;
1210 #endif
1211               /* Input file named "-" means read file names from stdin
1212                  (one per line) and use them. */
1213               if (streq (this_file, "-"))
1214                 {
1215                   if (parsing_stdin)
1216                     fatal ("cannot parse standard input AND read file names from it",
1217                            (char *)NULL);
1218                   while (readline_internal (&filename_lb, stdin) > 0)
1219                     process_file_name (filename_lb.buffer, lang);
1220                 }
1221               else
1222                 process_file_name (this_file, lang);
1223 #ifdef VMS
1224             }
1225 #endif
1226           break;
1227         case at_stdin:
1228           this_file = argbuffer[i].what;
1229           process_file (stdin, this_file, lang);
1230           break;
1231         }
1232     }
1233
1234 #ifdef ETAGS_REGEXPS
1235   free_patterns ();
1236 #endif /* ETAGS_REGEXPS */
1237
1238   if (!CTAGS || cxref_style)
1239     {
1240       put_entries (nodehead);
1241       free_tree (nodehead);
1242       nodehead = NULL;
1243       if (!CTAGS)
1244         while (nincluded_files-- > 0)
1245           fprintf (tagf, "\f\n%s,include\n", *included_files++);
1246
1247       if (fclose (tagf) == EOF)
1248         pfatal (tagfile);
1249       exit (GOOD);
1250     }
1251
1252   if (update)
1253     {
1254       char cmd[BUFSIZ];
1255       for (i = 0; i < current_arg; ++i)
1256         {
1257           switch (argbuffer[i].arg_type)
1258             {
1259             case at_filename:
1260             case at_stdin:
1261               break;
1262             default:
1263               continue;         /* the for loop */
1264             }
1265           sprintf (cmd,
1266                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1267                    tagfile, argbuffer[i].what, tagfile);
1268           if (system (cmd) != GOOD)
1269             fatal ("failed to execute shell command", (char *)NULL);
1270         }
1271       append_to_tagfile = TRUE;
1272     }
1273
1274   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1275   if (tagf == NULL)
1276     pfatal (tagfile);
1277   put_entries (nodehead);
1278   free_tree (nodehead);
1279   nodehead = NULL;
1280   if (fclose (tagf) == EOF)
1281     pfatal (tagfile);
1282
1283   if (update)
1284     {
1285       char cmd[2*BUFSIZ+10];
1286       sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1287       exit (system (cmd));
1288     }
1289   return GOOD;
1290 }
1291
1292
1293 /*
1294  * Return a compressor given the file name.  If EXTPTR is non-zero,
1295  * return a pointer into FILE where the compressor-specific
1296  * extension begins.  If no compressor is found, NULL is returned
1297  * and EXTPTR is not significant.
1298  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1299  */
1300 static compressor *
1301 get_compressor_from_suffix (file, extptr)
1302      char *file;
1303      char **extptr;
1304 {
1305   compressor *compr;
1306   char *slash, *suffix;
1307
1308   /* This relies on FN to be after canonicalize_filename,
1309      so we don't need to consider backslashes on DOS_NT.  */
1310   slash = etags_strrchr (file, '/');
1311   suffix = etags_strrchr (file, '.');
1312   if (suffix == NULL || suffix < slash)
1313     return NULL;
1314   if (extptr != NULL)
1315     *extptr = suffix;
1316   suffix += 1;
1317   /* Let those poor souls who live with DOS 8+3 file name limits get
1318      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1319      Only the first do loop is run if not MSDOS */
1320   do
1321     {
1322       for (compr = compressors; compr->suffix != NULL; compr++)
1323         if (streq (compr->suffix, suffix))
1324           return compr;
1325       if (!MSDOS)
1326         break;                  /* do it only once: not really a loop */
1327       if (extptr != NULL)
1328         *extptr = ++suffix;
1329     } while (*suffix != '\0');
1330   return NULL;
1331 }
1332
1333
1334
1335 /*
1336  * Return a language given the name.
1337  */
1338 static language *
1339 get_language_from_langname (name)
1340      const char *name;
1341 {
1342   language *lang;
1343
1344   if (name == NULL)
1345     error ("empty language name", (char *)NULL);
1346   else
1347     {
1348       for (lang = lang_names; lang->name != NULL; lang++)
1349         if (streq (name, lang->name))
1350           return lang;
1351       error ("unknown language \"%s\"", name);
1352     }
1353
1354   return NULL;
1355 }
1356
1357
1358 /*
1359  * Return a language given the interpreter name.
1360  */
1361 static language *
1362 get_language_from_interpreter (interpreter)
1363      char *interpreter;
1364 {
1365   language *lang;
1366   char **iname;
1367
1368   if (interpreter == NULL)
1369     return NULL;
1370   for (lang = lang_names; lang->name != NULL; lang++)
1371     if (lang->interpreters != NULL)
1372       for (iname = lang->interpreters; *iname != NULL; iname++)
1373         if (streq (*iname, interpreter))
1374             return lang;
1375
1376   return NULL;
1377 }
1378
1379
1380
1381 /*
1382  * Return a language given the file name.
1383  */
1384 static language *
1385 get_language_from_filename (file, case_sensitive)
1386      char *file;
1387      bool case_sensitive;
1388 {
1389   language *lang;
1390   char **name, **ext, *suffix;
1391
1392   /* Try whole file name first. */
1393   for (lang = lang_names; lang->name != NULL; lang++)
1394     if (lang->filenames != NULL)
1395       for (name = lang->filenames; *name != NULL; name++)
1396         if ((case_sensitive)
1397             ? streq (*name, file)
1398             : strcaseeq (*name, file))
1399           return lang;
1400
1401   /* If not found, try suffix after last dot. */
1402   suffix = etags_strrchr (file, '.');
1403   if (suffix == NULL)
1404     return NULL;
1405   suffix += 1;
1406   for (lang = lang_names; lang->name != NULL; lang++)
1407     if (lang->suffixes != NULL)
1408       for (ext = lang->suffixes; *ext != NULL; ext++)
1409         if ((case_sensitive)
1410             ? streq (*ext, suffix)
1411             : strcaseeq (*ext, suffix))
1412           return lang;
1413   return NULL;
1414 }
1415
1416 \f
1417 /*
1418  * This routine is called on each file argument.
1419  */
1420 static void
1421 process_file_name (file, lang)
1422      char *file;
1423      language *lang;
1424 {
1425   struct stat stat_buf;
1426   FILE *inf;
1427   fdesc *fdp;
1428   compressor *compr;
1429   char *compressed_name, *uncompressed_name;
1430   char *ext, *real_name;
1431   int retval;
1432
1433   canonicalize_filename (file);
1434   if (streq (file, tagfile) && !streq (tagfile, "-"))
1435     {
1436       error ("skipping inclusion of %s in self.", file);
1437       return;
1438     }
1439   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1440     {
1441       compressed_name = NULL;
1442       real_name = uncompressed_name = savestr (file);
1443     }
1444   else
1445     {
1446       real_name = compressed_name = savestr (file);
1447       uncompressed_name = savenstr (file, ext - file);
1448     }
1449
1450   /* If the canonicalized uncompressed name
1451      has already been dealt with, skip it silently. */
1452   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1453     {
1454       assert (fdp->infname != NULL);
1455       if (streq (uncompressed_name, fdp->infname))
1456         goto cleanup;
1457     }
1458
1459   if (stat (real_name, &stat_buf) != 0)
1460     {
1461       /* Reset real_name and try with a different name. */
1462       real_name = NULL;
1463       if (compressed_name != NULL) /* try with the given suffix */
1464         {
1465           if (stat (uncompressed_name, &stat_buf) == 0)
1466             real_name = uncompressed_name;
1467         }
1468       else                      /* try all possible suffixes */
1469         {
1470           for (compr = compressors; compr->suffix != NULL; compr++)
1471             {
1472               compressed_name = concat (file, ".", compr->suffix);
1473               if (stat (compressed_name, &stat_buf) != 0)
1474                 {
1475                   if (MSDOS)
1476                     {
1477                       char *suf = compressed_name + strlen (file);
1478                       size_t suflen = strlen (compr->suffix) + 1;
1479                       for ( ; suf[1]; suf++, suflen--)
1480                         {
1481                           memmove (suf, suf + 1, suflen);
1482                           if (stat (compressed_name, &stat_buf) == 0)
1483                             {
1484                               real_name = compressed_name;
1485                               break;
1486                             }
1487                         }
1488                       if (real_name != NULL)
1489                         break;
1490                     } /* MSDOS */
1491                   free (compressed_name);
1492                   compressed_name = NULL;
1493                 }
1494               else
1495                 {
1496                   real_name = compressed_name;
1497                   break;
1498                 }
1499             }
1500         }
1501       if (real_name == NULL)
1502         {
1503           perror (file);
1504           goto cleanup;
1505         }
1506     } /* try with a different name */
1507
1508   if (!S_ISREG (stat_buf.st_mode))
1509     {
1510       error ("skipping %s: it is not a regular file.", real_name);
1511       goto cleanup;
1512     }
1513   if (real_name == compressed_name)
1514     {
1515       char *cmd = concat (compr->command, " ", real_name);
1516       inf = (FILE *) popen (cmd, "r");
1517       free (cmd);
1518     }
1519   else
1520     inf = fopen (real_name, "r");
1521   if (inf == NULL)
1522     {
1523       perror (real_name);
1524       goto cleanup;
1525     }
1526
1527   process_file (inf, uncompressed_name, lang);
1528
1529   if (real_name == compressed_name)
1530     retval = pclose (inf);
1531   else
1532     retval = fclose (inf);
1533   if (retval < 0)
1534     pfatal (file);
1535
1536  cleanup:
1537   if (compressed_name) free (compressed_name);
1538   if (uncompressed_name) free (uncompressed_name);
1539   last_node = NULL;
1540   curfdp = NULL;
1541   return;
1542 }
1543
1544 static void
1545 process_file (fh, fn, lang)
1546      FILE *fh;
1547      char *fn;
1548      language *lang;
1549 {
1550   static const fdesc emptyfdesc;
1551   fdesc *fdp;
1552
1553   /* Create a new input file description entry. */
1554   fdp = xnew (1, fdesc);
1555   *fdp = emptyfdesc;
1556   fdp->next = fdhead;
1557   fdp->infname = savestr (fn);
1558   fdp->lang = lang;
1559   fdp->infabsname = absolute_filename (fn, cwd);
1560   fdp->infabsdir = absolute_dirname (fn, cwd);
1561   if (filename_is_absolute (fn))
1562     {
1563       /* An absolute file name.  Canonicalize it. */
1564       fdp->taggedfname = absolute_filename (fn, NULL);
1565     }
1566   else
1567     {
1568       /* A file name relative to cwd.  Make it relative
1569          to the directory of the tags file. */
1570       fdp->taggedfname = relative_filename (fn, tagfiledir);
1571     }
1572   fdp->usecharno = TRUE;        /* use char position when making tags */
1573   fdp->prop = NULL;
1574
1575   fdhead = fdp;
1576   curfdp = fdhead;              /* the current file description */
1577
1578   find_entries (fh);
1579
1580   /* If not Ctags, and if this is not metasource and if it contained no #line
1581      directives, we can write the tags and free all nodes pointing to
1582      curfdp. */
1583   if (!CTAGS
1584       && curfdp->usecharno      /* no #line directives in this file */
1585       && !curfdp->lang->metasource)
1586     {
1587       node *np, *prev;
1588
1589       /* Look for the head of the sublist relative to this file.  See add_node
1590          for the structure of the node tree. */
1591       prev = NULL;
1592       for (np = nodehead; np != NULL; prev = np, np = np->left)
1593         if (np->fdp == curfdp)
1594           break;
1595
1596       /* If we generated tags for this file, write and delete them. */
1597       if (np != NULL)
1598         {
1599           /* This is the head of the last sublist, if any.  The following
1600              instructions depend on this being true. */
1601           assert (np->left == NULL);
1602
1603           assert (fdhead == curfdp);
1604           assert (last_node->fdp == curfdp);
1605           put_entries (np);     /* write tags for file curfdp->taggedfname */
1606           free_tree (np);       /* remove the written nodes */
1607           if (prev == NULL)
1608             nodehead = NULL;    /* no nodes left */
1609           else
1610             prev->left = NULL;  /* delete the pointer to the sublist */
1611         }
1612     }
1613 }
1614
1615 /*
1616  * This routine sets up the boolean pseudo-functions which work
1617  * by setting boolean flags dependent upon the corresponding character.
1618  * Every char which is NOT in that string is not a white char.  Therefore,
1619  * all of the array "_wht" is set to FALSE, and then the elements
1620  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1621  * of a char is TRUE if it is the string "white", else FALSE.
1622  */
1623 static void
1624 init ()
1625 {
1626   register char *sp;
1627   register int i;
1628
1629   for (i = 0; i < CHARS; i++)
1630     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1631   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1632   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1633   notinname('\0') = notinname('\n');
1634   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1635   begtoken('\0') = begtoken('\n');
1636   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1637   intoken('\0') = intoken('\n');
1638   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1639   endtoken('\0') = endtoken('\n');
1640 }
1641
1642 /*
1643  * This routine opens the specified file and calls the function
1644  * which finds the function and type definitions.
1645  */
1646 static void
1647 find_entries (inf)
1648      FILE *inf;
1649 {
1650   char *cp;
1651   node *old_last_node;
1652   language *lang = curfdp->lang;
1653   Lang_function *parser = NULL;
1654
1655   /* If user specified a language, use it. */
1656   if (lang != NULL && lang->function != NULL)
1657     {
1658       parser = lang->function;
1659     }
1660
1661   /* Else try to guess the language given the file name. */
1662   if (parser == NULL)
1663     {
1664       lang = get_language_from_filename (curfdp->infname, TRUE);
1665       if (lang != NULL && lang->function != NULL)
1666         {
1667           curfdp->lang = lang;
1668           parser = lang->function;
1669         }
1670     }
1671
1672   /* Else look for sharp-bang as the first two characters. */
1673   if (parser == NULL
1674       && readline_internal (&lb, inf) > 0
1675       && lb.len >= 2
1676       && lb.buffer[0] == '#'
1677       && lb.buffer[1] == '!')
1678     {
1679       char *lp;
1680
1681       /* Set lp to point at the first char after the last slash in the
1682          line or, if no slashes, at the first nonblank.  Then set cp to
1683          the first successive blank and terminate the string. */
1684       lp = etags_strrchr (lb.buffer+2, '/');
1685       if (lp != NULL)
1686         lp += 1;
1687       else
1688         lp = skip_spaces (lb.buffer + 2);
1689       cp = skip_non_spaces (lp);
1690       *cp = '\0';
1691
1692       if (strlen (lp) > 0)
1693         {
1694           lang = get_language_from_interpreter (lp);
1695           if (lang != NULL && lang->function != NULL)
1696             {
1697               curfdp->lang = lang;
1698               parser = lang->function;
1699             }
1700         }
1701     }
1702
1703   /* We rewind here, even if inf may be a pipe.  We fail if the
1704      length of the first line is longer than the pipe block size,
1705      which is unlikely. */
1706     rewind (inf);
1707
1708   /* Else try to guess the language given the case insensitive file name. */
1709   if (parser == NULL)
1710     {
1711       lang = get_language_from_filename (curfdp->infname, FALSE);
1712       if (lang != NULL && lang->function != NULL)
1713         {
1714           curfdp->lang = lang;
1715           parser = lang->function;
1716         }
1717     }
1718
1719   if (!no_line_directive
1720       && curfdp->lang != NULL && curfdp->lang->metasource)
1721     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1722        file, or anyway we parsed a file that is automatically generated from
1723        this one.  If this is the case, the bingo.c file contained #line
1724        directives that generated tags pointing to this file.  Let's delete
1725        them all before parsing this file, which is the real source. */
1726     {
1727       fdesc **fdpp = &fdhead;
1728       while (*fdpp != NULL)
1729         if (*fdpp != curfdp
1730             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1731           /* We found one of those!  We must delete both the file description
1732              and all tags referring to it. */
1733           {
1734             fdesc *badfdp = *fdpp;
1735
1736             if (DEBUG)
1737               fprintf (stderr,
1738                        "Removing references to \"%s\" obtained from \"%s\"\n",
1739                        badfdp->taggedfname, badfdp->infname);
1740
1741             /* Delete the tags referring to badfdp. */
1742             invalidate_nodes (badfdp, &nodehead);
1743
1744             *fdpp = badfdp->next; /* remove the bad description from the list */
1745             free_fdesc (badfdp);
1746           }
1747         else
1748           fdpp = &(*fdpp)->next; /* advance the list pointer */
1749     }
1750
1751   if (parser != NULL)
1752     {
1753       /* Generic initialisations before reading from file. */
1754       lineno = 0;               /* reset global line number */
1755       charno = 0;               /* reset global char number */
1756       linecharno = 0;           /* reset global char number of line start */
1757
1758       parser (inf);
1759       return;
1760     }
1761
1762   /* Else try Fortran. */
1763   old_last_node = last_node;
1764   curfdp->lang = get_language_from_langname ("fortran");
1765   find_entries (inf);
1766
1767   if (old_last_node == last_node)
1768     /* No Fortran entries found.  Try C. */
1769     {
1770       /* We do not tag if rewind fails.
1771          Only the file name will be recorded in the tags file. */
1772       rewind (inf);
1773       curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1774       find_entries (inf);
1775     }
1776   return;
1777 }
1778
1779 \f
1780 /* Record a tag. */
1781 static void
1782 pfnote (name, is_func, linestart, linelen, lno, cno)
1783      char *name;                /* tag name, or NULL if unnamed */
1784      bool is_func;              /* tag is a function */
1785      char *linestart;           /* start of the line where tag is */
1786      int linelen;               /* length of the line where tag is */
1787      int lno;                   /* line number */
1788      long cno;                  /* character number */
1789 {
1790   register node *np;
1791
1792   if (CTAGS && name == NULL)
1793     return;
1794
1795   np = xnew (1, node);
1796
1797   /* If ctags mode, change name "main" to M<thisfilename>. */
1798   if (CTAGS && !cxref_style && streq (name, "main"))
1799     {
1800       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1801       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1802       fp = etags_strrchr (np->name, '.');
1803       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1804         fp[0] = '\0';
1805     }
1806   else
1807     np->name = name;
1808   np->valid = TRUE;
1809   np->been_warned = FALSE;
1810   np->fdp = curfdp;
1811   np->is_func = is_func;
1812   np->lno = lno;
1813   if (np->fdp->usecharno)
1814     /* Our char numbers are 0-base, because of C language tradition?
1815        ctags compatibility?  old versions compatibility?   I don't know.
1816        Anyway, since emacs's are 1-base we expect etags.el to take care
1817        of the difference.  If we wanted to have 1-based numbers, we would
1818        uncomment the +1 below. */
1819     np->cno = cno /* + 1 */ ;
1820   else
1821     np->cno = invalidcharno;
1822   np->left = np->right = NULL;
1823   if (CTAGS && !cxref_style)
1824     {
1825       if (strlen (linestart) < 50)
1826         np->pat = concat (linestart, "$", "");
1827       else
1828         np->pat = savenstr (linestart, 50);
1829     }
1830   else
1831     np->pat = savenstr (linestart, linelen);
1832
1833   add_node (np, &nodehead);
1834 }
1835
1836 /*
1837  * TAGS format specification
1838  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1839  *
1840  * pfnote should emit the optimized form [unnamed tag] only if:
1841  *  1. name does not contain any of the characters " \t\r\n(),;";
1842  *  2. linestart contains name as either a rightmost, or rightmost but
1843  *     one character, substring;
1844  *  3. the character, if any, immediately before name in linestart must
1845  *     be one of the characters " \t(),;";
1846  *  4. the character, if any, immediately after name in linestart must
1847  *     also be one of the characters " \t(),;".
1848  *
1849  * The real implementation uses the notinname() macro, which recognises
1850  * characters slightly different from " \t\r\n(),;".  See the variable
1851  * `nonam'.
1852  */
1853 #define traditional_tag_style TRUE
1854 static void
1855 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1856      char *name;                /* tag name, or NULL if unnamed */
1857      int namelen;               /* tag length */
1858      bool is_func;              /* tag is a function */
1859      char *linestart;           /* start of the line where tag is */
1860      int linelen;               /* length of the line where tag is */
1861      int lno;                   /* line number */
1862      long cno;                  /* character number */
1863 {
1864   register char *cp;
1865   bool named;
1866
1867   named = TRUE;
1868   if (!CTAGS)
1869     {
1870       for (cp = name; !notinname (*cp); cp++)
1871         continue;
1872       if (*cp == '\0')                          /* rule #1 */
1873         {
1874           cp = linestart + linelen - namelen;
1875           if (notinname (linestart[linelen-1]))
1876             cp -= 1;                            /* rule #4 */
1877           if (cp >= linestart                   /* rule #2 */
1878               && (cp == linestart
1879                   || notinname (cp[-1]))        /* rule #3 */
1880               && strneq (name, cp, namelen))    /* rule #2 */
1881             named = FALSE;      /* use unnamed tag */
1882         }
1883     }
1884
1885   if (named)
1886     name = savenstr (name, namelen);
1887   else
1888     name = NULL;
1889   pfnote (name, is_func, linestart, linelen, lno, cno);
1890 }
1891
1892 /*
1893  * free_tree ()
1894  *      recurse on left children, iterate on right children.
1895  */
1896 static void
1897 free_tree (np)
1898      register node *np;
1899 {
1900   while (np)
1901     {
1902       register node *node_right = np->right;
1903       free_tree (np->left);
1904       if (np->name != NULL)
1905         free (np->name);
1906       free (np->pat);
1907       free (np);
1908       np = node_right;
1909     }
1910 }
1911
1912 /*
1913  * free_fdesc ()
1914  *      delete a file description
1915  */
1916 static void
1917 free_fdesc (fdp)
1918      register fdesc *fdp;
1919 {
1920   if (fdp->infname != NULL) free (fdp->infname);
1921   if (fdp->infabsname != NULL) free (fdp->infabsname);
1922   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1923   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1924   if (fdp->prop != NULL) free (fdp->prop);
1925   free (fdp);
1926 }
1927
1928 /*
1929  * add_node ()
1930  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1931  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1932  *      balancing.
1933  *
1934  *      add_node is the only function allowed to add nodes, so it can
1935  *      maintain state.
1936  */
1937 static void
1938 add_node (np, cur_node_p)
1939      node *np, **cur_node_p;
1940 {
1941   register int dif;
1942   register node *cur_node = *cur_node_p;
1943
1944   if (cur_node == NULL)
1945     {
1946       *cur_node_p = np;
1947       last_node = np;
1948       return;
1949     }
1950
1951   if (!CTAGS)
1952     /* Etags Mode */
1953     {
1954       /* For each file name, tags are in a linked sublist on the right
1955          pointer.  The first tags of different files are a linked list
1956          on the left pointer.  last_node points to the end of the last
1957          used sublist. */
1958       if (last_node != NULL && last_node->fdp == np->fdp)
1959         {
1960           /* Let's use the same sublist as the last added node. */
1961           assert (last_node->right == NULL);
1962           last_node->right = np;
1963           last_node = np;
1964         }
1965       else if (cur_node->fdp == np->fdp)
1966         {
1967           /* Scanning the list we found the head of a sublist which is
1968              good for us.  Let's scan this sublist. */
1969           add_node (np, &cur_node->right);
1970         }
1971       else
1972         /* The head of this sublist is not good for us.  Let's try the
1973            next one. */
1974         add_node (np, &cur_node->left);
1975     } /* if ETAGS mode */
1976
1977   else
1978     {
1979       /* Ctags Mode */
1980       dif = strcmp (np->name, cur_node->name);
1981
1982       /*
1983        * If this tag name matches an existing one, then
1984        * do not add the node, but maybe print a warning.
1985        */
1986       if (!dif)
1987         {
1988           if (np->fdp == cur_node->fdp)
1989             {
1990               if (!no_warnings)
1991                 {
1992                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1993                            np->fdp->infname, lineno, np->name);
1994                   fprintf (stderr, "Second entry ignored\n");
1995                 }
1996             }
1997           else if (!cur_node->been_warned && !no_warnings)
1998             {
1999               fprintf
2000                 (stderr,
2001                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2002                  np->fdp->infname, cur_node->fdp->infname, np->name);
2003               cur_node->been_warned = TRUE;
2004             }
2005           return;
2006         }
2007
2008       /* Actually add the node */
2009       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2010     } /* if CTAGS mode */
2011 }
2012
2013 /*
2014  * invalidate_nodes ()
2015  *      Scan the node tree and invalidate all nodes pointing to the
2016  *      given file description (CTAGS case) or free them (ETAGS case).
2017  */
2018 static void
2019 invalidate_nodes (badfdp, npp)
2020      fdesc *badfdp;
2021      node **npp;
2022 {
2023   node *np = *npp;
2024
2025   if (np == NULL)
2026     return;
2027
2028   if (CTAGS)
2029     {
2030       if (np->left != NULL)
2031         invalidate_nodes (badfdp, &np->left);
2032       if (np->fdp == badfdp)
2033         np-> valid = FALSE;
2034       if (np->right != NULL)
2035         invalidate_nodes (badfdp, &np->right);
2036     }
2037   else
2038     {
2039       node **next = &np->left;
2040       if (np->fdp == badfdp)
2041         {
2042           *npp = *next;         /* detach the sublist from the list */
2043           np->left = NULL;      /* isolate it */
2044           free_tree (np);       /* free it */
2045         }
2046       invalidate_nodes (badfdp, next);
2047     }
2048 }
2049
2050 \f
2051 static int total_size_of_entries __P((node *));
2052 static int number_len __P((long));
2053
/* Count the decimal digits of NUM, which is meant to be non-negative.
   The result is never less than 1. */
static int
number_len (num)
     long num;
{
  int digits;

  /* One digit to start with, one more for each further power of ten. */
  for (digits = 1; num >= 10; num /= 10)
    digits += 1;
  return digits;
}
2064
2065 /*
2066  * Return total number of characters that put_entries will output for
2067  * the nodes in the linked list at the right of the specified node.
2068  * This count is irrelevant with etags.el since emacs 19.34 at least,
2069  * but is still supplied for backward compatibility.
2070  */
2071 static int
2072 total_size_of_entries (np)
2073      register node *np;
2074 {
2075   register int total = 0;
2076
2077   for (; np != NULL; np = np->right)
2078     {
2079       total += strlen (np->pat) + 1;            /* pat\177 */
2080       if (np->name != NULL)
2081         total += strlen (np->name) + 1;         /* name\001 */
2082       total += number_len ((long) np->lno) + 1; /* lno, */
2083       if (np->cno != invalidcharno)             /* cno */
2084         total += number_len (np->cno);
2085       total += 1;                               /* newline */
2086     }
2087
2088   return total;
2089 }
2090
2091 static void
2092 put_entries (np)
2093      register node *np;
2094 {
2095   register char *sp;
2096   static fdesc *fdp = NULL;
2097
2098   if (np == NULL)
2099     return;
2100
2101   /* Output subentries that precede this one */
2102   if (CTAGS)
2103     put_entries (np->left);
2104
2105   /* Output this entry */
2106   if (np->valid)
2107     {
2108       if (!CTAGS)
2109         {
2110           /* Etags mode */
2111           if (fdp != np->fdp)
2112             {
2113               fdp = np->fdp;
2114               fprintf (tagf, "\f\n%s,%d\n",
2115                        fdp->taggedfname, total_size_of_entries (np));
2116             }
2117           fputs (np->pat, tagf);
2118           fputc ('\177', tagf);
2119           if (np->name != NULL)
2120             {
2121               fputs (np->name, tagf);
2122               fputc ('\001', tagf);
2123             }
2124           fprintf (tagf, "%d,", np->lno);
2125           if (np->cno != invalidcharno)
2126             fprintf (tagf, "%ld", np->cno);
2127           fputs ("\n", tagf);
2128         }
2129       else
2130         {
2131           /* Ctags mode */
2132           if (np->name == NULL)
2133             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2134
2135           if (cxref_style)
2136             {
2137               if (vgrind_style)
2138                 fprintf (stdout, "%s %s %d\n",
2139                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2140               else
2141                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2142                          np->name, np->lno, np->fdp->taggedfname, np->pat);
2143             }
2144           else
2145             {
2146               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2147
2148               if (np->is_func)
2149                 {               /* function or #define macro with args */
2150                   putc (searchar, tagf);
2151                   putc ('^', tagf);
2152
2153                   for (sp = np->pat; *sp; sp++)
2154                     {
2155                       if (*sp == '\\' || *sp == searchar)
2156                         putc ('\\', tagf);
2157                       putc (*sp, tagf);
2158                     }
2159                   putc (searchar, tagf);
2160                 }
2161               else
2162                 {               /* anything else; text pattern inadequate */
2163                   fprintf (tagf, "%d", np->lno);
2164                 }
2165               putc ('\n', tagf);
2166             }
2167         }
2168     } /* if this node contains a valid tag */
2169
2170   /* Output subentries that follow this one */
2171   put_entries (np->right);
2172   if (!CTAGS)
2173     put_entries (np->left);
2174 }
2175
2176 \f
2177 /* C extensions. */
2178 #define C_EXT   0x00fff         /* C extensions */
2179 #define C_PLAIN 0x00000         /* C */
2180 #define C_PLPL  0x00001         /* C++ */
2181 #define C_STAR  0x00003         /* C* */
2182 #define C_JAVA  0x00005         /* JAVA */
2183 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
2184 #define YACC    0x10000         /* yacc file */
2185
2186 /*
2187  * The C symbol tables.
2188  */
2189 enum sym_type
2190 {
2191   st_none,
2192   st_C_objprot, st_C_objimpl, st_C_objend,
2193   st_C_gnumacro,
2194   st_C_ignore,
2195   st_C_javastruct,
2196   st_C_operator,
2197   st_C_class, st_C_template,
2198   st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
2199 };
2200
2201 static unsigned int hash __P((const char *, unsigned int));
2202 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2203 static enum sym_type C_symtype __P((char *, int, int));
2204
2205 /* Feed stuff between (but not including) %[ and %] lines to:
2206       gperf -c -k 1,3 -o -p -r -t
2207 %[
2208 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2209 %%
2210 if,             0,      st_C_ignore
2211 for,            0,      st_C_ignore
2212 while,          0,      st_C_ignore
2213 switch,         0,      st_C_ignore
2214 return,         0,      st_C_ignore
2215 @interface,     0,      st_C_objprot
2216 @protocol,      0,      st_C_objprot
2217 @implementation,0,      st_C_objimpl
2218 @end,           0,      st_C_objend
2219 import,         C_JAVA, st_C_ignore
2220 package,        C_JAVA, st_C_ignore
2221 friend,         C_PLPL, st_C_ignore
2222 extends,        C_JAVA, st_C_javastruct
2223 implements,     C_JAVA, st_C_javastruct
2224 interface,      C_JAVA, st_C_struct
2225 class,          0,      st_C_class
2226 namespace,      C_PLPL, st_C_struct
2227 domain,         C_STAR, st_C_struct
2228 union,          0,      st_C_struct
2229 struct,         0,      st_C_struct
2230 extern,         0,      st_C_extern
2231 enum,           0,      st_C_enum
2232 typedef,        0,      st_C_typedef
2233 define,         0,      st_C_define
2234 operator,       C_PLPL, st_C_operator
2235 template,       0,      st_C_template
2236 bool,           C_PLPL, st_C_typespec
2237 long,           0,      st_C_typespec
2238 short,          0,      st_C_typespec
2239 int,            0,      st_C_typespec
2240 char,           0,      st_C_typespec
2241 float,          0,      st_C_typespec
2242 double,         0,      st_C_typespec
2243 signed,         0,      st_C_typespec
2244 unsigned,       0,      st_C_typespec
2245 auto,           0,      st_C_typespec
2246 void,           0,      st_C_typespec
2247 static,         0,      st_C_typespec
2248 const,          0,      st_C_typespec
2249 volatile,       0,      st_C_typespec
2250 explicit,       C_PLPL, st_C_typespec
2251 mutable,        C_PLPL, st_C_typespec
2252 typename,       C_PLPL, st_C_typespec
2253 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2254 DEFUN,          0,      st_C_gnumacro
2255 SYSCALL,        0,      st_C_gnumacro
2256 ENTRY,          0,      st_C_gnumacro
2257 PSEUDO,         0,      st_C_gnumacro
2258 # These are defined inside C functions, so currently they are not met.
2259 # EXFUN used in glibc, DEFVAR_* in emacs.
2260 #EXFUN,         0,      st_C_gnumacro
2261 #DEFVAR_,       0,      st_C_gnumacro
2262 %]
2263 and replace lines between %< and %> with its output,
2264 then make in_word_set and C_stab_entry static. */
2265 /*%<*/
2266 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2267 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
2268 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2269
2270 #define TOTAL_KEYWORDS 47
2271 #define MIN_WORD_LENGTH 2
2272 #define MAX_WORD_LENGTH 15
2273 #define MIN_HASH_VALUE 18
2274 #define MAX_HASH_VALUE 138
2275 /* maximum key range = 121, duplicates = 0 */
2276
/* Hash function produced by gperf for the keyword table used by
   in_word_set.  The value is LEN, plus the association value of the
   first character of STR, plus, for lengths other than 1 and 2, the
   association value of its third character.  This is generated code:
   change the gperf input and regenerate it instead of editing it. */
2277 #ifdef __GNUC__
2278 __inline
2279 #endif
2280 static unsigned int
2281 hash (str, len)
2282      register const char *str;
2283      register unsigned int len;
2284 {
2285   static unsigned char asso_values[] =     /* one entry per character code */
2286     {
2287       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2288       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2289       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2290       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2291       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2292       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2293       139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
2294        62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2295        42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
2296       139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
2297        24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
2298        18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
2299       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2300       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2301       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2302       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2303       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2304       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2305       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2306       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2307       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2308       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2309       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2310       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2311       139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
2312       139, 139, 139, 139, 139, 139
2313     };
2314   register int hval = len;      /* the length is part of the hash */
2315
2316   switch (hval)
2317     {
2318       default:
2319       case 3:
2320         hval += asso_values[(unsigned char)str[2]];    /* third character, then fall through */
2321       case 2:
2322       case 1:
2323         hval += asso_values[(unsigned char)str[0]];    /* first character */
2324         break;
2325     }
2326   return hval;
2327 }
2328
/* Keyword lookup produced by gperf.  Return a pointer to the wordlist
   entry whose name starts with the LEN characters at STR and sits in
   the slot selected by hash, or 0 when there is no such entry.  This
   is generated code: change the gperf input and regenerate it instead
   of editing it. */
2329 #ifdef __GNUC__
2330 __inline
2331 #endif
2332 static struct C_stab_entry *
2333 in_word_set (str, len)
2334      register const char *str;
2335      register unsigned int len;
2336 {
2337   static struct C_stab_entry wordlist[] =  /* indexed by hash value; {""} is a slot without keyword */
2338     {
2339       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2340       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2341       {"if",            0,      st_C_ignore},
2342       {""}, {""}, {""}, {""},
2343       {"int",           0,      st_C_typespec},
2344       {""}, {""},
2345       {"void",          0,      st_C_typespec},
2346       {""}, {""},
2347       {"interface",     C_JAVA, st_C_struct},
2348       {""},
2349       {"SYSCALL",       0,      st_C_gnumacro},
2350       {""},
2351       {"return",                0,      st_C_ignore},
2352       {""}, {""}, {""}, {""}, {""}, {""}, {""},
2353       {"while",         0,      st_C_ignore},
2354       {"auto",          0,      st_C_typespec},
2355       {""}, {""}, {""}, {""}, {""}, {""},
2356       {"float",         0,      st_C_typespec},
2357       {"typedef",       0,      st_C_typedef},
2358       {"typename",      C_PLPL, st_C_typespec},
2359       {""}, {""}, {""},
2360       {"friend",                C_PLPL, st_C_ignore},
2361       {"volatile",      0,      st_C_typespec},
2362       {""}, {""},
2363       {"for",           0,      st_C_ignore},
2364       {"const",         0,      st_C_typespec},
2365       {"import",                C_JAVA, st_C_ignore},
2366       {""},
2367       {"define",        0,      st_C_define},
2368       {"long",          0,      st_C_typespec},
2369       {"implements",    C_JAVA, st_C_javastruct},
2370       {"signed",        0,      st_C_typespec},
2371       {""},
2372       {"extern",        0,      st_C_extern},
2373       {"extends",       C_JAVA, st_C_javastruct},
2374       {""},
2375       {"mutable",       C_PLPL, st_C_typespec},
2376       {"template",      0,      st_C_template},
2377       {"short",         0,      st_C_typespec},
2378       {"bool",          C_PLPL, st_C_typespec},
2379       {"char",          0,      st_C_typespec},
2380       {"class",         0,      st_C_class},
2381       {"operator",      C_PLPL, st_C_operator},
2382       {""},
2383       {"switch",                0,      st_C_ignore},
2384       {""},
2385       {"ENTRY",         0,      st_C_gnumacro},
2386       {""},
2387       {"package",       C_JAVA, st_C_ignore},
2388       {"union",         0,      st_C_struct},
2389       {"@end",          0,      st_C_objend},
2390       {"struct",        0,      st_C_struct},
2391       {"namespace",     C_PLPL, st_C_struct},
2392       {""}, {""},
2393       {"domain",        C_STAR, st_C_struct},
2394       {"@interface",    0,      st_C_objprot},
2395       {"PSEUDO",                0,      st_C_gnumacro},
2396       {"double",        0,      st_C_typespec},
2397       {""},
2398       {"@protocol",     0,      st_C_objprot},
2399       {""},
2400       {"static",        0,      st_C_typespec},
2401       {""}, {""},
2402       {"DEFUN",         0,      st_C_gnumacro},
2403       {""}, {""}, {""}, {""},
2404       {"explicit",      C_PLPL, st_C_typespec},
2405       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2406       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2407       {""},
2408       {"enum",          0,      st_C_enum},
2409       {""}, {""},
2410       {"unsigned",      0,      st_C_typespec},
2411       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2412       {"@implementation",0,     st_C_objimpl}
2413     };
2414
2415   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)   /* hash is only called for these lengths */
2416     {
2417       register int key = hash (str, len);
2418
2419       if (key <= MAX_HASH_VALUE && key >= 0)   /* key is a valid index in wordlist */
2420         {
2421           register const char *s = wordlist[key].name;
2422
2423           if (*str == *s && !strncmp (str + 1, s + 1, len - 1))   /* compare the LEN characters of STR */
2424             return &wordlist[key];
2425         }
2426     }
2427   return 0;                     /* nothing matched */
2428 }
2429 /*%>*/
2430
2431 static enum sym_type
2432 C_symtype (str, len, c_ext)
2433      char *str;
2434      int len;
2435      int c_ext;
2436 {
2437   register struct C_stab_entry *se = in_word_set (str, len);
2438
2439   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2440     return st_none;
2441   return se->type;
2442 }
2443
2444 \f
2445 /*
2446  * C functions and variables are recognized using a simple
2447  * finite automaton.  fvdef is its state variable.
2448  */
2449 static enum
2450 {
2451   fvnone,                       /* nothing seen */
2452   fdefunkey,                    /* Emacs DEFUN keyword seen */
2453   fdefunname,                   /* Emacs DEFUN name seen */
2454   foperator,                    /* func: operator keyword seen (cplpl) */
2455   fvnameseen,                   /* function or variable name seen */
2456   fstartlist,                   /* func: just after open parenthesis */
2457   finlist,                      /* func: in parameter list */
2458   flistseen,                    /* func: after parameter list */
2459   fignore,                      /* func: before open brace */
2460   vignore                       /* var-like: ignore until ';' */
2461 } fvdef;
2462
2463 static bool fvextern;           /* func or var: extern keyword seen; */
2464
2465 /*
2466  * typedefs are recognized using a simple finite automaton.
2467  * typdef is its state variable.
2468  */
2469 static enum
2470 {
2471   tnone,                        /* nothing seen */
2472   tkeyseen,                     /* typedef keyword seen */
2473   ttypeseen,                    /* defined type seen */
2474   tinbody,                      /* inside typedef body */
2475   tend,                         /* just before typedef tag */
2476   tignore                       /* junk after typedef tag */
2477 } typdef;
2478
2479 /*
2480  * struct-like structures (enum, struct and union) are recognized
2481  * using another simple finite automaton.  `structdef' is its state
2482  * variable.
2483  */
2484 static enum
2485 {
2486   snone,                        /* nothing seen yet,
2487                                    or in struct body if cblev > 0 */
2488   skeyseen,                     /* struct-like keyword seen */
2489   stagseen,                     /* struct-like tag seen */
2490   sintemplate,                  /* inside template (ignore) */
2491   scolonseen                    /* colon seen after struct-like tag */
2492 } structdef;
2493
2494 /*
2495  * When objdef is different from onone, objtag is the name of the class.
2496  */
2497 static char *objtag = "<uninited>";
2498
2499 /*
2500  * Yet another little state machine to deal with preprocessor lines.
2501  */
2502 static enum
2503 {
2504   dnone,                        /* nothing seen */
2505   dsharpseen,                   /* '#' seen as first char on line */
2506   ddefineseen,                  /* '#' and 'define' seen */
2507   dignorerest                   /* ignore rest of line */
2508 } definedef;
2509
2510 /*
2511  * State machine for Objective C protocols and implementations.
2512  * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
2513  */
2514 static enum
2515 {
2516   onone,                        /* nothing seen */
2517   oprotocol,                    /* @interface or @protocol seen */
2518   oimplementation,              /* @implementation seen */
2519   otagseen,                     /* class name seen */
2520   oparenseen,                   /* parenthesis before category seen */
2521   ocatseen,                     /* category name seen */
2522   oinbody,                      /* in @implementation body */
2523   omethodsign,                  /* in @implementation body, after +/- */
2524   omethodtag,                   /* after method name */
2525   omethodcolon,                 /* after method colon */
2526   omethodparm,                  /* after method parameter */
2527   oignore                       /* wait for @end */
2528 } objdef;
2529
2530
2531 /*
2532  * Use this structure to keep info about the token read, and how it
2533  * should be tagged.  Used by the make_C_tag function to build a tag.
2534  */
2535 static struct tok
2536 {
2537   bool valid;
2538   bool named;
2539   int offset;
2540   int length;
2541   int lineno;
2542   long linepos;
2543   char *line;
2544 } token;                        /* latest token read */
2545 static linebuffer token_name;   /* its name */
2546
2547 /*
2548  * Variables and functions for dealing with nested structures.
2549  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2550  */
2551 static void pushclass_above __P((int, char *, int));
2552 static void popclass_above __P((int));
2553 static void write_classname __P((linebuffer *, char *qualifier));
2554
2555 static struct {
2556   char **cname;                 /* nested class names */
2557   int *cblev;                   /* nested class curly brace level */
2558   int nl;                       /* class nesting level (elements used) */
2559   int size;                     /* length of the array */
2560 } cstack;                       /* stack for nested declaration tags */
2561 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2562 #define nestlev         (cstack.nl)
2563 /* After struct keyword or in struct body, not inside a nested function. */
2564 #define instruct        (structdef == snone && nestlev > 0                      \
2565                          && cblev == cstack.cblev[nestlev-1] + 1)
2566
2567 static void
2568 pushclass_above (cblev, str, len)
2569      int cblev;
2570      char *str;
2571      int len;
2572 {
2573   int nl;
2574
2575   popclass_above (cblev);
2576   nl = cstack.nl;
2577   if (nl >= cstack.size)
2578     {
2579       int size = cstack.size *= 2;
2580       xrnew (cstack.cname, size, char *);
2581       xrnew (cstack.cblev, size, int);
2582     }
2583   assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2584   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2585   cstack.cblev[nl] = cblev;
2586   cstack.nl = nl + 1;
2587 }
2588
2589 static void
2590 popclass_above (cblev)
2591      int cblev;
2592 {
2593   int nl;
2594
2595   for (nl = cstack.nl - 1;
2596        nl >= 0 && cstack.cblev[nl] >= cblev;
2597        nl--)
2598     {
2599       if (cstack.cname[nl] != NULL)
2600         free (cstack.cname[nl]);
2601       cstack.nl = nl;
2602     }
2603 }
2604
2605 static void
2606 write_classname (cn, qualifier)
2607      linebuffer *cn;
2608      char *qualifier;
2609 {
2610   int i, len;
2611   int qlen = strlen (qualifier);
2612
2613   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2614     {
2615       len = 0;
2616       cn->len = 0;
2617       cn->buffer[0] = '\0';
2618     }
2619   else
2620     {
2621       len = strlen (cstack.cname[0]);
2622       linebuffer_setlen (cn, len);
2623       strcpy (cn->buffer, cstack.cname[0]);
2624     }
2625   for (i = 1; i < cstack.nl; i++)
2626     {
2627       char *s;
2628       int slen;
2629
2630       s = cstack.cname[i];
2631       if (s == NULL)
2632         continue;
2633       slen = strlen (s);
2634       len += slen + qlen;
2635       linebuffer_setlen (cn, len);
2636       strncat (cn->buffer, qualifier, qlen);
2637       strncat (cn->buffer, s, slen);
2638     }
2639 }
2640
2641 \f
2642 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2643 static void make_C_tag __P((bool));
2644
2645 /*
2646  * consider_token ()
2647  *      checks to see if the current token is at the start of a
2648  *      function or variable, or corresponds to a typedef, or
2649  *      is a struct/union/enum tag, or #define, or an enum constant.
2650  *
2651  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2652  *      with args.  C_EXTP points to which language we are looking at.
2653  *
2654  * Globals
2655  *      fvdef                   IN OUT
2656  *      structdef               IN OUT
2657  *      definedef               IN OUT
2658  *      typdef                  IN OUT
2659  *      objdef                  IN OUT
2660  */
2661
2662 static bool
2663 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2664      register char *str;        /* IN: token pointer */
2665      register int len;          /* IN: token length */
2666      register int c;            /* IN: first char after the token */
2667      int *c_extp;               /* IN, OUT: C extensions mask */
2668      int cblev;                 /* IN: curly brace level */
2669      int parlev;                /* IN: parenthesis level */
2670      bool *is_func_or_var;      /* OUT: function or variable found */
2671 {
2672   /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2673      structtype is the type of the preceding struct-like keyword, and
2674      structcblev is the curly brace level where it has been seen. */
2675   static enum sym_type structtype;
2676   static int structcblev;
2677   static enum sym_type toktype;
2678
2679
2680   toktype = C_symtype (str, len, *c_extp);
2681
2682   /*
2683    * Advance the definedef state machine.
2684    */
2685   switch (definedef)
2686     {
2687     case dnone:
2688       /* We're not on a preprocessor line. */
2689       if (toktype == st_C_gnumacro)
2690         {
2691           fvdef = fdefunkey;
2692           return FALSE;
2693         }
2694       break;
2695     case dsharpseen:
2696       if (toktype == st_C_define)
2697         {
2698           definedef = ddefineseen;
2699         }
2700       else
2701         {
2702           definedef = dignorerest;
2703         }
2704       return FALSE;
2705     case ddefineseen:
2706       /*
2707        * Make a tag for any macro, unless it is a constant
2708        * and constantypedefs is FALSE.
2709        */
2710       definedef = dignorerest;
2711       *is_func_or_var = (c == '(');
2712       if (!*is_func_or_var && !constantypedefs)
2713         return FALSE;
2714       else
2715         return TRUE;
2716     case dignorerest:
2717       return FALSE;
2718     default:
2719       error ("internal error: definedef value.", (char *)NULL);
2720     }
2721
2722   /*
2723    * Now typedefs
2724    */
2725   switch (typdef)
2726     {
2727     case tnone:
2728       if (toktype == st_C_typedef)
2729         {
2730           if (typedefs)
2731             typdef = tkeyseen;
2732           fvextern = FALSE;
2733           fvdef = fvnone;
2734           return FALSE;
2735         }
2736       break;
2737     case tkeyseen:
2738       switch (toktype)
2739         {
2740         case st_none:
2741         case st_C_typespec:
2742         case st_C_class:
2743         case st_C_struct:
2744         case st_C_enum:
2745           typdef = ttypeseen;
2746           break;
2747         }
2748       break;
2749     case ttypeseen:
2750       if (structdef == snone && fvdef == fvnone)
2751         {
2752           fvdef = fvnameseen;
2753           return TRUE;
2754         }
2755       break;
2756     case tend:
2757       switch (toktype)
2758         {
2759         case st_C_typespec:
2760         case st_C_class:
2761         case st_C_struct:
2762         case st_C_enum:
2763           return FALSE;
2764         }
2765       return TRUE;
2766     }
2767
2768   /*
2769    * This structdef business is NOT invoked when we are ctags and the
2770    * file is plain C.  This is because a struct tag may have the same
2771    * name as another tag, and this loses with ctags.
2772    */
2773   switch (toktype)
2774     {
2775     case st_C_javastruct:
2776       if (structdef == stagseen)
2777         structdef = scolonseen;
2778       return FALSE;
2779     case st_C_template:
2780     case st_C_class:
2781       if (cblev == 0
2782           && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2783           && definedef == dnone && structdef == snone
2784           && typdef == tnone && fvdef == fvnone)
2785         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2786       if (toktype == st_C_template)
2787         break;
2788       /* FALLTHRU */
2789     case st_C_struct:
2790     case st_C_enum:
2791       if (parlev == 0
2792           && fvdef != vignore
2793           && (typdef == tkeyseen
2794               || (typedefs_or_cplusplus && structdef == snone)))
2795         {
2796           structdef = skeyseen;
2797           structtype = toktype;
2798           structcblev = cblev;
2799         }
2800       return FALSE;
2801     }
2802
2803   if (structdef == skeyseen)
2804     {
2805       structdef = stagseen;
2806       return TRUE;
2807     }
2808
2809   if (typdef != tnone)
2810     definedef = dnone;
2811
2812   /* Detect Objective C constructs. */
2813   switch (objdef)
2814     {
2815     case onone:
2816       switch (toktype)
2817         {
2818         case st_C_objprot:
2819           objdef = oprotocol;
2820           return FALSE;
2821         case st_C_objimpl:
2822           objdef = oimplementation;
2823           return FALSE;
2824         }
2825       break;
2826     case oimplementation:
2827       /* Save the class tag for functions or variables defined inside. */
2828       objtag = savenstr (str, len);
2829       objdef = oinbody;
2830       return FALSE;
2831     case oprotocol:
2832       /* Save the class tag for categories. */
2833       objtag = savenstr (str, len);
2834       objdef = otagseen;
2835       *is_func_or_var = TRUE;
2836       return TRUE;
2837     case oparenseen:
2838       objdef = ocatseen;
2839       *is_func_or_var = TRUE;
2840       return TRUE;
2841     case oinbody:
2842       break;
2843     case omethodsign:
2844       if (parlev == 0)
2845         {
2846           objdef = omethodtag;
2847           linebuffer_setlen (&token_name, len);
2848           strncpy (token_name.buffer, str, len);
2849           token_name.buffer[len] = '\0';
2850           return TRUE;
2851         }
2852       return FALSE;
2853     case omethodcolon:
2854       if (parlev == 0)
2855         objdef = omethodparm;
2856       return FALSE;
2857     case omethodparm:
2858       if (parlev == 0)
2859         {
2860           objdef = omethodtag;
2861           linebuffer_setlen (&token_name, token_name.len + len);
2862           strncat (token_name.buffer, str, len);
2863           return TRUE;
2864         }
2865       return FALSE;
2866     case oignore:
2867       if (toktype == st_C_objend)
2868         {
2869           /* Memory leakage here: the string pointed by objtag is
2870              never released, because many tests would be needed to
2871              avoid breaking on incorrect input code.  The amount of
2872              memory leaked here is the sum of the lengths of the
2873              class tags.
2874           free (objtag); */
2875           objdef = onone;
2876         }
2877       return FALSE;
2878     }
2879
2880   /* A function, variable or enum constant? */
2881   switch (toktype)
2882     {
2883     case st_C_extern:
2884       fvextern = TRUE;
2885       /* FALLTHRU */
2886     case st_C_typespec:
2887       if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2888         fvdef = fvnone;         /* should be useless */
2889       return FALSE;
2890     case st_C_ignore:
2891       fvextern = FALSE;
2892       fvdef = vignore;
2893       return FALSE;
2894     case st_C_operator:
2895       fvdef = foperator;
2896       *is_func_or_var = TRUE;
2897       return TRUE;
2898     case st_none:
2899       if (constantypedefs
2900           && structdef == snone
2901           && structtype == st_C_enum && cblev > structcblev)
2902         return TRUE;            /* enum constant */
2903       switch (fvdef)
2904         {
2905         case fdefunkey:
2906           if (cblev > 0)
2907             break;
2908           fvdef = fdefunname;   /* GNU macro */
2909           *is_func_or_var = TRUE;
2910           return TRUE;
2911         case fvnone:
2912           if ((strneq (str, "asm", 3) && endtoken (str[3]))
2913               || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2914             {
2915               fvdef = vignore;
2916               return FALSE;
2917             }
2918           if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2919             {
2920               fvdef = foperator;
2921               *is_func_or_var = TRUE;
2922               return TRUE;
2923             }
2924           if (cblev > 0 && !instruct)
2925             break;
2926           fvdef = fvnameseen;   /* function or variable */
2927           *is_func_or_var = TRUE;
2928           return TRUE;
2929         }
2930       break;
2931     }
2932
2933   return FALSE;
2934 }
2935
2936 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 *
 * The macros below are not self-contained: they expand to code that
 * uses curndx, newndx, lp, quotednl, inf, savetoken, token, charno and
 * definedef, so those names must be visible where the macros are used.
 */
static struct
{
  long linepos;                 /* value of charno saved just before the
                                   line was read into lb */
  linebuffer lb;                /* the line itself */
} lbs[2];

/* True as long as the buffers have not been switched since the last
   line was read (reading a line sets newndx to curndx). */
#define current_lb_is_new (newndx == curndx)
/* Make the other buffer the current one, so that the next line read
   does not overwrite the line newndx still refers to. */
#define switch_line_buffers() (curndx = 1 - curndx)

#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Read the next input line into the current buffer, point lp at its
   start, clear quotednl and mark the current buffer as the new one.
   Unlike CNL, this leaves definedef and the saved token alone. */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, then restore token from savetoken if one
   was saved, and reset definedef to dnone. */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
2975
2976
2977 static void
2978 make_C_tag (isfun)
2979      bool isfun;
2980 {
2981   /* This function should never be called when token.valid is FALSE, but
2982      we must protect against invalid input or internal errors. */
2983   if (DEBUG || token.valid)
2984     {
2985       if (traditional_tag_style)
2986         {
2987           /* This was the original code.  Now we call new_pfnote instead,
2988              which uses the new method for naming tags (see new_pfnote). */
2989           char *name = NULL;
2990
2991           if (CTAGS || token.named)
2992             name = savestr (token_name.buffer);
2993           if (DEBUG && !token.valid)
2994             {
2995               if (token.named)
2996                 name = concat (name, "##invalid##", "");
2997               else
2998                 name = savestr ("##invalid##");
2999             }
3000           pfnote (name, isfun, token.line,
3001                   token.offset+token.length+1, token.lineno, token.linepos);
3002         }
3003       else
3004         new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
3005                     token.offset+token.length+1, token.lineno, token.linepos);
3006       token.valid = FALSE;
3007     }
3008 }
3009
3010
3011 /*
3012  * C_entries ()
3013  *      This routine finds functions, variables, typedefs,
3014  *      #define's, enum constants and struct/union/enum definitions in
3015  *      C syntax and adds them to the list.
3016  */
3017 static void
3018 C_entries (c_ext, inf)
3019      int c_ext;                 /* extension of C */
3020      FILE *inf;                 /* input file */
3021 {
3022   register char c;              /* latest char read; '\0' for end of line */
3023   register char *lp;            /* pointer one beyond the character `c' */
3024   int curndx, newndx;           /* indices for current and new lb */
3025   register int tokoff;          /* offset in line of start of current token */
3026   register int toklen;          /* length of current token */
3027   char *qualifier;              /* string used to qualify names */
3028   int qlen;                     /* length of qualifier */
3029   int cblev;                    /* current curly brace level */
3030   int parlev;                   /* current parenthesis level */
3031   int typdefcblev;              /* cblev where a typedef struct body begun */
3032   bool incomm, inquote, inchar, quotednl, midtoken;
3033   bool cplpl, cjava;
3034   bool yacc_rules;              /* in the rules part of a yacc file */
3035   struct tok savetoken;         /* token saved during preprocessor handling */
3036
3037
3038   initbuffer (&token_name);
3039   initbuffer (&lbs[0].lb);
3040   initbuffer (&lbs[1].lb);
3041   if (cstack.size == 0)
3042     {
3043       cstack.size = (DEBUG) ? 1 : 4;
3044       cstack.nl = 0;
3045       cstack.cname = xnew (cstack.size, char *);
3046       cstack.cblev = xnew (cstack.size, int);
3047     }
3048
3049   tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3050   curndx = newndx = 0;
3051   lp = curlb.buffer;
3052   *lp = 0;
3053
3054   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3055   structdef = snone; definedef = dnone; objdef = onone;
3056   yacc_rules = FALSE;
3057   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3058   token.valid = savetoken.valid = FALSE;
3059   cblev = 0;
3060   parlev = 0;
3061   cplpl = (c_ext & C_PLPL) == C_PLPL;
3062   cjava = (c_ext & C_JAVA) == C_JAVA;
3063   if (cjava)
3064     { qualifier = "."; qlen = 1; }
3065   else
3066     { qualifier = "::"; qlen = 2; }
3067
3068
3069   while (!feof (inf))
3070     {
3071       c = *lp++;
3072       if (c == '\\')
3073         {
3074           /* If we're at the end of the line, the next character is a
3075              '\0'; don't skip it, because it's the thing that tells us
3076              to read the next line.  */
3077           if (*lp == '\0')
3078             {
3079               quotednl = TRUE;
3080               continue;
3081             }
3082           lp++;
3083           c = ' ';
3084         }
3085       else if (incomm)
3086         {
3087           switch (c)
3088             {
3089             case '*':
3090               if (*lp == '/')
3091                 {
3092                   c = *lp++;
3093                   incomm = FALSE;
3094                 }
3095               break;
3096             case '\0':
3097               /* Newlines inside comments do not end macro definitions in
3098                  traditional cpp. */
3099               CNL_SAVE_DEFINEDEF ();
3100               break;
3101             }
3102           continue;
3103         }
3104       else if (inquote)
3105         {
3106           switch (c)
3107             {
3108             case '"':
3109               inquote = FALSE;
3110               break;
3111             case '\0':
3112               /* Newlines inside strings do not end macro definitions
3113                  in traditional cpp, even though compilers don't
3114                  usually accept them. */
3115               CNL_SAVE_DEFINEDEF ();
3116               break;
3117             }
3118           continue;
3119         }
3120       else if (inchar)
3121         {
3122           switch (c)
3123             {
3124             case '\0':
3125               /* Hmmm, something went wrong. */
3126               CNL ();
3127               /* FALLTHRU */
3128             case '\'':
3129               inchar = FALSE;
3130               break;
3131             }
3132           continue;
3133         }
3134       else
3135         switch (c)
3136           {
3137           case '"':
3138             inquote = TRUE;
3139             switch (fvdef)
3140               {
3141               case fdefunkey:
3142               case fstartlist:
3143               case finlist:
3144               case fignore:
3145               case vignore:
3146                 break;
3147               default:
3148                 fvextern = FALSE;
3149                 fvdef = fvnone;
3150               }
3151             continue;
3152           case '\'':
3153             inchar = TRUE;
3154             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3155               {
3156                 fvextern = FALSE;
3157                 fvdef = fvnone;
3158               }
3159             continue;
3160           case '/':
3161             if (*lp == '*')
3162               {
3163                 lp++;
3164                 incomm = TRUE;
3165                 continue;
3166               }
3167             else if (/* cplpl && */ *lp == '/')
3168               {
3169                 c = '\0';
3170                 break;
3171               }
3172             else
3173               break;
3174           case '%':
3175             if ((c_ext & YACC) && *lp == '%')
3176               {
3177                 /* Entering or exiting rules section in yacc file. */
3178                 lp++;
3179                 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3180                 typdef = tnone; structdef = snone;
3181                 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3182                 cblev = 0;
3183                 yacc_rules = !yacc_rules;
3184                 continue;
3185               }
3186             else
3187               break;
3188           case '#':
3189             if (definedef == dnone)
3190               {
3191                 char *cp;
3192                 bool cpptoken = TRUE;
3193
3194                 /* Look back on this line.  If all blanks, or nonblanks
3195                    followed by an end of comment, this is a preprocessor
3196                    token. */
3197                 for (cp = newlb.buffer; cp < lp-1; cp++)
3198                   if (!iswhite (*cp))
3199                     {
3200                       if (*cp == '*' && *(cp+1) == '/')
3201                         {
3202                           cp++;
3203                           cpptoken = TRUE;
3204                         }
3205                       else
3206                         cpptoken = FALSE;
3207                     }
3208                 if (cpptoken)
3209                   definedef = dsharpseen;
3210               } /* if (definedef == dnone) */
3211
3212             continue;
3213           } /* switch (c) */
3214
3215
3216       /* Consider token only if some involved conditions are satisfied. */
3217       if (typdef != tignore
3218           && definedef != dignorerest
3219           && fvdef != finlist
3220           && structdef != sintemplate
3221           && (definedef != dnone
3222               || structdef != scolonseen))
3223         {
3224           if (midtoken)
3225             {
3226               if (endtoken (c))
3227                 {
3228                   if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3229                     {
3230                       /*
3231                        * This handles :: in the middle, but not at the
3232                        * beginning of an identifier.  Also, space-separated
3233                        * :: is not recognised.
3234                        */
3235                       lp += 2;
3236                       toklen += 2;
3237                       c = lp[-1];
3238                       goto still_in_token;
3239                     }
3240                   else
3241                     {
3242                       bool funorvar = FALSE;
3243
3244                       if (yacc_rules
3245                           || consider_token (newlb.buffer + tokoff, toklen, c,
3246                                              &c_ext, cblev, parlev, &funorvar))
3247                         {
3248                           if (fvdef == foperator)
3249                             {
3250                               char *oldlp = lp;
3251                               lp = skip_spaces (lp-1);
3252                               if (*lp != '\0')
3253                                 lp += 1;
3254                               while (*lp != '\0'
3255                                      && !iswhite (*lp) && *lp != '(')
3256                                 lp += 1;
3257                               c = *lp++;
3258                               toklen += lp - oldlp;
3259                             }
3260                           token.named = FALSE;
3261                           if ((c_ext & C_EXT)   /* not pure C */
3262                               && nestlev > 0 && definedef == dnone)
3263                             /* in struct body */
3264                             {
3265                               write_classname (&token_name, qualifier);
3266                               linebuffer_setlen (&token_name,
3267                                                  token_name.len+qlen+toklen);
3268                               strcat (token_name.buffer, qualifier);
3269                               strncat (token_name.buffer,
3270                                        newlb.buffer + tokoff, toklen);
3271                               token.named = TRUE;
3272                             }
3273                           else if (objdef == ocatseen)
3274                             /* Objective C category */
3275                             {
3276                               int len = strlen (objtag) + 2 + toklen;
3277                               linebuffer_setlen (&token_name, len);
3278                               strcpy (token_name.buffer, objtag);
3279                               strcat (token_name.buffer, "(");
3280                               strncat (token_name.buffer,
3281                                        newlb.buffer + tokoff, toklen);
3282                               strcat (token_name.buffer, ")");
3283                               token.named = TRUE;
3284                             }
3285                           else if (objdef == omethodtag
3286                                    || objdef == omethodparm)
3287                             /* Objective C method */
3288                             {
3289                               token.named = TRUE;
3290                             }
3291                           else if (fvdef == fdefunname)
3292                             /* GNU DEFUN and similar macros */
3293                             {
3294                               bool defun = (newlb.buffer[tokoff] == 'F');
3295                               int off = tokoff;
3296                               int len = toklen;
3297
3298                               /* Rewrite the tag so that emacs lisp DEFUNs
3299                                  can be found by their elisp name */
3300                               if (defun)
3301                                 {
3302                                   off += 1;
3303                                   len -= 1;
3304                                 }
3305                               len = toklen;
3306                               linebuffer_setlen (&token_name, len);
3307                               strncpy (token_name.buffer,
3308                                        newlb.buffer + off, len);
3309                               token_name.buffer[len] = '\0';
3310                               if (defun)
3311                                 while (--len >= 0)
3312                                   if (token_name.buffer[len] == '_')
3313                                     token_name.buffer[len] = '-';
3314                               token.named = defun;
3315                             }
3316                           else
3317                             {
3318                               linebuffer_setlen (&token_name, toklen);
3319                               strncpy (token_name.buffer,
3320                                        newlb.buffer + tokoff, toklen);
3321                               token_name.buffer[toklen] = '\0';
3322                               /* Name macros and members. */
3323                               token.named = (structdef == stagseen
3324                                              || typdef == ttypeseen
3325                                              || typdef == tend
3326                                              || (funorvar
3327                                                  && definedef == dignorerest)
3328                                              || (funorvar
3329                                                  && definedef == dnone
3330                                                  && structdef == snone
3331                                                  && cblev > 0));
3332                             }
3333                           token.lineno = lineno;
3334                           token.offset = tokoff;
3335                           token.length = toklen;
3336                           token.line = newlb.buffer;
3337                           token.linepos = newlinepos;
3338                           token.valid = TRUE;
3339
3340                           if (definedef == dnone
3341                               && (fvdef == fvnameseen
3342                                   || fvdef == foperator
3343                                   || structdef == stagseen
3344                                   || typdef == tend
3345                                   || typdef == ttypeseen
3346                                   || objdef != onone))
3347                             {
3348                               if (current_lb_is_new)
3349                                 switch_line_buffers ();
3350                             }
3351                           else if (definedef != dnone
3352                                    || fvdef == fdefunname
3353                                    || instruct)
3354                             make_C_tag (funorvar);
3355                         }
3356                       midtoken = FALSE;
3357                     }
3358                 } /* if (endtoken (c)) */
3359               else if (intoken (c))
3360                 still_in_token:
3361                 {
3362                   toklen++;
3363                   continue;
3364                 }
3365             } /* if (midtoken) */
3366           else if (begtoken (c))
3367             {
3368               switch (definedef)
3369                 {
3370                 case dnone:
3371                   switch (fvdef)
3372                     {
3373                     case fstartlist:
3374                       fvdef = finlist;
3375                       continue;
3376                     case flistseen:
3377                       make_C_tag (TRUE); /* a function */
3378                       fvdef = fignore;
3379                       break;
3380                     case fvnameseen:
3381                       fvdef = fvnone;
3382                       break;
3383                     }
3384                   if (structdef == stagseen && !cjava)
3385                     {
3386                       popclass_above (cblev);
3387                       structdef = snone;
3388                     }
3389                   break;
3390                 case dsharpseen:
3391                   savetoken = token;
3392                   break;
3393                 }
3394               if (!yacc_rules || lp == newlb.buffer + 1)
3395                 {
3396                   tokoff = lp - 1 - newlb.buffer;
3397                   toklen = 1;
3398                   midtoken = TRUE;
3399                 }
3400               continue;
3401             } /* if (begtoken) */
3402         } /* if must look at token */
3403
3404
3405       /* Detect end of line, colon, comma, semicolon and various braces
3406          after having handled a token.*/
3407       switch (c)
3408         {
3409         case ':':
3410           if (yacc_rules && token.offset == 0 && token.valid)
3411             {
3412               make_C_tag (FALSE); /* a yacc function */
3413               break;
3414             }
3415           if (definedef != dnone)
3416             break;
3417           switch (objdef)
3418             {
3419             case  otagseen:
3420               objdef = oignore;
3421               make_C_tag (TRUE); /* an Objective C class */
3422               break;
3423             case omethodtag:
3424             case omethodparm:
3425               objdef = omethodcolon;
3426               linebuffer_setlen (&token_name, token_name.len + 1);
3427               strcat (token_name.buffer, ":");
3428               break;
3429             }
3430           if (structdef == stagseen)
3431             structdef = scolonseen;
3432           break;
3433         case ';':
3434           if (definedef != dnone)
3435             break;
3436           switch (typdef)
3437             {
3438             case tend:
3439             case ttypeseen:
3440               make_C_tag (FALSE); /* a typedef */
3441               typdef = tnone;
3442               fvdef = fvnone;
3443               break;
3444             case tnone:
3445             case tinbody:
3446             case tignore:
3447               switch (fvdef)
3448                 {
3449                 case fignore:
3450                   if (typdef == tignore)
3451                     fvdef = fvnone;
3452                   break;
3453                 case fvnameseen:
3454                   if ((globals && cblev == 0 && (!fvextern || declarations))
3455                       || (members && instruct))
3456                     make_C_tag (FALSE); /* a variable */
3457                   fvextern = FALSE;
3458                   fvdef = fvnone;
3459                   token.valid = FALSE;
3460                   break;
3461                 case flistseen:
3462                   if ((declarations && typdef == tnone && !instruct)
3463                       || (members && typdef != tignore && instruct))
3464                     make_C_tag (TRUE);  /* a function declaration */
3465                   /* FALLTHRU */
3466                 default:
3467                   fvextern = FALSE;
3468                   fvdef = fvnone;
3469                   if (declarations
3470                       && structdef == stagseen && (c_ext & C_PLPL))
3471                     make_C_tag (FALSE); /* forward declaration */
3472                   else
3473                     /* The following instruction invalidates the token.
3474                        Probably the token should be invalidated in all other
3475                        cases where some state machine is reset prematurely. */
3476                     token.valid = FALSE;
3477                 } /* switch (fvdef) */
3478               /* FALLTHRU */
3479             default:
3480               if (!instruct)
3481                 typdef = tnone;
3482             }
3483           if (structdef == stagseen)
3484             structdef = snone;
3485           break;
3486         case ',':
3487           if (definedef != dnone)
3488             break;
3489           switch (objdef)
3490             {
3491             case omethodtag:
3492             case omethodparm:
3493               make_C_tag (TRUE); /* an Objective C method */
3494               objdef = oinbody;
3495               break;
3496             }
3497           switch (fvdef)
3498             {
3499             case fdefunkey:
3500             case foperator:
3501             case fstartlist:
3502             case finlist:
3503             case fignore:
3504             case vignore:
3505               break;
3506             case fdefunname:
3507               fvdef = fignore;
3508               break;
3509             case fvnameseen:    /* a variable */
3510               if ((globals && cblev == 0 && (!fvextern || declarations))
3511                   || (members && instruct))
3512                 make_C_tag (FALSE);
3513               break;
3514             case flistseen:     /* a function */
3515               if ((declarations && typdef == tnone && !instruct)
3516                   || (members && typdef != tignore && instruct))
3517                 {
3518                   make_C_tag (TRUE); /* a function declaration */
3519                   fvdef = fvnameseen;
3520                 }
3521               else if (!declarations)
3522                 fvdef = fvnone;
3523               token.valid = FALSE;
3524               break;
3525             default:
3526               fvdef = fvnone;
3527             }
3528           if (structdef == stagseen)
3529             structdef = snone;
3530           break;
3531         case '[':
3532           if (definedef != dnone)
3533             break;
3534           if (structdef == stagseen)
3535             structdef = snone;
3536           switch (typdef)
3537             {
3538             case ttypeseen:
3539             case tend:
3540               typdef = tignore;
3541               make_C_tag (FALSE);       /* a typedef */
3542               break;
3543             case tnone:
3544             case tinbody:
3545               switch (fvdef)
3546                 {
3547                 case foperator:
3548                 case finlist:
3549                 case fignore:
3550                 case vignore:
3551                   break;
3552                 case fvnameseen:
3553                   if ((members && cblev == 1)
3554                       || (globals && cblev == 0
3555                           && (!fvextern || declarations)))
3556                     make_C_tag (FALSE); /* a variable */
3557                   /* FALLTHRU */
3558                 default:
3559                   fvdef = fvnone;
3560                 }
3561               break;
3562             }
3563           break;
3564         case '(':
3565           if (definedef != dnone)
3566             break;
3567           if (objdef == otagseen && parlev == 0)
3568             objdef = oparenseen;
3569           switch (fvdef)
3570             {
3571             case fvnameseen:
3572               if (typdef == ttypeseen
3573                   && *lp != '*'
3574                   && !instruct)
3575                 {
3576                   /* This handles constructs like:
3577                      typedef void OperatorFun (int fun); */
3578                   make_C_tag (FALSE);
3579                   typdef = tignore;
3580                   fvdef = fignore;
3581                   break;
3582                 }
3583               /* FALLTHRU */
3584             case foperator:
3585               fvdef = fstartlist;
3586               break;
3587             case flistseen:
3588               fvdef = finlist;
3589               break;
3590             }
3591           parlev++;
3592           break;
3593         case ')':
3594           if (definedef != dnone)
3595             break;
3596           if (objdef == ocatseen && parlev == 1)
3597             {
3598               make_C_tag (TRUE); /* an Objective C category */
3599               objdef = oignore;
3600             }
3601           if (--parlev == 0)
3602             {
3603               switch (fvdef)
3604                 {
3605                 case fstartlist:
3606                 case finlist:
3607                   fvdef = flistseen;
3608                   break;
3609                 }
3610               if (!instruct
3611                   && (typdef == tend
3612                       || typdef == ttypeseen))
3613                 {
3614                   typdef = tignore;
3615                   make_C_tag (FALSE); /* a typedef */
3616                 }
3617             }
3618           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3619             parlev = 0;
3620           break;
3621         case '{':
3622           if (definedef != dnone)
3623             break;
3624           if (typdef == ttypeseen)
3625             {
3626               /* Whenever typdef is set to tinbody (currently only
3627                  here), typdefcblev should be set to cblev. */
3628               typdef = tinbody;
3629               typdefcblev = cblev;
3630             }
3631           switch (fvdef)
3632             {
3633             case flistseen:
3634               make_C_tag (TRUE);    /* a function */
3635               /* FALLTHRU */
3636             case fignore:
3637               fvdef = fvnone;
3638               break;
3639             case fvnone:
3640               switch (objdef)
3641                 {
3642                 case otagseen:
3643                   make_C_tag (TRUE); /* an Objective C class */
3644                   objdef = oignore;
3645                   break;
3646                 case omethodtag:
3647                 case omethodparm:
3648                   make_C_tag (TRUE); /* an Objective C method */
3649                   objdef = oinbody;
3650                   break;
3651                 default:
3652                   /* Neutralize `extern "C" {' grot. */
3653                   if (cblev == 0 && structdef == snone && nestlev == 0
3654                       && typdef == tnone)
3655                     cblev = -1;
3656                 }
3657               break;
3658             }
3659           switch (structdef)
3660             {
3661             case skeyseen:         /* unnamed struct */
3662               pushclass_above (cblev, NULL, 0);
3663               structdef = snone;
3664               break;
3665             case stagseen:         /* named struct or enum */
3666             case scolonseen:       /* a class */
3667               pushclass_above (cblev, token.line+token.offset, token.length);
3668               structdef = snone;
3669               make_C_tag (FALSE);  /* a struct or enum */
3670               break;
3671             }
3672           cblev++;
3673           break;
3674         case '*':
3675           if (definedef != dnone)
3676             break;
3677           if (fvdef == fstartlist)
3678             fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
3679           break;
3680         case '}':
3681           if (definedef != dnone)
3682             break;
3683           if (!noindentypedefs && lp == newlb.buffer + 1)
3684             {
3685               cblev = 0;        /* reset curly brace level if first column */
3686               parlev = 0;       /* also reset paren level, just in case... */
3687             }
3688           else if (cblev > 0)
3689             cblev--;
3690           popclass_above (cblev);
3691           structdef = snone;
3692           /* Only if typdef == tinbody is typdefcblev significant. */
3693           if (typdef == tinbody && cblev <= typdefcblev)
3694             {
3695               assert (cblev == typdefcblev);
3696               typdef = tend;
3697             }
3698           break;
3699         case '=':
3700           if (definedef != dnone)
3701             break;
3702           switch (fvdef)
3703             {
3704             case foperator:
3705             case finlist:
3706             case fignore:
3707             case vignore:
3708               break;
3709             case fvnameseen:
3710               if ((members && cblev == 1)
3711                   || (globals && cblev == 0 && (!fvextern || declarations)))
3712                 make_C_tag (FALSE); /* a variable */
3713               /* FALLTHRU */
3714             default:
3715               fvdef = vignore;
3716             }
3717           break;
3718         case '<':
3719           if (cplpl && structdef == stagseen)
3720             {
3721               structdef = sintemplate;
3722               break;
3723             }
3724           goto resetfvdef;
3725         case '>':
3726           if (structdef == sintemplate)
3727             {
3728               structdef = stagseen;
3729               break;
3730             }
3731           goto resetfvdef;
3732         case '+':
3733         case '-':
3734           if (objdef == oinbody && cblev == 0)
3735             {
3736               objdef = omethodsign;
3737               break;
3738             }
3739           /* FALLTHRU */
3740         resetfvdef:
3741         case '#': case '~': case '&': case '%': case '/': case '|':
3742         case '^': case '!': case '.': case '?': case ']':
3743           if (definedef != dnone)
3744             break;
3745           /* These surely cannot follow a function tag in C. */
3746           switch (fvdef)
3747             {
3748             case foperator:
3749             case finlist:
3750             case fignore:
3751             case vignore:
3752               break;
3753             default:
3754               fvdef = fvnone;
3755             }
3756           break;
3757         case '\0':
3758           if (objdef == otagseen)
3759             {
3760               make_C_tag (TRUE); /* an Objective C class */
3761               objdef = oignore;
3762             }
3763           /* If a macro spans multiple lines don't reset its state. */
3764           if (quotednl)
3765             CNL_SAVE_DEFINEDEF ();
3766           else
3767             CNL ();
3768           break;
3769         } /* switch (c) */
3770
3771     } /* while not eof */
3772
3773   free (token_name.buffer);
3774   free (lbs[0].lb.buffer);
3775   free (lbs[1].lb.buffer);
3776 }
3777
3778 /*
3779  * Process either a C++ file or a C file depending on the setting
3780  * of a global flag.
3781  */
3782 static void
3783 default_C_entries (inf)
3784      FILE *inf;
3785 {
3786   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3787 }
3788
/* Tag INF as plain C: C_entries is called with no extension flag set. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no language extension */

  C_entries (c_ext, inf);
}
3796
3797 /* Always do C++. */
3798 static void
3799 Cplusplus_entries (inf)
3800      FILE *inf;
3801 {
3802   C_entries (C_PLPL, inf);
3803 }
3804
3805 /* Always do Java. */
3806 static void
3807 Cjava_entries (inf)
3808      FILE *inf;
3809 {
3810   C_entries (C_JAVA, inf);
3811 }
3812
3813 /* Always do C*. */
3814 static void
3815 Cstar_entries (inf)
3816      FILE *inf;
3817 {
3818   C_entries (C_STAR, inf);
3819 }
3820
3821 /* Always do Yacc. */
3822 static void
3823 Yacc_entries (inf)
3824      FILE *inf;
3825 {
3826   C_entries (YACC, inf);
3827 }
3828
3829 \f
/* Useful macros. */

/* Loop header: run the following statement once per line of
   FILE_POINTER, stopping as soon as feof reports end of file.  At the
   start of each iteration the next line is read into LINE_BUFFER with
   readline, and only then is CHAR_POINTER set to LINE_BUFFER.buffer.
   Assigning after the read, and from LINE_BUFFER itself rather than
   from a fixed global, keeps CHAR_POINTER on the buffer that actually
   holds the line just read, even if readline replaced that buffer
   (presumably it can when a long line forces it to grow).  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (;                        /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       &&                       /* instructions at start of loop */     \
          (readline (&(line_buffer), file_pointer),                     \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )

/* True if CP points at KEYWORD (a constant string) followed by a
   character for which notinname holds.  On a match, and only then, CP
   is advanced past the keyword and past whatever skip_spaces skips.  */
#define LOOKING_AT(cp, keyword) /* keyword is a constant string */      \
  (strneq ((cp), keyword, sizeof(keyword)-1) /* cp points at keyword */ \
   && notinname ((cp)[sizeof(keyword)-1])       /* end of keyword */    \
   && ((cp) = skip_spaces((cp)+sizeof(keyword)-1))) /* skip spaces */
3842
3843 /*
3844  * Read a file, but do no processing.  This is used to do regexp
3845  * matching on files that have no language defined.
3846  */
3847 static void
3848 just_read_file (inf)
3849      FILE *inf;
3850 {
3851   register char *dummy;
3852
3853   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3854     continue;
3855 }
3856
3857 \f
3858 /* Fortran parsing */
3859
3860 static void F_takeprec __P((void));
3861 static void F_getit __P((FILE *));
3862
3863 static void
3864 F_takeprec ()
3865 {
3866   dbp = skip_spaces (dbp);
3867   if (*dbp != '*')
3868     return;
3869   dbp++;
3870   dbp = skip_spaces (dbp);
3871   if (strneq (dbp, "(*)", 3))
3872     {
3873       dbp += 3;
3874       return;
3875     }
3876   if (!ISDIGIT (*dbp))
3877     {
3878       --dbp;                    /* force failure */
3879       return;
3880     }
3881   do
3882     dbp++;
3883   while (ISDIGIT (*dbp));
3884 }
3885
3886 static void
3887 F_getit (inf)
3888      FILE *inf;
3889 {
3890   register char *cp;
3891
3892   dbp = skip_spaces (dbp);
3893   if (*dbp == '\0')
3894     {
3895       readline (&lb, inf);
3896       dbp = lb.buffer;
3897       if (dbp[5] != '&')
3898         return;
3899       dbp += 6;
3900       dbp = skip_spaces (dbp);
3901     }
3902   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3903     return;
3904   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3905     continue;
3906   pfnote (savenstr (dbp, cp-dbp), TRUE,
3907           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3908 }
3909
3910
3911 static void
3912 Fortran_functions (inf)
3913      FILE *inf;
3914 {
3915   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3916     {
3917       if (*dbp == '%')
3918         dbp++;                  /* Ratfor escape to fortran */
3919       dbp = skip_spaces (dbp);
3920       if (*dbp == '\0')
3921         continue;
3922       switch (lowcase (*dbp))
3923         {
3924         case 'i':
3925           if (nocase_tail ("integer"))
3926             F_takeprec ();
3927           break;
3928         case 'r':
3929           if (nocase_tail ("real"))
3930             F_takeprec ();
3931           break;
3932         case 'l':
3933           if (nocase_tail ("logical"))
3934             F_takeprec ();
3935           break;
3936         case 'c':
3937           if (nocase_tail ("complex") || nocase_tail ("character"))
3938             F_takeprec ();
3939           break;
3940         case 'd':
3941           if (nocase_tail ("double"))
3942             {
3943               dbp = skip_spaces (dbp);
3944               if (*dbp == '\0')
3945                 continue;
3946               if (nocase_tail ("precision"))
3947                 break;
3948               continue;
3949             }
3950           break;
3951         }
3952       dbp = skip_spaces (dbp);
3953       if (*dbp == '\0')
3954         continue;
3955       switch (lowcase (*dbp))
3956         {
3957         case 'f':
3958           if (nocase_tail ("function"))
3959             F_getit (inf);
3960           continue;
3961         case 's':
3962           if (nocase_tail ("subroutine"))
3963             F_getit (inf);
3964           continue;
3965         case 'e':
3966           if (nocase_tail ("entry"))
3967             F_getit (inf);
3968           continue;
3969         case 'b':
3970           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3971             {
3972               dbp = skip_spaces (dbp);
3973               if (*dbp == '\0') /* assume un-named */
3974                 pfnote (savestr ("blockdata"), TRUE,
3975                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
3976               else
3977                 F_getit (inf);  /* look for name */
3978             }
3979           continue;
3980         }
3981     }
3982 }
3983
3984 \f
3985 /*
3986  * Ada parsing
3987  * Original code by
3988  * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3989  */
3990
3991 static void Ada_getit __P((FILE *, char *));
3992
3993 /* Once we are positioned after an "interesting" keyword, let's get
3994    the real tag value necessary. */
3995 static void
3996 Ada_getit (inf, name_qualifier)
3997      FILE *inf;
3998      char *name_qualifier;
3999 {
4000   register char *cp;
4001   char *name;
4002   char c;
4003
4004   while (!feof (inf))
4005     {
4006       dbp = skip_spaces (dbp);
4007       if (*dbp == '\0'
4008           || (dbp[0] == '-' && dbp[1] == '-'))
4009         {
4010           readline (&lb, inf);
4011           dbp = lb.buffer;
4012         }
4013       switch (lowcase(*dbp))
4014         {
4015         case 'b':
4016           if (nocase_tail ("body"))
4017             {
4018               /* Skipping body of   procedure body   or   package body or ....
4019                  resetting qualifier to body instead of spec. */
4020               name_qualifier = "/b";
4021               continue;
4022             }
4023           break;
4024         case 't':
4025           /* Skipping type of   task type   or   protected type ... */
4026           if (nocase_tail ("type"))
4027             continue;
4028           break;
4029         }
4030       if (*dbp == '"')
4031         {
4032           dbp += 1;
4033           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4034             continue;
4035         }
4036       else
4037         {
4038           dbp = skip_spaces (dbp);
4039           for (cp = dbp;
4040                (*cp != '\0'
4041                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4042                cp++)
4043             continue;
4044           if (cp == dbp)
4045             return;
4046         }
4047       c = *cp;
4048       *cp = '\0';
4049       name = concat (dbp, name_qualifier, "");
4050       *cp = c;
4051       pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4052       if (c == '"')
4053         dbp = cp + 1;
4054       return;
4055     }
4056 }
4057
4058 static void
4059 Ada_funcs (inf)
4060      FILE *inf;
4061 {
4062   bool inquote = FALSE;
4063
4064   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4065     {
4066       while (*dbp != '\0')
4067         {
4068           /* Skip a string i.e. "abcd". */
4069           if (inquote || (*dbp == '"'))
4070             {
4071               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4072               if (dbp != NULL)
4073                 {
4074                   inquote = FALSE;
4075                   dbp += 1;
4076                   continue;     /* advance char */
4077                 }
4078               else
4079                 {
4080                   inquote = TRUE;
4081                   break;        /* advance line */
4082                 }
4083             }
4084
4085           /* Skip comments. */
4086           if (dbp[0] == '-' && dbp[1] == '-')
4087             break;              /* advance line */
4088
4089           /* Skip character enclosed in single quote i.e. 'a'
4090              and skip single quote starting an attribute i.e. 'Image. */
4091           if (*dbp == '\'')
4092             {
4093               dbp++ ;
4094               if (*dbp != '\0')
4095                 dbp++;
4096               continue;
4097             }
4098
4099           /* Search for beginning of a token.  */
4100           if (!begtoken (*dbp))
4101             {
4102               dbp++;
4103               continue;         /* advance char */
4104             }
4105
4106           /* We are at the beginning of a token. */
4107           switch (lowcase(*dbp))
4108             {
4109             case 'f':
4110               if (!packages_only && nocase_tail ("function"))
4111                 Ada_getit (inf, "/f");
4112               else
4113                 break;          /* from switch */
4114               continue;         /* advance char */
4115             case 'p':
4116               if (!packages_only && nocase_tail ("procedure"))
4117                 Ada_getit (inf, "/p");
4118               else if (nocase_tail ("package"))
4119                 Ada_getit (inf, "/s");
4120               else if (nocase_tail ("protected")) /* protected type */
4121                 Ada_getit (inf, "/t");
4122               else
4123                 break;          /* from switch */
4124               continue;         /* advance char */
4125             case 't':
4126               if (!packages_only && nocase_tail ("task"))
4127                 Ada_getit (inf, "/k");
4128               else if (typedefs && !packages_only && nocase_tail ("type"))
4129                 {
4130                   Ada_getit (inf, "/t");
4131                   while (*dbp != '\0')
4132                     dbp += 1;
4133                 }
4134               else
4135                 break;          /* from switch */
4136               continue;         /* advance char */
4137             }
4138
4139           /* Look for the end of the token. */
4140           while (!endtoken (*dbp))
4141             dbp++;
4142
4143         } /* advance char */
4144     } /* advance line */
4145 }
4146
4147 \f
4148 /*
4149  * Unix and microcontroller assembly tag handling
4150  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4151  * Idea by Bob Weiner, Motorola Inc. (1994)
4152  */
4153 static void
4154 Asm_labels (inf)
4155      FILE *inf;
4156 {
4157   register char *cp;
4158
4159   LOOP_ON_INPUT_LINES (inf, lb, cp)
4160     {
4161       /* If first char is alphabetic or one of [_.$], test for colon
4162          following identifier. */
4163       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4164         {
4165           /* Read past label. */
4166           cp++;
4167           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4168             cp++;
4169           if (*cp == ':' || iswhite (*cp))
4170             {
4171               /* Found end of label, so copy it and add it to the table. */
4172               pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
4173                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4174             }
4175         }
4176     }
4177 }
4178
4179 \f
4180 /*
4181  * Perl support
4182  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4183  * Perl variable names: /^(my|local).../
4184  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4185  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4186  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4187  */
4188 static void
4189 Perl_functions (inf)
4190      FILE *inf;
4191 {
4192   char *package = savestr ("main"); /* current package name */
4193   register char *cp;
4194
4195   LOOP_ON_INPUT_LINES (inf, lb, cp)
4196     {
4197       skip_spaces(cp);
4198
4199       if (LOOKING_AT (cp, "package"))
4200         {
4201           free (package);
4202           package = get_tag (cp);
4203           if (package == NULL)  /* can't parse package name */
4204             package = savestr ("");
4205           else
4206             package = savestr(package); /* make a copy */
4207         }
4208       else if (LOOKING_AT (cp, "sub"))
4209         {
4210           char *name, *fullname, *pos;
4211           char *sp = cp;
4212
4213           while (!notinname (*cp))
4214             cp++;
4215           if (cp == sp)
4216             continue;
4217           name = savenstr (sp, cp-sp);
4218           if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4219             fullname = name;
4220           else
4221             fullname = concat (package, "::", name);
4222           pfnote (fullname, TRUE,
4223                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4224           if (name != fullname)
4225             free (name);
4226         }
4227        else if (globals         /* only if tagging global vars is enabled */
4228                 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4229         {
4230           /* After "my" or "local", but before any following paren or space. */
4231           char *varname = NULL;
4232
4233           if (*cp == '$' || *cp == '@' || *cp == '%')
4234             {
4235               char* varstart = ++cp;
4236               while (ISALNUM (*cp) || *cp == '_')
4237                 cp++;
4238               varname = savenstr (varstart, cp-varstart);
4239             }
4240           else
4241             {
4242               /* Should be examining a variable list at this point;
4243                  could insist on seeing an open parenthesis. */
4244               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4245                 cp++;
4246             }
4247
4248           /* Perhaps I should back cp up one character, so the TAGS table
4249              doesn't mention (and so depend upon) the following char. */
4250           pfnote (varname, FALSE,
4251                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4252         }
4253     }
4254 }
4255
4256
4257 /*
4258  * Python support
4259  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4260  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4261  * More ideas by seb bacon <seb@jamkit.com> (2002)
4262  */
4263 static void
4264 Python_functions (inf)
4265      FILE *inf;
4266 {
4267   register char *cp;
4268
4269   LOOP_ON_INPUT_LINES (inf, lb, cp)
4270     {
4271       cp = skip_spaces (cp);
4272       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4273         {
4274           char *name = cp;
4275           while (!notinname (*cp) && *cp != ':')
4276             cp++;
4277           pfnote (savenstr (name, cp-name), TRUE,
4278                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4279         }
4280     }
4281 }
4282
4283 \f
4284 /*
4285  * PHP support
4286  * Look for:
4287  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4288  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4289  *  - /^[ \t]*define\(\"[^\"]+/
4290  * Only with --members:
4291  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4292  * Idea by Diez B. Roggisch (2001)
4293  */
4294 static void
4295 PHP_functions (inf)
4296      FILE *inf;
4297 {
4298   register char *cp, *name;
4299   bool search_identifier = FALSE;
4300
4301   LOOP_ON_INPUT_LINES (inf, lb, cp)
4302     {
4303       cp = skip_spaces (cp);
4304       name = cp;
4305       if (search_identifier
4306           && *cp != '\0')
4307         {
4308           while (!notinname (*cp))
4309             cp++;
4310           pfnote (savenstr (name, cp-name), TRUE,
4311                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4312           search_identifier = FALSE;
4313         }
4314       else if (LOOKING_AT (cp, "function"))
4315         {
4316           if(*cp == '&')
4317             cp = skip_spaces (cp+1);
4318           if(*cp != '\0')
4319             {
4320               name = cp;
4321               while (!notinname (*cp))
4322                 cp++;
4323               pfnote (savenstr (name, cp-name), TRUE,
4324                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4325             }
4326           else
4327             search_identifier = TRUE;
4328         }
4329       else if (LOOKING_AT (cp, "class"))
4330         {
4331           if (*cp != '\0')
4332             {
4333               name = cp;
4334               while (*cp != '\0' && !iswhite (*cp))
4335                 cp++;
4336               pfnote (savenstr (name, cp-name), FALSE,
4337                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4338             }
4339           else
4340             search_identifier = TRUE;
4341         }
4342       else if (strneq (cp, "define", 6)
4343                && (cp = skip_spaces (cp+6))
4344                && *cp++ == '('
4345                && (*cp == '"' || *cp == '\''))
4346         {
4347           char quote = *cp++;
4348           name = cp;
4349           while (*cp != quote && *cp != '\0')
4350             cp++;
4351           pfnote (savenstr (name, cp-name), FALSE,
4352                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4353         }
4354       else if (members
4355                && LOOKING_AT (cp, "var")
4356                && *cp == '$')
4357         {
4358           name = cp;
4359           while (!notinname(*cp))
4360             cp++;
4361           pfnote (savenstr (name, cp-name), FALSE,
4362                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4363         }
4364     }
4365 }
4366
4367 \f
4368 /*
4369  * Cobol tag functions
4370  * We could look for anything that could be a paragraph name.
4371  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4372  * Idea by Corny de Souza (1993)
4373  */
4374 static void
4375 Cobol_paragraphs (inf)
4376      FILE *inf;
4377 {
4378   register char *bp, *ep;
4379
4380   LOOP_ON_INPUT_LINES (inf, lb, bp)
4381     {
4382       if (lb.len < 9)
4383         continue;
4384       bp += 8;
4385
4386       /* If eoln, compiler option or comment ignore whole line. */
4387       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4388         continue;
4389
4390       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4391         continue;
4392       if (*ep++ == '.')
4393         pfnote (savenstr (bp, ep-bp), TRUE,
4394                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4395     }
4396 }
4397
4398 \f
4399 /*
4400  * Makefile support
4401  * Idea by Assar Westerlund <assar@sics.se> (2001)
4402  */
4403 static void
4404 Makefile_targets (inf)
4405      FILE *inf;
4406 {
4407   register char *bp;
4408
4409   LOOP_ON_INPUT_LINES (inf, lb, bp)
4410     {
4411       if (*bp == '\t' || *bp == '#')
4412         continue;
4413       while (*bp != '\0' && *bp != '=' && *bp != ':')
4414         bp++;
4415       if (*bp == ':')
4416         pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4417                 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4418     }
4419 }
4420
4421 \f
4422 /*
4423  * Pascal parsing
4424  * Original code by Mosur K. Mohan (1989)
4425  *
4426  *  Locates tags for procedures & functions.  Doesn't do any type- or
4427  *  var-definitions.  It does look for the keyword "extern" or
4428  *  "forward" immediately following the procedure statement; if found,
4429  *  the tag is skipped.
4430  */
4431 static void
4432 Pascal_functions (inf)
4433      FILE *inf;
4434 {
4435   linebuffer tline;             /* mostly copied from C_entries */
4436   long save_lcno;
4437   int save_lineno, save_len;
4438   char c, *cp, *namebuf;
4439
4440   bool                          /* each of these flags is TRUE iff: */
4441     incomment,                  /* point is inside a comment */
4442     inquote,                    /* point is inside '..' string */
4443     get_tagname,                /* point is after PROCEDURE/FUNCTION
4444                                    keyword, so next item = potential tag */
4445     found_tag,                  /* point is after a potential tag */
4446     inparms,                    /* point is within parameter-list */
4447     verify_tag;                 /* point has passed the parm-list, so the
4448                                    next token will determine whether this
4449                                    is a FORWARD/EXTERN to be ignored, or
4450                                    whether it is a real tag */
4451
4452   save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4453   namebuf = NULL;               /* keep compiler quiet */
4454   dbp = lb.buffer;
4455   *dbp = '\0';
4456   initbuffer (&tline);
4457
4458   incomment = inquote = FALSE;
4459   found_tag = FALSE;            /* have a proc name; check if extern */
4460   get_tagname = FALSE;          /* have found "procedure" keyword    */
4461   inparms = FALSE;              /* found '(' after "proc"            */
4462   verify_tag = FALSE;           /* check if "extern" is ahead        */
4463
4464
4465   while (!feof (inf))           /* long main loop to get next char */
4466     {
4467       c = *dbp++;
4468       if (c == '\0')            /* if end of line */
4469         {
4470           readline (&lb, inf);
4471           dbp = lb.buffer;
4472           if (*dbp == '\0')
4473             continue;
4474           if (!((found_tag && verify_tag)
4475                 || get_tagname))
4476             c = *dbp++;         /* only if don't need *dbp pointing
4477                                    to the beginning of the name of
4478                                    the procedure or function */
4479         }
4480       if (incomment)
4481         {
4482           if (c == '}')         /* within { } comments */
4483             incomment = FALSE;
4484           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4485             {
4486               dbp++;
4487               incomment = FALSE;
4488             }
4489           continue;
4490         }
4491       else if (inquote)
4492         {
4493           if (c == '\'')
4494             inquote = FALSE;
4495           continue;
4496         }
4497       else
4498         switch (c)
4499           {
4500           case '\'':
4501             inquote = TRUE;     /* found first quote */
4502             continue;
4503           case '{':             /* found open { comment */
4504             incomment = TRUE;
4505             continue;
4506           case '(':
4507             if (*dbp == '*')    /* found open (* comment */
4508               {
4509                 incomment = TRUE;
4510                 dbp++;
4511               }
4512             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4513               inparms = TRUE;
4514             continue;
4515           case ')':             /* end of parms list */
4516             if (inparms)
4517               inparms = FALSE;
4518             continue;
4519           case ';':
4520             if (found_tag && !inparms) /* end of proc or fn stmt */
4521               {
4522                 verify_tag = TRUE;
4523                 break;
4524               }
4525             continue;
4526           }
4527       if (found_tag && verify_tag && (*dbp != ' '))
4528         {
4529           /* check if this is an "extern" declaration */
4530           if (*dbp == '\0')
4531             continue;
4532           if (lowcase (*dbp == 'e'))
4533             {
4534               if (nocase_tail ("extern")) /* superfluous, really! */
4535                 {
4536                   found_tag = FALSE;
4537                   verify_tag = FALSE;
4538                 }
4539             }
4540           else if (lowcase (*dbp) == 'f')
4541             {
4542               if (nocase_tail ("forward")) /*  check for forward reference */
4543                 {
4544                   found_tag = FALSE;
4545                   verify_tag = FALSE;
4546                 }
4547             }
4548           if (found_tag && verify_tag) /* not external proc, so make tag */
4549             {
4550               found_tag = FALSE;
4551               verify_tag = FALSE;
4552               pfnote (namebuf, TRUE,
4553                       tline.buffer, save_len, save_lineno, save_lcno);
4554               continue;
4555             }
4556         }
4557       if (get_tagname)          /* grab name of proc or fn */
4558         {
4559           if (*dbp == '\0')
4560             continue;
4561
4562           /* save all values for later tagging */
4563           linebuffer_setlen (&tline, lb.len);
4564           strcpy (tline.buffer, lb.buffer);
4565           save_lineno = lineno;
4566           save_lcno = linecharno;
4567
4568           /* grab block name */
4569           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4570             continue;
4571           namebuf = savenstr (dbp, cp-dbp);
4572           dbp = cp;             /* set dbp to e-o-token */
4573           save_len = dbp - lb.buffer + 1;
4574           get_tagname = FALSE;
4575           found_tag = TRUE;
4576           continue;
4577
4578           /* and proceed to check for "extern" */
4579         }
4580       else if (!incomment && !inquote && !found_tag)
4581         {
4582           /* check for proc/fn keywords */
4583           switch (lowcase (c))
4584             {
4585             case 'p':
4586               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4587                 get_tagname = TRUE;
4588               continue;
4589             case 'f':
4590               if (nocase_tail ("unction"))
4591                 get_tagname = TRUE;
4592               continue;
4593             }
4594         }
4595     }                           /* while not eof */
4596
4597   free (tline.buffer);
4598 }
4599
4600 \f
4601 /*
4602  * Lisp tag functions
4603  *  look for (def or (DEF, quote or QUOTE
4604  */
4605
4606 static void L_getit __P((void));
4607
4608 static void
4609 L_getit ()
4610 {
4611   if (*dbp == '\'')             /* Skip prefix quote */
4612     dbp++;
4613   else if (*dbp == '(')
4614   {
4615     dbp++;
4616     /* Try to skip "(quote " */
4617     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4618       /* Ok, then skip "(" before name in (defstruct (foo)) */
4619       dbp = skip_spaces (dbp);
4620   }
4621   get_tag (dbp);
4622 }
4623
4624 static void
4625 Lisp_functions (inf)
4626      FILE *inf;
4627 {
4628   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4629     {
4630       if (dbp[0] != '(')
4631         continue;
4632
4633       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4634         {
4635           dbp = skip_non_spaces (dbp);
4636           dbp = skip_spaces (dbp);
4637           L_getit ();
4638         }
4639       else
4640         {
4641           /* Check for (foo::defmumble name-defined ... */
4642           do
4643             dbp++;
4644           while (!notinname (*dbp) && *dbp != ':');
4645           if (*dbp == ':')
4646             {
4647               do
4648                 dbp++;
4649               while (*dbp == ':');
4650
4651               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4652                 {
4653                   dbp = skip_non_spaces (dbp);
4654                   dbp = skip_spaces (dbp);
4655                   L_getit ();
4656                 }
4657             }
4658         }
4659     }
4660 }
4661
4662 \f
4663 /*
4664  * Postscript tag functions
4665  * Just look for lines where the first character is '/'
4666  * Also look at "defineps" for PSWrap
4667  * Ideas by:
4668  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4669  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4670  */
4671 static void
4672 Postscript_functions (inf)
4673      FILE *inf;
4674 {
4675   register char *bp, *ep;
4676
4677   LOOP_ON_INPUT_LINES (inf, lb, bp)
4678     {
4679       if (bp[0] == '/')
4680         {
4681           for (ep = bp+1;
4682                *ep != '\0' && *ep != ' ' && *ep != '{';
4683                ep++)
4684             continue;
4685           pfnote (savenstr (bp, ep-bp), TRUE,
4686                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4687         }
4688       else if (LOOKING_AT (bp, "defineps"))
4689         get_tag (bp);
4690     }
4691 }
4692
4693 \f
4694 /*
4695  * Scheme tag functions
4696  * look for (def... xyzzy
4697  *          (def... (xyzzy
4698  *          (def ... ((...(xyzzy ....
4699  *          (set! xyzzy
4700  * Original code by Ken Haase (1985?)
4701  */
4702
4703 static void
4704 Scheme_functions (inf)
4705      FILE *inf;
4706 {
4707   register char *bp;
4708
4709   LOOP_ON_INPUT_LINES (inf, lb, bp)
4710     {
4711       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4712         {
4713           bp = skip_non_spaces (bp+4);
4714           /* Skip over open parens and white space */
4715           while (notinname (*bp))
4716             bp++;
4717           get_tag (bp);
4718         }
4719       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4720         get_tag (bp);
4721     }
4722 }
4723
4724 \f
4725 /* Find tags in TeX and LaTeX input files.  */
4726
4727 /* TEX_toktab is a table of TeX control sequences that define tags.
4728  * Each entry records one such control sequence.
4729  *
4730  * Original code from who knows whom.
4731  * Ideas by:
4732  *   Stefan Monnier (2002)
4733  */
4734
/* Filled in by TEX_decode_env, one entry per control sequence; the
   entry whose buffer is NULL marks the end of the table.  */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.  */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

/* TEX_mode chooses the escape and grouping characters for a file;
   TEX_decode_env builds TEX_toktab from an environment variable name
   and a default list.  */
static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape character and group delimiters used by TeX_commands.  TEX_mode
   resets all three: '\\', '{', '}' when the file uses backslash as its
   escape, '!', '<', '>' otherwise.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4750
4751 /*
4752  * TeX/LaTeX scanning loop.
4753  */
4754 static void
4755 TeX_commands (inf)
4756      FILE *inf;
4757 {
4758   char *cp;
4759   linebuffer *key;
4760
4761   /* Select either \ or ! as escape character.  */
4762   TEX_mode (inf);
4763
4764   /* Initialize token table once from environment. */
4765   if (TEX_toktab == NULL)
4766     TEX_decode_env ("TEXTAGS", TEX_defenv);
4767
4768   LOOP_ON_INPUT_LINES (inf, lb, cp)
4769     {
4770       /* Look at each TEX keyword in line. */
4771       for (;;)
4772         {
4773           /* Look for a TEX escape. */
4774           while (*cp++ != TEX_esc)
4775             if (cp[-1] == '\0' || cp[-1] == '%')
4776               goto tex_next_line;
4777
4778           for (key = TEX_toktab; key->buffer != NULL; key++)
4779             if (strneq (cp, key->buffer, key->len))
4780               {
4781                 register char *p;
4782                 char *name;
4783                 int linelen;
4784                 bool opgrp = FALSE;
4785
4786                 cp = skip_spaces (cp + key->len);
4787                 if (*cp == TEX_opgrp)
4788                   {
4789                     opgrp = TRUE;
4790                     cp++;
4791                   }
4792                 for (p = cp;
4793                      (!iswhite (*p) && *p != '#' &&
4794                       *p != TEX_opgrp && *p != TEX_clgrp);
4795                      p++)
4796                   continue;
4797                 name = savenstr (cp, p-cp);
4798                 linelen = lb.len;
4799                 if (!opgrp || *p == TEX_clgrp)
4800                   {
4801                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4802                       *p++;
4803                     linelen = p - lb.buffer + 1;
4804                   }
4805                 pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
4806                 goto tex_next_line; /* We only tag a line once */
4807               }
4808         }
4809     tex_next_line:
4810       ;
4811     }
4812 }
4813
4814 #define TEX_LESC '\\'
4815 #define TEX_SESC '!'
4816
4817 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4818    chars accordingly. */
4819 static void
4820 TEX_mode (inf)
4821      FILE *inf;
4822 {
4823   int c;
4824
4825   while ((c = getc (inf)) != EOF)
4826     {
4827       /* Skip to next line if we hit the TeX comment char. */
4828       if (c == '%')
4829         while (c != '\n')
4830           c = getc (inf);
4831       else if (c == TEX_LESC || c == TEX_SESC )
4832         break;
4833     }
4834
4835   if (c == TEX_LESC)
4836     {
4837       TEX_esc = TEX_LESC;
4838       TEX_opgrp = '{';
4839       TEX_clgrp = '}';
4840     }
4841   else
4842     {
4843       TEX_esc = TEX_SESC;
4844       TEX_opgrp = '<';
4845       TEX_clgrp = '>';
4846     }
4847   /* If the input file is compressed, inf is a pipe, and rewind may fail.
4848      No attempt is made to correct the situation. */
4849   rewind (inf);
4850 }
4851
4852 /* Read environment and prepend it to the default string.
4853    Build token table. */
4854 static void
4855 TEX_decode_env (evarname, defenv)
4856      char *evarname;
4857      char *defenv;
4858 {
4859   register char *env, *p;
4860   int i, len;
4861
4862   /* Append default string to environment. */
4863   env = getenv (evarname);
4864   if (!env)
4865     env = defenv;
4866   else
4867     {
4868       char *oldenv = env;
4869       env = concat (oldenv, defenv, "");
4870     }
4871
4872   /* Allocate a token table */
4873   for (len = 1, p = env; p;)
4874     if ((p = etags_strchr (p, ':')) && *++p != '\0')
4875       len++;
4876   TEX_toktab = xnew (len, linebuffer);
4877
4878   /* Unpack environment string into token table. Be careful about */
4879   /* zero-length strings (leading ':', "::" and trailing ':') */
4880   for (i = 0; *env != '\0';)
4881     {
4882       p = etags_strchr (env, ':');
4883       if (!p)                   /* End of environment string. */
4884         p = env + strlen (env);
4885       if (p - env > 0)
4886         {                       /* Only non-zero strings. */
4887           TEX_toktab[i].buffer = savenstr (env, p - env);
4888           TEX_toktab[i].len = p - env;
4889           i++;
4890         }
4891       if (*p)
4892         env = p + 1;
4893       else
4894         {
4895           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
4896           TEX_toktab[i].len = 0;
4897           break;
4898         }
4899     }
4900 }
4901
4902 \f
4903 /* Texinfo support.  Dave Love, Mar. 2000.  */
4904 static void
4905 Texinfo_nodes (inf)
4906      FILE * inf;
4907 {
4908   char *cp, *start;
4909   LOOP_ON_INPUT_LINES (inf, lb, cp)
4910     if (LOOKING_AT (cp, "@node"))
4911       {
4912         start = cp;
4913         while (*cp != '\0' && *cp != ',')
4914           cp++;
4915         pfnote (savenstr (start, cp - start), TRUE,
4916                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4917       }
4918 }
4919
4920 \f
4921 /*
4922  * Prolog support
4923  *
4924  * Assumes that the predicate or rule starts at column 0.
4925  * Only the first clause of a predicate or rule is added.
4926  * Original code by Sunichirou Sugou (1989)
4927  * Rewritten by Anders Lindgren (1996)
4928  */
4929 static int prolog_pr __P((char *, char *));
4930 static void prolog_skip_comment __P((linebuffer *, FILE *));
4931 static int prolog_atom __P((char *, int));
4932
4933 static void
4934 Prolog_functions (inf)
4935      FILE *inf;
4936 {
4937   char *cp, *last;
4938   int len;
4939   int allocated;
4940
4941   allocated = 0;
4942   len = 0;
4943   last = NULL;
4944
4945   LOOP_ON_INPUT_LINES (inf, lb, cp)
4946     {
4947       if (cp[0] == '\0')        /* Empty line */
4948         continue;
4949       else if (iswhite (cp[0])) /* Not a predicate */
4950         continue;
4951       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
4952         prolog_skip_comment (&lb, inf);
4953       else if ((len = prolog_pr (cp, last)) > 0)
4954         {
4955           /* Predicate or rule.  Store the function name so that we
4956              only generate a tag for the first clause.  */
4957           if (last == NULL)
4958             last = xnew(len + 1, char);
4959           else if (len + 1 > allocated)
4960             xrnew (last, len + 1, char);
4961           allocated = len + 1;
4962           strncpy (last, cp, len);
4963           last[len] = '\0';
4964         }
4965     }
4966 }
4967
4968
4969 static void
4970 prolog_skip_comment (plb, inf)
4971      linebuffer *plb;
4972      FILE *inf;
4973 {
4974   char *cp;
4975
4976   do
4977     {
4978       for (cp = plb->buffer; *cp != '\0'; cp++)
4979         if (cp[0] == '*' && cp[1] == '/')
4980           return;
4981       readline (plb, inf);
4982     }
4983   while (!feof(inf));
4984 }
4985
4986 /*
4987  * A predicate or rule definition is added if it matches:
 *     <beginning of line><Prolog Atom><whitespace>(
 * or  <beginning of line><Prolog Atom><whitespace>:-
 * or  <beginning of line><Prolog Atom><whitespace>.
4990  *
4991  * It is added to the tags database if it doesn't match the
4992  * name of the previous clause header.
4993  *
4994  * Return the size of the name of the predicate or rule, or 0 if no
4995  * header was found.
4996  */
4997 static int
4998 prolog_pr (s, last)
4999      char *s;
5000      char *last;                /* Name of last clause. */
5001 {
5002   int pos;
5003   int len;
5004
5005   pos = prolog_atom (s, 0);
5006   if (pos < 1)
5007     return 0;
5008
5009   len = pos;
5010   pos = skip_spaces (s + pos) - s;
5011
5012   if ((s[pos] == '.'
5013        || (s[pos] == '(' && (pos += 1))
5014        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5015       && (last == NULL          /* save only the first clause */
5016           || len != strlen (last)
5017           || !strneq (s, last, len)))
5018         {
5019           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5020           return len;
5021         }
5022   else
5023     return 0;
5024 }
5025
5026 /*
5027  * Consume a Prolog atom.
5028  * Return the number of bytes consumed, or -1 if there was an error.
5029  *
5030  * A prolog atom, in this context, could be one of:
5031  * - An alphanumeric sequence, starting with a lower case letter.
5032  * - A quoted arbitrary string. Single quotes can escape themselves.
5033  *   Backslash quotes everything.
5034  */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  register char *p = s + pos;
  char *start = p;

  if (ISLOWER (*p) || *p == '_')
    {
      /* Unquoted atom: it extends over letters, digits and '_'.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* A backslash quotes the next character, which must exist.  */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        /* A lone quote closes the atom, a doubled one stands for a
           quote inside it.  */
        if (p[1] != '\'')
          return p + 1 - start;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5084
5085 \f
5086 /*
5087  * Support for Erlang
5088  *
5089  * Generates tags for functions, defines, and records.
5090  * Assumes that Erlang functions start at column 0.
5091  * Original code by Anders Lindgren (1996)
5092  */
5093 static int erlang_func __P((char *, char *));
5094 static void erlang_attribute __P((char *));
5095 static int erlang_atom __P((char *, int));
5096
5097 static void
5098 Erlang_functions (inf)
5099      FILE *inf;
5100 {
5101   char *cp, *last;
5102   int len;
5103   int allocated;
5104
5105   allocated = 0;
5106   len = 0;
5107   last = NULL;
5108
5109   LOOP_ON_INPUT_LINES (inf, lb, cp)
5110     {
5111       if (cp[0] == '\0')        /* Empty line */
5112         continue;
5113       else if (iswhite (cp[0])) /* Not function nor attribute */
5114         continue;
5115       else if (cp[0] == '%')    /* comment */
5116         continue;
5117       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5118         continue;
5119       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5120         {
5121           erlang_attribute (cp);
5122           last = NULL;
5123         }
5124       else if ((len = erlang_func (cp, last)) > 0)
5125         {
5126           /*
5127            * Function.  Store the function name so that we only
5128            * generates a tag for the first clause.
5129            */
5130           if (last == NULL)
5131             last = xnew (len + 1, char);
5132           else if (len + 1 > allocated)
5133             xrnew (last, len + 1, char);
5134           allocated = len + 1;
5135           strncpy (last, cp, len);
5136           last[len] = '\0';
5137         }
5138     }
5139 }
5140
5141
5142 /*
5143  * A function definition is added if it matches:
5144  *     <beginning of line><Erlang Atom><whitespace>(
5145  *
5146  * It is added to the tags database if it doesn't match the
5147  * name of the previous clause header.
5148  *
5149  * Return the size of the name of the function, or 0 if no function
5150  * was found.
5151  */
5152 static int
5153 erlang_func (s, last)
5154      char *s;
5155      char *last;                /* Name of last clause. */
5156 {
5157   int pos;
5158   int len;
5159
5160   pos = erlang_atom (s, 0);
5161   if (pos < 1)
5162     return 0;
5163
5164   len = pos;
5165   pos = skip_spaces (s + pos) - s;
5166
5167   /* Save only the first clause. */
5168   if (s[pos++] == '('
5169       && (last == NULL
5170           || len != (int)strlen (last)
5171           || !strneq (s, last, len)))
5172         {
5173           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5174           return len;
5175         }
5176
5177   return 0;
5178 }
5179
5180
5181 /*
5182  * Handle attributes.  Currently, tags are generated for defines
5183  * and records.
5184  *
5185  * They are on the form:
5186  * -define(foo, bar).
5187  * -define(Foo(M, N), M+N).
5188  * -record(graph, {vtab = notable, cyclic = true}).
5189  */
5190 static void
5191 erlang_attribute (s)
5192      char *s;
5193 {
5194   int pos;
5195   int len;
5196
5197   if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
5198     {
5199       if (s[pos++] == '(')
5200         {
5201           pos = skip_spaces (s + pos) - s;
5202           len = erlang_atom (s, pos);
5203           if (len != 0)
5204             pfnote (savenstr (& s[pos], len), TRUE,
5205                     s, pos + len, lineno, linecharno);
5206         }
5207     }
5208   return;
5209 }
5210
5211
5212 /*
5213  * Consume an Erlang atom (or variable).
5214  * Return the number of bytes consumed, or -1 if there was an error.
5215  */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  register char *p = s + pos;
  char *start = p;

  if (ISALPHA (*p) || *p == '_')
    {
      /* Unquoted: it extends over letters, digits and '_'.  */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return p - start;
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: look for the closing quote.  */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        return p + 1 - start;

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* A backslash quotes the next character, which must exist.  */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
5261
5262 \f
5263 #ifdef ETAGS_REGEXPS
5264
5265 static char *scan_separators __P((char *));
5266 static void analyse_regex __P((char *, bool));
5267 static void add_regex __P((char *, bool, language *));
5268 static char *substitute __P((char *, char *, struct re_registers *));
5269
5270 /* Take a string like "/blah/" and turn it into "blah", making sure
5271    that the first and last characters are the same, and handling
5272    quoted separator characters.  Actually, stops on the occurrence of
5273    an unquoted separator.  Also turns "\t" into a Tab character, and
5274    similarly for all character escape sequences supported by Gcc.
5275    Returns pointer to terminating separator.  Works in place.  Null
5276    terminates name string. */
5277 static char *
5278 scan_separators (name)
5279      char *name;
5280 {
5281   char sep = name[0];
5282   char *copyto = name;
5283   bool quoted = FALSE;
5284
5285   for (++name; *name != '\0'; ++name)
5286     {
5287       if (quoted)
5288         {
5289           switch (*name)
5290             {
5291             case 'a': *copyto++ = '\007'; break;
5292             case 'b': *copyto++ = '\b'; break;
5293             case 'd': *copyto++ = 0177; break;
5294             case 'e': *copyto++ = 033; break;
5295             case 'f': *copyto++ = '\f'; break;
5296             case 'n': *copyto++ = '\n'; break;
5297             case 'r': *copyto++ = '\r'; break;
5298             case 't': *copyto++ = '\t'; break;
5299             case 'v': *copyto++ = '\v'; break;
5300             default:
5301               if (*name == sep)
5302                 *copyto++ = sep;
5303               else
5304                 {
5305                   /* Something else is quoted, so preserve the quote. */
5306                   *copyto++ = '\\';
5307                   *copyto++ = *name;
5308                 }
5309               break;
5310             }
5311           quoted = FALSE;
5312         }
5313       else if (*name == '\\')
5314         quoted = TRUE;
5315       else if (*name == sep)
5316         break;
5317       else
5318         *copyto++ = *name;
5319     }
5320
5321   /* Terminate copied string. */
5322   *copyto = '\0';
5323   return name;
5324 }
5325
5326 /* Look at the argument of --regex or --no-regex and do the right
5327    thing.  Same for each line of a regexp file. */
5328 static void
5329 analyse_regex (regex_arg, ignore_case)
5330      char *regex_arg;
5331      bool ignore_case;
5332 {
5333   if (regex_arg == NULL)
5334     {
5335       free_patterns ();         /* --no-regex: remove existing regexps */
5336       return;
5337     }
5338
5339   /* A real --regexp option or a line in a regexp file. */
5340   switch (regex_arg[0])
5341     {
5342       /* Comments in regexp file or null arg to --regex. */
5343     case '\0':
5344     case ' ':
5345     case '\t':
5346       break;
5347
5348       /* Read a regex file.  This is recursive and may result in a
5349          loop, which will stop when the file descriptors are exhausted. */
5350     case '@':
5351       {
5352         FILE *regexfp;
5353         linebuffer regexbuf;
5354         char *regexfile = regex_arg + 1;
5355
5356         /* regexfile is a file containing regexps, one per line. */
5357         regexfp = fopen (regexfile, "r");
5358         if (regexfp == NULL)
5359           {
5360             pfatal (regexfile);
5361             return;
5362           }
5363         initbuffer (&regexbuf);
5364         while (readline_internal (&regexbuf, regexfp) > 0)
5365           analyse_regex (regexbuf.buffer, ignore_case);
5366         free (regexbuf.buffer);
5367         fclose (regexfp);
5368       }
5369       break;
5370
5371       /* Regexp to be used for a specific language only. */
5372     case '{':
5373       {
5374         language *lang;
5375         char *lang_name = regex_arg + 1;
5376         char *cp;
5377
5378         for (cp = lang_name; *cp != '}'; cp++)
5379           if (*cp == '\0')
5380             {
5381               error ("unterminated language name in regex: %s", regex_arg);
5382               return;
5383             }
5384         *cp = '\0';
5385         lang = get_language_from_langname (lang_name);
5386         if (lang == NULL)
5387           return;
5388         add_regex (cp + 1, ignore_case, lang);
5389       }
5390       break;
5391
5392       /* Regexp to be used for any language. */
5393     default:
5394       add_regex (regex_arg, ignore_case, NULL);
5395       break;
5396     }
5397 }
5398
5399 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5400    expression, into a real regular expression by compiling it. */
5401 static void
5402 add_regex (regexp_pattern, ignore_case, lang)
5403      char *regexp_pattern;
5404      bool ignore_case;
5405      language *lang;
5406 {
5407   static struct re_pattern_buffer zeropattern;
5408   char *name;
5409   const char *err;
5410   struct re_pattern_buffer *patbuf;
5411   pattern *pp;
5412
5413
5414   if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5415     {
5416       error ("%s: unterminated regexp", regexp_pattern);
5417       return;
5418     }
5419   name = scan_separators (regexp_pattern);
5420   if (regexp_pattern[0] == '\0')
5421     {
5422       error ("null regexp", (char *)NULL);
5423       return;
5424     }
5425   (void) scan_separators (name);
5426
5427   patbuf = xnew (1, struct re_pattern_buffer);
5428   *patbuf = zeropattern;
5429   if (ignore_case)
5430     patbuf->translate = lc_trans;       /* translation table to fold case  */
5431
5432   err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5433   if (err != NULL)
5434     {
5435       error ("%s while compiling pattern", err);
5436       return;
5437     }
5438
5439   pp = p_head;
5440   p_head = xnew (1, pattern);
5441   p_head->regex = savestr (regexp_pattern);
5442   p_head->p_next = pp;
5443   p_head->lang = lang;
5444   p_head->pat = patbuf;
5445   p_head->name_pattern = savestr (name);
5446   p_head->error_signaled = FALSE;
5447   p_head->ignore_case = ignore_case;
5448 }
5449
5450 /*
5451  * Do the substitutions indicated by the regular expression and
5452  * arguments.
5453  */
5454 static char *
5455 substitute (in, out, regs)
5456      char *in, *out;
5457      struct re_registers *regs;
5458 {
5459   char *result, *t;
5460   int size, dig, diglen;
5461
5462   result = NULL;
5463   size = strlen (out);
5464
5465   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5466   if (out[size - 1] == '\\')
5467     fatal ("pattern error in \"%s\"", out);
5468   for (t = etags_strchr (out, '\\');
5469        t != NULL;
5470        t = etags_strchr (t + 2, '\\'))
5471     if (ISDIGIT (t[1]))
5472       {
5473         dig = t[1] - '0';
5474         diglen = regs->end[dig] - regs->start[dig];
5475         size += diglen - 2;
5476       }
5477     else
5478       size -= 1;
5479
5480   /* Allocate space and do the substitutions. */
5481   result = xnew (size + 1, char);
5482
5483   for (t = result; *out != '\0'; out++)
5484     if (*out == '\\' && ISDIGIT (*++out))
5485       {
5486         dig = *out - '0';
5487         diglen = regs->end[dig] - regs->start[dig];
5488         strncpy (t, in + regs->start[dig], diglen);
5489         t += diglen;
5490       }
5491     else
5492       *t++ = *out;
5493   *t = '\0';
5494
5495   assert (t <= result + size && t - result == (int)strlen (result));
5496
5497   return result;
5498 }
5499
5500 /* Deallocate all patterns. */
5501 static void
5502 free_patterns ()
5503 {
5504   pattern *pp;
5505   while (p_head != NULL)
5506     {
5507       pp = p_head->p_next;
5508       free (p_head->regex);
5509       free (p_head->name_pattern);
5510       free (p_head);
5511       p_head = pp;
5512     }
5513   return;
5514 }
5515 #endif /* ETAGS_REGEXPS */
5516
5517 \f
5518 static bool
5519 nocase_tail (cp)
5520      char *cp;
5521 {
5522   register int len = 0;
5523
5524   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5525     cp++, len++;
5526   if (*cp == '\0' && !intoken (dbp[len]))
5527     {
5528       dbp += len;
5529       return TRUE;
5530     }
5531   return FALSE;
5532 }
5533
5534 static char *
5535 get_tag (bp)
5536      register char *bp;
5537 {
5538   register char *cp, *name;
5539
5540   if (*bp == '\0')
5541     return NULL;
5542   /* Go till you get to white space or a syntactic break */
5543   for (cp = bp + 1; !notinname (*cp); cp++)
5544     continue;
5545   name = savenstr (bp, cp-bp);
5546   pfnote (name, TRUE,
5547           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5548   return name;
5549 }
5550
5551 /* Initialize a linebuffer for use */
5552 static void
5553 initbuffer (lbp)
5554      linebuffer *lbp;
5555 {
5556   lbp->size = (DEBUG) ? 3 : 200;
5557   lbp->buffer = xnew (lbp->size, char);
5558   lbp->buffer[0] = '\0';
5559   lbp->len = 0;
5560 }
5561
5562 /*
5563  * Read a line of text from `stream' into `lbp', excluding the
5564  * newline or CR-NL, if any.  Return the number of characters read from
5565  * `stream', which is the length of the line including the newline.
5566  *
5567  * On DOS or Windows we do not count the CR character, if any, before the
5568  * NL, in the returned length; this mirrors the behavior of emacs on those
5569  * platforms (for text files, it translates CR-NL to NL as it reads in the
5570  * file).
5571  */
5572 static long
5573 readline_internal (lbp, stream)
5574      linebuffer *lbp;
5575      register FILE *stream;
5576 {
5577   char *buffer = lbp->buffer;
5578   register char *p = lbp->buffer;
5579   register char *pend;
5580   int chars_deleted;
5581
5582   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5583
5584   for (;;)
5585     {
5586       register int c = getc (stream);
5587       if (p == pend)
5588         {
5589           /* We're at the end of linebuffer: expand it. */
5590           lbp->size *= 2;
5591           xrnew (buffer, lbp->size, char);
5592           p += buffer - lbp->buffer;
5593           pend = buffer + lbp->size;
5594           lbp->buffer = buffer;
5595         }
5596       if (c == EOF)
5597         {
5598           *p = '\0';
5599           chars_deleted = 0;
5600           break;
5601         }
5602       if (c == '\n')
5603         {
5604           if (p > buffer && p[-1] == '\r')
5605             {
5606               p -= 1;
5607 #ifdef DOS_NT
5608              /* Assume CRLF->LF translation will be performed by Emacs
5609                 when loading this file, so CRs won't appear in the buffer.
5610                 It would be cleaner to compensate within Emacs;
5611                 however, Emacs does not know how many CRs were deleted
5612                 before any given point in the file.  */
5613               chars_deleted = 1;
5614 #else
5615               chars_deleted = 2;
5616 #endif
5617             }
5618           else
5619             {
5620               chars_deleted = 1;
5621             }
5622           *p = '\0';
5623           break;
5624         }
5625       *p++ = c;
5626     }
5627   lbp->len = p - buffer;
5628
5629   return lbp->len + chars_deleted;
5630 }
5631
5632 /*
5633  * Like readline_internal, above, but in addition try to match the
5634  * input line against relevant regular expressions.
5635  */
5636 static void
5637 readline (lbp, stream)
5638      linebuffer *lbp;
5639      FILE *stream;
5640 {
5641   long result;
5642
5643   linecharno = charno;          /* update global char number of line start */
5644   result = readline_internal (lbp, stream); /* read line */
5645   lineno += 1;                  /* increment global line number */
5646   charno += result;             /* increment global char number */
5647
5648   /* Honour #line directives. */
5649   if (!no_line_directive)
5650     {
5651       static bool discard_until_line_directive;
5652
5653       /* Check whether this is a #line directive. */
5654       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
5655         {
5656           int start, lno;
5657
5658           if (DEBUG) start = 0; /* shut up the compiler */
5659           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
5660             {
5661               char *endp = lbp->buffer + start;
5662
5663               assert (start > 0);
5664               while ((endp = etags_strchr (endp, '"')) != NULL
5665                      && endp[-1] == '\\')
5666                 endp++;
5667               if (endp != NULL)
5668                 /* Ok, this is a real #line directive.  Let's deal with it. */
5669                 {
5670                   char *taggedabsname;  /* absolute name of original file */
5671                   char *taggedfname;    /* name of original file as given */
5672                   char *name;           /* temp var */
5673
5674                   discard_until_line_directive = FALSE; /* found it */
5675                   name = lbp->buffer + start;
5676                   *endp = '\0';
5677                   canonicalize_filename (name); /* for DOS */
5678                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
5679                   if (filename_is_absolute (name)
5680                       || filename_is_absolute (curfdp->infname))
5681                     taggedfname = savestr (taggedabsname);
5682                   else
5683                     taggedfname = relative_filename (taggedabsname,tagfiledir);
5684
5685                   if (streq (curfdp->taggedfname, taggedfname))
5686                     /* The #line directive is only a line number change.  We
5687                        deal with this afterwards. */
5688                     free (taggedfname);
5689                   else
5690                     /* The tags following this #line directive should be
5691                        attributed to taggedfname.  In order to do this, set
5692                        curfdp accordingly. */
5693                     {
5694                       fdesc *fdp; /* file description pointer */
5695
5696                       /* Go look for a file description already set up for the
5697                          file indicated in the #line directive.  If there is
5698                          one, use it from now until the next #line
5699                          directive. */
5700                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5701                         if (streq (fdp->infname, curfdp->infname)
5702                             && streq (fdp->taggedfname, taggedfname))
5703                           /* If we remove the second test above (after the &&)
5704                              then all entries pertaining to the same file are
5705                              coalesced in the tags file.  If we use it, then
5706                              entries pertaining to the same file but generated
5707                              from different files (via #line directives) will
5708                              go into separate sections in the tags file.  These
5709                              alternatives look equivalent.  The first one
5710                              destroys some apparently useless information. */
5711                           {
5712                             curfdp = fdp;
5713                             free (taggedfname);
5714                             break;
5715                           }
5716                       /* Else, if we already tagged the real file, skip all
5717                          input lines until the next #line directive. */
5718                       if (fdp == NULL) /* not found */
5719                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5720                           if (streq (fdp->infabsname, taggedabsname))
5721                             {
5722                               discard_until_line_directive = TRUE;
5723                               free (taggedfname);
5724                               break;
5725                             }
5726                       /* Else create a new file description and use that from
5727                          now on, until the next #line directive. */
5728                       if (fdp == NULL) /* not found */
5729                         {
5730                           fdp = fdhead;
5731                           fdhead = xnew (1, fdesc);
5732                           *fdhead = *curfdp; /* copy curr. file description */
5733                           fdhead->next = fdp;
5734                           fdhead->infname = savestr (curfdp->infname);
5735                           fdhead->infabsname = savestr (curfdp->infabsname);
5736                           fdhead->infabsdir = savestr (curfdp->infabsdir);
5737                           fdhead->taggedfname = taggedfname;
5738                           fdhead->usecharno = FALSE;
5739                           curfdp = fdhead;
5740                         }
5741                     }
5742                   free (taggedabsname);
5743                   lineno = lno - 1;
5744                   readline (lbp, stream);
5745                   return;
5746                 } /* if a real #line directive */
5747             } /* if #line is followed by a a number */
5748         } /* if line begins with "#line " */
5749
5750       /* If we are here, no #line directive was found. */
5751       if (discard_until_line_directive)
5752         {
5753           if (result > 0)
5754             {
5755             /* Do a tail recursion on ourselves, thus discarding the contents
5756                of the line buffer. */
5757               readline (lbp, stream);
5758               return;
5759             }
5760           /* End of file. */
5761           discard_until_line_directive = FALSE;
5762           return;
5763         }
5764     } /* if #line directives should be considered */
5765
5766 #ifdef ETAGS_REGEXPS
5767   {
5768     int match;
5769     pattern *pp;
5770
5771     /* Match against relevant patterns. */
5772     if (lbp->len > 0)
5773       for (pp = p_head; pp != NULL; pp = pp->p_next)
5774         {
5775           /* Only use generic regexps or those for the current language. */
5776           if (pp->lang != NULL && pp->lang != fdhead->lang)
5777             continue;
5778
5779           match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
5780           switch (match)
5781             {
5782             case -2:
5783               /* Some error. */
5784               if (!pp->error_signaled)
5785                 {
5786                   error ("error while matching \"%s\"", pp->regex);
5787                   pp->error_signaled = TRUE;
5788                 }
5789               break;
5790             case -1:
5791               /* No match. */
5792               break;
5793             default:
5794               /* Match occurred.  Construct a tag. */
5795               if (pp->name_pattern[0] != '\0')
5796                 {
5797                   /* Make a named tag. */
5798                   char *name = substitute (lbp->buffer,
5799                                            pp->name_pattern, &pp->regs);
5800                   if (name != NULL)
5801                     pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5802                 }
5803               else
5804                 {
5805                   /* Make an unnamed tag. */
5806                   pfnote ((char *)NULL, TRUE,
5807                           lbp->buffer, match, lineno, linecharno);
5808                 }
5809               break;
5810             }
5811         }
5812   }
5813 #endif /* ETAGS_REGEXPS */
5814 }
5815
5816 \f
/*
 * Duplicate the string CP: the whole of it is handed to savenstr,
 * which returns a freshly allocated, NUL-terminated copy.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5827
5828 /*
5829  * Return a pointer to a space of size LEN+1 allocated with xnew where
5830  * the string CP has been copied for at most the first LEN characters.
5831  */
5832 static char *
5833 savenstr (cp, len)
5834      char *cp;
5835      int len;
5836 {
5837   register char *dp;
5838
5839   dp = xnew (len + 1, char);
5840   strncpy (dp, cp, len);
5841   dp[len] = '\0';
5842   return dp;
5843 }
5844
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if there is none.  The terminating
 * NUL takes part in the search, so looking for '\0' finds the end of
 * the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
5866
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if there is none.  The terminating
 * NUL takes part in the search, so looking for '\0' finds the end of
 * the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;
    }
}
5885
5886 /*
5887  * Return TRUE if the two strings are equal, ignoring case for alphabetic
5888  * characters.
5889  *
5890  * Analogous to BSD's strcasecmp, included for portability.
5891  */
5892 static bool
5893 strcaseeq (s1, s2)
5894      register const char *s1;
5895      register const char *s2;
5896 {
5897   while (*s1 != '\0'
5898          && (ISALPHA (*s1) && ISALPHA (*s2)
5899              ? lowcase (*s1) == lowcase (*s2)
5900              : *s1 == *s2))
5901     s1++, s2++;
5902
5903   return (*s1 == *s2);
5904 }
5905
/* Return a pointer to the first character of CP that is not white
   space, as judged by iswhite. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5915
/* Return a pointer to the first character of CP that is white space,
   as judged by iswhite, or to the terminating NUL if there is none. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;
  return cp;
}
5925
5926 /* Print error message and exit.  */
5927 void
5928 fatal (s1, s2)
5929      char *s1, *s2;
5930 {
5931   error (s1, s2);
5932   exit (BAD);
5933 }
5934
5935 static void
5936 pfatal (s1)
5937      char *s1;
5938 {
5939   perror (s1);
5940   exit (BAD);
5941 }
5942
5943 static void
5944 suggest_asking_for_help ()
5945 {
5946   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5947            progname,
5948 #ifdef LONG_OPTIONS
5949            "--help"
5950 #else
5951            "-h"
5952 #endif
5953            );
5954   exit (BAD);
5955 }
5956
5957 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
5958 static void
5959 error (s1, s2)
5960      const char *s1, *s2;
5961 {
5962   fprintf (stderr, "%s: ", progname);
5963   fprintf (stderr, s1, s2);
5964   fprintf (stderr, "\n");
5965 }
5966
5967 /* Return a newly-allocated string whose contents
5968    concatenate those of s1, s2, s3.  */
5969 static char *
5970 concat (s1, s2, s3)
5971      char *s1, *s2, *s3;
5972 {
5973   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5974   char *result = xnew (len1 + len2 + len3 + 1, char);
5975
5976   strcpy (result, s1);
5977   strcpy (result + len1, s2);
5978   strcpy (result + len1 + len2, s3);
5979   result[len1 + len2 + len3] = '\0';
5980
5981   return result;
5982 }
5983
5984 \f
5985 /* Does the same work as the system V getcwd, but does not need to
5986    guess the buffer size in advance. */
5987 static char *
5988 etags_getcwd ()
5989 {
5990 #ifdef HAVE_GETCWD
5991   int bufsize = 200;
5992   char *path = xnew (bufsize, char);
5993
5994   while (getcwd (path, bufsize) == NULL)
5995     {
5996       if (errno != ERANGE)
5997         pfatal ("getcwd");
5998       bufsize *= 2;
5999       free (path);
6000       path = xnew (bufsize, char);
6001     }
6002
6003   canonicalize_filename (path);
6004   return path;
6005
6006 #else /* not HAVE_GETCWD */
6007 #if MSDOS
6008
6009   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6010
6011   getwd (path);
6012
6013   for (p = path; *p != '\0'; p++)
6014     if (*p == '\\')
6015       *p = '/';
6016     else
6017       *p = lowcase (*p);
6018
6019   return strdup (path);
6020 #else /* not MSDOS */
6021   linebuffer path;
6022   FILE *pipe;
6023
6024   initbuffer (&path);
6025   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6026   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6027     pfatal ("pwd");
6028   pclose (pipe);
6029
6030   return path.buffer;
6031 #endif /* not MSDOS */
6032 #endif /* not HAVE_GETCWD */
6033 }
6034
6035 /* Return a newly allocated string containing the file name of FILE
6036    relative to the absolute directory DIR (which should end with a slash). */
6037 static char *
6038 relative_filename (file, dir)
6039      char *file, *dir;
6040 {
6041   char *fp, *dp, *afn, *res;
6042   int i;
6043
6044   /* Find the common root of file and dir (with a trailing slash). */
6045   afn = absolute_filename (file, cwd);
6046   fp = afn;
6047   dp = dir;
6048   while (*fp++ == *dp++)
6049     continue;
6050   fp--, dp--;                   /* back to the first differing char */
6051 #ifdef DOS_NT
6052   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6053     return afn;
6054 #endif
6055   do                            /* look at the equal chars until '/' */
6056     fp--, dp--;
6057   while (*fp != '/');
6058
6059   /* Build a sequence of "../" strings for the resulting relative file name. */
6060   i = 0;
6061   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6062     i += 1;
6063   res = xnew (3*i + strlen (fp + 1) + 1, char);
6064   res[0] = '\0';
6065   while (i-- > 0)
6066     strcat (res, "../");
6067
6068   /* Add the file name relative to the common root of file and dir. */
6069   strcat (res, fp + 1);
6070   free (afn);
6071
6072   return res;
6073 }
6074
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is not already absolute, it is appended to DIR.  The name is
   then simplified in place: each "/dirname/.." and "/." component is
   deleted.  Should nothing be left, "/" is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the slash that starts the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the user could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination overlap, so this has to be
                 memmove; the terminating NUL is moved as well. */
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Overlapping regions here too. */
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: give back the root directory, and do
         not leak the now empty buffer. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6135
6136 /* Return a newly allocated string containing the absolute
6137    file name of dir where FILE resides given DIR (which should
6138    end with a slash). */
6139 static char *
6140 absolute_dirname (file, dir)
6141      char *file, *dir;
6142 {
6143   char *slashp, *res;
6144   char save;
6145
6146   canonicalize_filename (file);
6147   slashp = etags_strrchr (file, '/');
6148   if (slashp == NULL)
6149     return savestr (dir);
6150   save = slashp[1];
6151   slashp[1] = '\0';
6152   res = absolute_filename (file, dir);
6153   slashp[1] = save;
6154
6155   return res;
6156 }
6157
/* Tell whether FN is an absolute file name: one that begins with a
   slash or, when DOS_NT is defined, with a letter, a colon and a
   slash.  FN must have been canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
6170
/* Bring the file name FN to canonical form, in place.  When DOS_NT is
   defined, a lower case drive letter is passed through upcase and
   every backslash becomes a slash; otherwise nothing is done. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  while (*fn != '\0')
    {
      if (*fn == '\\')
        *fn = '/';
      fn++;
    }
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6189
6190 /* Set the minimum size of a string contained in a linebuffer. */
6191 static void
6192 linebuffer_setlen (lbp, toksize)
6193      linebuffer *lbp;
6194      int toksize;
6195 {
6196   while (lbp->size <= toksize)
6197     {
6198       lbp->size *= 2;
6199       xrnew (lbp->buffer, lbp->size, char);
6200     }
6201   lbp->len = toksize;
6202 }
6203
6204 /* Like malloc but get fatal error if memory is exhausted.  */
6205 static PTR
6206 xmalloc (size)
6207      unsigned int size;
6208 {
6209   PTR result = (PTR) malloc (size);
6210   if (result == NULL)
6211     fatal ("virtual memory exhausted", (char *)NULL);
6212   return result;
6213 }
6214
6215 static PTR
6216 xrealloc (ptr, size)
6217      char *ptr;
6218      unsigned int size;
6219 {
6220   PTR result = (PTR) realloc (ptr, size);
6221   if (result == NULL)
6222     fatal ("virtual memory exhausted", (char *)NULL);
6223   return result;
6224 }
6225
6226 /*
6227  * Local Variables:
6228  * c-indentation-style: gnu
6229  * indent-tabs-mode: t
6230  * tab-width: 8
6231  * fill-column: 79
6232  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
6233  * End:
6234  */