lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5 This file is not considered part of GNU Emacs.
   6
   7 This program is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software Foundation,
  19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  * 1989 Sam Kendall added C++.
  28  * 1992 Joseph B. Wells improved C and C++ parsing.
  29  * 1993 Francesco Potortì reorganised C and C++.
  30  * 1994 Regexp tags by Tom Tromey.
  31  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  32  * 2002 #line directives by Francesco Potortì.
  33  *
  34  *      Francesco Potortì <pot@gnu.org> has maintained it since 1993.
  35  */
  36
  37 char pot_etags_version[] = "@(#) pot revision number is 16.4";
  38
  39 #define TRUE    1
  40 #define FALSE   0
  41
  42 #ifdef DEBUG
  43 #  undef DEBUG
  44 #  define DEBUG TRUE
  45 #else
  46 #  define DEBUG  FALSE
  47 #  define NDEBUG                /* disable assert */
  48 #endif
  49
  50 #ifdef HAVE_CONFIG_H
  51 # include <config.h>
  52   /* On some systems, Emacs defines static as nothing for the sake
  53      of unexec.  We don't want that here since we don't use unexec. */
  54 # undef static
  55 # define ETAGS_REGEXPS          /* use the regexp features */
  56 # define LONG_OPTIONS           /* accept long options */
  57 # ifndef PTR                    /* for Xemacs */
  58 #   define PTR void *
  59 # endif
  60 # ifndef __P                    /* for Xemacs */
  61 #   define __P(args) args
  62 # endif
  63 #else
  64 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  65 #   define __P(args) args       /* use prototypes */
  66 #   define PTR void *           /* for generic pointers */
  67 # else
  68 #   define __P(args) ()         /* no prototypes */
  69 #   define const                /* remove const for old compilers' sake */
  70 #   define PTR long *           /* don't use void* */
  71 # endif
  72 #endif /* !HAVE_CONFIG_H */
  73
  74 #ifndef _GNU_SOURCE
  75 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  76 #endif
  77
  78 /* WIN32_NATIVE is for Xemacs.
  79    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  80 #ifdef WIN32_NATIVE
  81 # undef MSDOS
  82 # undef  WINDOWSNT
  83 # define WINDOWSNT
  84 #endif /* WIN32_NATIVE */
  85
  86 #ifdef MSDOS
  87 # undef MSDOS
  88 # define MSDOS TRUE
  89 # include <fcntl.h>
  90 # include <sys/param.h>
  91 # include <io.h>
  92 # ifndef HAVE_CONFIG_H
  93 #   define DOS_NT
  94 #   include <sys/config.h>
  95 # endif
  96 #else
  97 # define MSDOS FALSE
  98 #endif /* MSDOS */
  99
 100 #ifdef WINDOWSNT
 101 # include <stdlib.h>
 102 # include <fcntl.h>
 103 # include <string.h>
 104 # include <direct.h>
 105 # include <io.h>
 106 # define MAXPATHLEN _MAX_PATH
 107 # undef HAVE_NTGUI
 108 # undef  DOS_NT
 109 # define DOS_NT
 110 # ifndef HAVE_GETCWD
 111 #   define HAVE_GETCWD
 112 # endif /* undef HAVE_GETCWD */
 113 #else /* !WINDOWSNT */
 114 # ifdef STDC_HEADERS
 115 #  include <stdlib.h>
 116 #  include <string.h>
 117 # else
 118     extern char *getenv ();
 119 # endif
 120 #endif /* !WINDOWSNT */
 121
 122 #ifdef HAVE_UNISTD_H
 123 # include <unistd.h>
 124 #else
 125 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 126     extern char *getcwd (char *buf, size_t size);
 127 # endif
 128 #endif /* HAVE_UNISTD_H */
 129
 130 #include <stdio.h>
 131 #include <ctype.h>
 132 #include <errno.h>
 133 #ifndef errno
 134   extern int errno;
 135 #endif
 136 #include <sys/types.h>
 137 #include <sys/stat.h>
 138
 139 #include <assert.h>
 140 #ifdef NDEBUG
 141 # undef  assert                 /* some systems have a buggy assert.h */
 142 # define assert(x) ((void) 0)
 143 #endif
 144
 145 #if !defined (S_ISREG) && defined (S_IFREG)
 146 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 147 #endif
 148
 149 #ifdef LONG_OPTIONS
 150 # include <getopt.h>
 151 #else
 152 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 153   extern char *optarg;
 154   extern int optind, opterr;
 155 #endif /* LONG_OPTIONS */
 156
 157 #ifdef ETAGS_REGEXPS
 158 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 159 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 160                              !!! NOTICE !!!
 161  the regex.h distributed with Cygwin is not compatible with etags, alas!
 162 If you want regular expression support, you should delete this notice and
 163               arrange to use the GNU regex.h and regex.c.
 164 #   endif
 165 # endif
 166 # include <regex.h>
 167 #endif /* ETAGS_REGEXPS */
 168
 169 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 170  Leave it undefined to make the program "etags", which makes emacs-style
 171  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 172 #ifdef CTAGS
 173 # undef  CTAGS
 174 # define CTAGS TRUE
 175 #else
 176 # define CTAGS FALSE
 177 #endif
 178
 179 /* Exit codes for success and failure.  */
 180 #ifdef VMS
 181 # define        GOOD    1
 182 # define        BAD     0
 183 #else
 184 # define        GOOD    0
 185 # define        BAD     1
 186 #endif
 187
 188 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 189 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 190 /* Character classification: CHAR folds any value into 0..CHARS-1 so it can index the 256-entry tables or be passed to <ctype.h>. */
 191 #define CHARS 256               /* number of distinct char values (2^8, i.e. 8-bit chars) */
 192 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 193 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
 194 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
 195 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
 196 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
 197 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
 198 /* <ctype.h> wrappers: the argument is masked with CHAR first, so the value passed on is always in 0..CHARS-1, never negative. */
 199 #define ISALNUM(c)      isalnum (CHAR(c))
 200 #define ISALPHA(c)      isalpha (CHAR(c))
 201 #define ISDIGIT(c)      isdigit (CHAR(c))
 202 #define ISLOWER(c)      islower (CHAR(c))
 203 /* Case conversion, with the same CHAR masking of the argument. */
 204 #define lowcase(c)      tolower (CHAR(c))
 205 #define upcase(c)       toupper (CHAR(c))
 206
 207
 208 /*
 209  *      xnew, xrnew -- allocate, reallocate storage
 210  *
 211  * SYNOPSIS:    Type *xnew (int n, Type);
 212  *              void xrnew (OldPointer, int n, Type);
 213  */
 214 #if DEBUG
 215 # include "chkmalloc.h"
 216 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 217                                                   (n) * sizeof (Type)))
 218 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 219                                         (char *) (op), (n) * sizeof (Type)))
 220 #else
 221 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 222 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 223                                         (char *) (op), (n) * sizeof (Type)))
 224 #endif
 225
 226 #define bool int
 227
 228 typedef void Lang_function __P((FILE *));
 229
 230 typedef struct
 231 {
 232   char *suffix;                 /* file name suffix for this compressor */
 233   char *command;                /* takes one arg and decompresses to stdout */
 234 } compressor;
 235
 236 typedef struct
 237 {
 238   char *name;                   /* language name */
 239   bool metasource;              /* source used to generate other sources */
 240   Lang_function *function;      /* parse function */
 241   char **filenames;             /* names of this language's files */
 242   char **suffixes;              /* name suffixes of this language's files */
 243   char **interpreters;          /* interpreters for this language */
 244 } language;
 245
 246 typedef struct fdesc
 247 {
 248   struct fdesc *next;           /* for the linked list */
 249   char *infname;                /* uncompressed input file name */
 250   char *infabsname;             /* absolute uncompressed input file name */
 251   char *infabsdir;              /* absolute dir of input file */
 252   char *taggedfname;            /* file name to write in tagfile */
 253   language *lang;               /* language of file */
 254   char *prop;                   /* file properties to write in tagfile */
 255   bool usecharno;               /* etags tags shall contain char number */
 256 } fdesc;
 257
 258 typedef struct node_st
 259 {                               /* sorting structure */
 260   struct node_st *left, *right; /* left and right sons */
 261   fdesc *fdp;                   /* description of file to whom tag belongs */
 262   char *name;                   /* tag name */
 263   char *pat;                    /* search pattern */
 264   bool valid;                   /* write this tag on the tag file */
 265   bool is_func;                 /* function tag: use pattern in CTAGS mode */
 266   bool been_warned;             /* warning already given for duplicated tag */
 267   int lno;                      /* line number tag is on */
 268   long cno;                     /* character number line starts on */
 269 } node;
 270
 271 /*
 272  * A `linebuffer' is a structure which holds a line of text.
 273  * `readline_internal' reads a line from a stream into a linebuffer
 274  * and works regardless of the length of the line.
 275  * SIZE is the size of BUFFER, LEN is the length of the string in
 276  * BUFFER after readline reads it.
 277  */
 278 typedef struct
 279 {
 280   long size;                    /* size of BUFFER */
 281   int len;                      /* length of the string in BUFFER after a line is read */
 282   char *buffer;                 /* storage holding the line of text */
 283 } linebuffer;
 284
 285 /* Used to support mixing of --lang and file names. */
 286 typedef struct
 287 {
 288   enum {
 289     at_language,                /* a language specification */
 290     at_regexp,                  /* a regular expression */
 291     at_icregexp,                /* same, but with case ignored */
 292     at_filename,                /* a file name */
 293     at_stdin                    /* read from stdin here */
 294   } arg_type;                   /* argument type */
 295   language *lang;               /* language associated with the argument */
 296   char *what;                   /* the argument itself */
 297 } argument;
 298
 299 #ifdef ETAGS_REGEXPS
 300 /* Structure defining a regular expression. */
 301 typedef struct pattern
 302 {
 303   struct pattern *p_next;       /* next pattern in the list of regexps */
 304   language *lang;               /* presumably the language this regexp is restricted to -- TODO confirm */
 305   char *regex;                  /* presumably the regexp as given by the user -- TODO confirm */
 306   struct re_pattern_buffer *pat; /* pattern buffer, a type from <regex.h> */
 307   struct re_registers regs;     /* match registers, a type from <regex.h> */
 308   char *name_pattern;           /* presumably the NAME part of /REGEXP/NAME/ -- TODO confirm */
 309   bool error_signaled;          /* NOTE(review): looks like a report-only-once flag -- confirm */
 310   bool ignore_case;             /* presumably TRUE for --ignore-case-regex patterns -- TODO confirm */
 311 } pattern;
 312 #endif /* ETAGS_REGEXPS */
 313
 314
 315 /* Many compilers barf on this:
 316         Lang_function Ada_funcs;
 317    so let's write it this way */
 318 static void Ada_funcs __P((FILE *));
 319 static void Asm_labels __P((FILE *));
 320 static void C_entries __P((int c_ext, FILE *));
 321 static void default_C_entries __P((FILE *));
 322 static void plain_C_entries __P((FILE *));
 323 static void Cjava_entries __P((FILE *));
 324 static void Cobol_paragraphs __P((FILE *));
 325 static void Cplusplus_entries __P((FILE *));
 326 static void Cstar_entries __P((FILE *));
 327 static void Erlang_functions __P((FILE *));
 328 static void Fortran_functions __P((FILE *));
 329 static void Yacc_entries __P((FILE *));
 330 static void Lisp_functions __P((FILE *));
 331 static void Makefile_targets __P((FILE *));
 332 static void Pascal_functions __P((FILE *));
 333 static void Perl_functions __P((FILE *));
 334 static void PHP_functions __P((FILE *));
 335 static void Postscript_functions __P((FILE *));
 336 static void Prolog_functions __P((FILE *));
 337 static void Python_functions __P((FILE *));
 338 static void Scheme_functions __P((FILE *));
 339 static void TeX_commands __P((FILE *));
 340 static void Texinfo_nodes __P((FILE *));
 341 static void just_read_file __P((FILE *));
 342
 343 static void print_language_names __P((void));
 344 static void print_version __P((void));
 345 static void print_help __P((void));
 346 int main __P((int, char **));
 347
 348 static compressor *get_compressor_from_suffix __P((char *, char **));
 349 static language *get_language_from_langname __P((const char *));
 350 static language *get_language_from_interpreter __P((char *));
 351 static language *get_language_from_filename __P((char *, bool));
 352 static long readline __P((linebuffer *, FILE *));
 353 static long readline_internal __P((linebuffer *, FILE *));
 354 static bool nocase_tail __P((char *));
 355 static char *get_tag __P((char *));
 356
 357 #ifdef ETAGS_REGEXPS
 358 static void analyse_regex __P((char *, bool));
 359 static void add_regex __P((char *, bool, language *));
 360 static void free_patterns __P((void));
 361 #endif /* ETAGS_REGEXPS */
 362 static void error __P((const char *, const char *));
 363 static void suggest_asking_for_help __P((void));
 364 void fatal __P((char *, char *));
 365 static void pfatal __P((char *));
 366 static void add_node __P((node *, node **));
 367
 368 static void init __P((void));
 369 static void initbuffer __P((linebuffer *));
 370 static void process_file_name __P((char *, language *));
 371 static void process_file __P((FILE *, char *, language *));
 372 static void find_entries __P((FILE *));
 373 static void free_tree __P((node *));
 374 static void free_fdesc __P((fdesc *));
 375 static void pfnote __P((char *, bool, char *, int, int, long));
 376 static void new_pfnote __P((char *, int, bool, char *, int, int, long));
 377 static void invalidate_nodes __P((fdesc *, node **));
 378 static void put_entries __P((node *));
 379
 380 static char *concat __P((char *, char *, char *));
 381 static char *skip_spaces __P((char *));
 382 static char *skip_non_spaces __P((char *));
 383 static char *savenstr __P((char *, int));
 384 static char *savestr __P((char *));
 385 static char *etags_strchr __P((const char *, int));
 386 static char *etags_strrchr __P((const char *, int));
 387 static bool strcaseeq __P((const char *, const char *));
 388 static char *etags_getcwd __P((void));
 389 static char *relative_filename __P((char *, char *));
 390 static char *absolute_filename __P((char *, char *));
 391 static char *absolute_dirname __P((char *, char *));
 392 static bool filename_is_absolute __P((char *f));
 393 static void canonicalize_filename __P((char *));
 394 static void linebuffer_setlen __P((linebuffer *, int));
 395 static PTR xmalloc __P((unsigned int));
 396 static PTR xrealloc __P((char *, unsigned int));
 397
 398 \f
 399 static char searchar = '/';     /* use /.../ searches */
 400
 401 static char *tagfile;           /* output file */
 402 static char *progname;          /* name this program was invoked with */
 403 static char *cwd;               /* current working directory */
 404 static char *tagfiledir;        /* directory of tagfile */
 405 static FILE *tagf;              /* ioptr for tags file */
 406
 407 static fdesc *fdhead;           /* head of file description list */
 408 static fdesc *curfdp;           /* current file description */
 409 static int lineno;              /* line number of current line */
 410 static long charno;             /* current character number */
 411 static long linecharno;         /* charno of start of current line */
 412 static char *dbp;               /* pointer to start of current tag */
 413
 414 static const int invalidcharno = -1;
 415
 416 static node *nodehead;          /* the head of the binary tree of tags */
 417 static node *last_node;         /* the last node created */
 418
 419 static linebuffer lb;           /* the current line */
 420
 421 /* boolean "functions" (see init)       */
 422 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 423 static char
 424   /* white chars */
 425   *white = " \f\t\n\r\v",
 426   /* not in a name */
 427   *nonam = " \f\t\n\r()=,;",
 428   /* token ending chars */
 429   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 430   /* token starting chars */
 431   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 432   /* valid in-token chars */
 433   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 434
 435 static bool append_to_tagfile;  /* -a: append to tags */
 436 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 437 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 438 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 439                                 /* 0 struct/enum/union decls, and C++ */
 440                                 /* member functions. */
 441 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 442                                 /* constants and variables. */
 443                                 /* -D: opposite of -d.  Default under ctags. */
 444 static bool globals;            /* create tags for global variables */
 445 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 446 static bool members;            /* create tags for C member variables */
 447 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 448 static bool update;             /* -u: update tags */
 449 static bool vgrind_style;       /* -v: create vgrind style index output */
 450 static bool no_warnings;        /* -w: suppress warnings */
 451 static bool cxref_style;        /* -x: create cxref style output */
 452 static bool cplusplus;          /* .[hc] means C++, not C */
 453 static bool noindentypedefs;    /* -I: ignore indentation in C */
 454 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 455
 456 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 457 static bool parsing_stdin;      /* --parse-stdin used */
 458
 459 #ifdef ETAGS_REGEXPS
 460 /* List of all regexps. */
 461 static pattern *p_head;
 462
 463 /* How many characters in the character set.  (From regex.c.)  */
 464 #define CHAR_SET_SIZE 256
 465 /* Translation table for case-insensitive matching. */
 466 static char lc_trans[CHAR_SET_SIZE];
 467 #endif /* ETAGS_REGEXPS */
 468 /* Long option table in getopt_long format: an entry whose third field is non-NULL stores its fourth field into that variable; otherwise the fourth field is the returned option code. */
 469 #ifdef LONG_OPTIONS
 470 static struct option longopts[] =
 471 {
 472   { "packages-only",      no_argument,       &packages_only,     TRUE  },
 473   { "c++",                no_argument,       NULL,               'C'   },
 474   { "declarations",       no_argument,       &declarations,      TRUE  },
 475   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
 476   { "help",               no_argument,       NULL,               'h'   },
 477   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): same name as the entry above -- confirm this one is ever selected */
 478   { "ignore-indentation", no_argument,       NULL,               'I'   },
 479   { "language",           required_argument, NULL,               'l'   },
 480   { "members",            no_argument,       &members,           TRUE  },
 481   { "no-members",         no_argument,       &members,           FALSE },
 482   { "output",             required_argument, NULL,               'o'   },
 483 #ifdef ETAGS_REGEXPS
 484   { "regex",              required_argument, NULL,               'r'   },
 485   { "no-regex",           no_argument,       NULL,               'R'   },
 486   { "ignore-case-regex",  required_argument, NULL,               'c'   },
 487 #endif /* ETAGS_REGEXPS */
 488   { "parse-stdin",        required_argument, NULL,               STDIN },
 489   { "version",            no_argument,       NULL,               'V'   },
 490
 491 #if CTAGS /* Ctags options */
 492   { "backward-search",    no_argument,       NULL,               'B'   },
 493   { "cxref",              no_argument,       NULL,               'x'   },
 494   { "defines",            no_argument,       NULL,               'd'   },
 495   { "globals",            no_argument,       &globals,           TRUE  },
 496   { "typedefs",           no_argument,       NULL,               't'   },
 497   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
 498   { "update",             no_argument,       NULL,               'u'   },
 499   { "vgrind",             no_argument,       NULL,               'v'   },
 500   { "no-warn",            no_argument,       NULL,               'w'   },
 501
 502 #else /* Etags options */
 503   { "append",             no_argument,       NULL,               'a'   },
 504   { "no-defines",         no_argument,       NULL,               'D'   },
 505   { "no-globals",         no_argument,       &globals,           FALSE },
 506   { "include",            required_argument, NULL,               'i'   },
 507 #endif
 508   { NULL }
 509 };
 510 #endif /* LONG_OPTIONS */
 511
 512 static compressor compressors[] =
 513 {
 514   { "z", "gzip -d -c"},
 515   { "Z", "gzip -d -c"},
 516   { "gz", "gzip -d -c"},
 517   { "GZ", "gzip -d -c"},
 518   { "bz2", "bzip2 -d -c" },
 519   { NULL }
 520 };
 521
 522 /*
 523  * Language stuff.
 524  */
 525
 526 /* Ada code */
 527 static char *Ada_suffixes [] =
 528   { "ads", "adb", "ada", NULL };
 529
 530 /* Assembly code */
 531 static char *Asm_suffixes [] =
 532   { "a",        /* Unix assembler */
 533     "asm", /* Microcontroller assembly */
 534     "def", /* BSO/Tasking definition includes  */
 535     "inc", /* Microcontroller include files */
 536     "ins", /* Microcontroller include files */
 537     "s", "sa", /* Unix assembler */
 538     "S",   /* cpp-processed Unix assembler */
 539     "src", /* BSO/Tasking C compiler output */
 540     NULL
 541   };
 542
 543 /* Note that .c and .h can be considered C++, if the --c++ flag was
 544    given, or if the `class' keyword is met inside the file.
 545    That is why default_C_entries is called for these. */
 546 static char *default_C_suffixes [] =
 547   { "c", "h", NULL };
 548
 549 static char *Cplusplus_suffixes [] =
 550   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 551     "M",                        /* Objective C++ */
 552     "pdb",                      /* Postscript with C syntax */
 553     NULL };
 554
 555 static char *Cjava_suffixes [] =
 556   { "java", NULL };
 557
 558 static char *Cobol_suffixes [] =
 559   { "COB", "cob", NULL };
 560
 561 static char *Cstar_suffixes [] =
 562   { "cs", "hs", NULL };
 563
 564 static char *Erlang_suffixes [] =
 565   { "erl", "hrl", NULL };
 566
 567 static char *Fortran_suffixes [] =
 568   { "F", "f", "f90", "for", NULL };
 569
 570 static char *Lisp_suffixes [] =
 571   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 572
 573 static char *Makefile_filenames [] =
 574   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 575
 576 static char *Pascal_suffixes [] =
 577   { "p", "pas", NULL };
 578
 579 static char *Perl_suffixes [] =
 580   { "pl", "pm", NULL };
 581
 582 static char *Perl_interpreters [] =
 583   { "perl", "@PERL@", NULL };
 584
 585 static char *PHP_suffixes [] =
 586   { "php", "php3", "php4", NULL };
 587
 588 static char *plain_C_suffixes [] =
 589   { "lm",                       /* Objective lex file */
 590     "m",                        /* Objective C file */
 591     "pc",                       /* Pro*C file */
 592      NULL };
 593
 594 static char *Postscript_suffixes [] =
 595   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 596
 597 static char *Prolog_suffixes [] =
 598   { "prolog", NULL };
 599
 600 static char *Python_suffixes [] =
 601   { "py", NULL };
 602
 603 /* Can't do the `SCM' or `scm' prefix with a version number. */
 604 static char *Scheme_suffixes [] =
 605   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 606
 607 static char *TeX_suffixes [] =
 608   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 609
 610 static char *Texinfo_suffixes [] =
 611   { "texi", "texinfo", "txi", NULL };
 612
 613 static char *Yacc_suffixes [] =
 614   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 615
 616 /*
 617  * Table of languages.
 618  *
 619  * It is ok for a given function to be listed under more than one
 620  * name.  I just didn't.
 621  */
 622
 623 static language lang_names [] =
 624 {
 625   { "ada",      FALSE, Ada_funcs,            NULL, Ada_suffixes,        NULL },
 626   { "asm",      FALSE, Asm_labels,           NULL, Asm_suffixes,        NULL },
 627   { "c",        FALSE, default_C_entries,    NULL, default_C_suffixes,  NULL },
 628   { "c++",      FALSE, Cplusplus_entries,    NULL, Cplusplus_suffixes,  NULL },
 629   { "c*",       FALSE, Cstar_entries,        NULL, Cstar_suffixes,      NULL },
 630   { "cobol",    FALSE, Cobol_paragraphs,     NULL, Cobol_suffixes,      NULL },
 631   { "erlang",   FALSE, Erlang_functions,     NULL, Erlang_suffixes,     NULL },
 632   { "fortran",  FALSE, Fortran_functions,    NULL, Fortran_suffixes,    NULL },
 633   { "java",     FALSE, Cjava_entries,        NULL, Cjava_suffixes,      NULL },
 634   { "lisp",     FALSE, Lisp_functions,       NULL, Lisp_suffixes,       NULL },
 635   { "makefile", FALSE, Makefile_targets,     Makefile_filenames, NULL,  NULL },
 636   { "pascal",   FALSE, Pascal_functions,     NULL, Pascal_suffixes,     NULL },
 637   { "perl",     FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
 638   { "php",      FALSE, PHP_functions,        NULL, PHP_suffixes,        NULL },
 639   { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
 640   { "proc",     FALSE, plain_C_entries,      NULL, plain_C_suffixes,    NULL },
 641   { "prolog",   FALSE, Prolog_functions,     NULL, Prolog_suffixes,     NULL },
 642   { "python",   FALSE, Python_functions,     NULL, Python_suffixes,     NULL },
 643   { "scheme",   FALSE, Scheme_functions,     NULL, Scheme_suffixes,     NULL },
 644   { "tex",      FALSE, TeX_commands,         NULL, TeX_suffixes,        NULL },
 645   { "texinfo",  FALSE, Texinfo_nodes,        NULL, Texinfo_suffixes,    NULL },
 646   { "yacc",      TRUE, Yacc_entries,         NULL, Yacc_suffixes,       NULL },
 647   { "auto", FALSE, NULL },             /* default guessing scheme */
 648   { "none", FALSE, just_read_file },   /* regexp matching only */
 649   { NULL, FALSE, NULL }                /* end of list */
 650 };
 651
 652 \f
 653 static void
 654 print_language_names ()
 655 {
 656   language *lang;
 657   char **name, **ext;
 658
 659   puts ("\nThese are the currently supported languages, along with the\n\
 660 default file names and dot suffixes:");
 661   for (lang = lang_names; lang->name != NULL; lang++)
 662     {
 663       printf ("  %-*s", 10, lang->name);
 664       if (lang->filenames != NULL)
 665         for (name = lang->filenames; *name != NULL; name++)
 666           printf (" %s", *name);
 667       if (lang->suffixes != NULL)
 668         for (ext = lang->suffixes; *ext != NULL; ext++)
 669           printf (" .%s", *ext);
 670       puts ("");
 671     }
 672   puts ("Where `auto' means use default language for files based on file\n\
 673 name suffix, and `none' means only do regexp processing on files.\n\
 674 If no language is specified and no matching suffix is found,\n\
 675 the first line of the file is read for a sharp-bang (#!) sequence\n\
 676 followed by the name of an interpreter.  If no such sequence is found,\n\
 677 Fortran is tried first; if no tags are found, C is tried next.\n\
 678 When parsing any C file, a \"class\" keyword switches to C++.\n\
 679 Compressed files are supported using gzip and bzip2.");
 680 }
 681
 682 #ifndef EMACS_NAME
 683 # define EMACS_NAME "GNU Emacs"
 684 #endif
 685 #ifndef VERSION
 686 # define VERSION "21"
 687 #endif
 688 static void
 689 print_version ()
 690 {
 691   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 692   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 693   puts ("This program is distributed under the same terms as Emacs");
 694
 695   exit (GOOD);
 696 }
 697
 698 static void
 699 print_help ()
 700 {
 701   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 702 \n\
 703 These are the options accepted by %s.\n", progname, progname);
 704 #ifdef LONG_OPTIONS
 705   puts ("You may use unambiguous abbreviations for the long option names.");
 706 #else
 707   puts ("Long option names do not work with this executable, as it is not\n\
 708 linked with GNU getopt.");
 709 #endif /* LONG_OPTIONS */
 710   puts ("  A - as file name means read names from stdin (one per line).\n\
 711 Absolute names are stored in the output file as they are.\n\
 712 Relative ones are stored relative to the output file's directory.\n");
 713
 714   puts ("--parse-stdin=NAME\n\
 715         Read from standard input and record tags as belonging to file NAME.");
 716
 717   if (!CTAGS)
 718     puts ("-a, --append\n\
 719         Append tag entries to existing tags file.");
 720
 721   puts ("--packages-only\n\
 722         For Ada files, only generate tags for packages.");
 723
 724   if (CTAGS)
 725     puts ("-B, --backward-search\n\
 726         Write the search commands for the tag entries using '?', the\n\
 727         backward-search command instead of '/', the forward-search command.");
 728
 729   /* This option is mostly obsolete, because etags can now automatically
 730      detect C++.  Retained for backward compatibility and for debugging and
 731      experimentation.  In principle, we could want to tag as C++ even
 732      before any "class" keyword.
 733   puts ("-C, --c++\n\
 734         Treat files whose name suffix defaults to C language as C++ files.");
 735   */
 736
 737   puts ("--declarations\n\
 738         In C and derived languages, create tags for function declarations,");
 739   if (CTAGS)
 740     puts ("\tand create tags for extern variables if --globals is used.");
 741   else
 742     puts
 743       ("\tand create tags for extern variables unless --no-globals is used.");
 744
 745   if (CTAGS)
 746     puts ("-d, --defines\n\
 747         Create tag entries for C #define constants and enum constants, too.");
 748   else
 749     puts ("-D, --no-defines\n\
 750         Don't create tag entries for C #define constants and enum constants.\n\
 751         This makes the tags file smaller.");
 752
 753   if (!CTAGS)
 754     puts ("-i FILE, --include=FILE\n\
 755         Include a note in tag file indicating that, when searching for\n\
 756         a tag, one should also consult the tags file FILE after\n\
 757         checking the current file.");
 758
 759   puts ("-l LANG, --language=LANG\n\
 760         Force the following files to be considered as written in the\n\
 761         named language up to the next --language=LANG option.");
 762
 763   if (CTAGS)
 764     puts ("--globals\n\
 765         Create tag entries for global variables in some languages.");
 766   else
 767     puts ("--no-globals\n\
 768         Do not create tag entries for global variables in some\n\
 769         languages.  This makes the tags file smaller.");
 770   puts ("--members\n\
 771         Create tag entries for member variables in C and derived languages.");
 772
 773 #ifdef ETAGS_REGEXPS
 774   puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
 775         Make a tag for each line matching pattern REGEXP in the following\n\
 776         files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
 777         regexfile is a file containing one REGEXP per line.\n\
 778         REGEXP is anchored (as if preceded by ^).\n\
 779         The form /REGEXP/NAME/ creates a named tag.\n\
 780         For example Tcl named tags can be created with:\n\
 781         --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
 782   puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
 783         Like -r, --regex but ignore case when matching expressions.");
 784   puts ("-R, --no-regex\n\
 785         Don't create tags from regexps for the following files.");
 786 #endif /* ETAGS_REGEXPS */
 787   puts ("-o FILE, --output=FILE\n\
 788         Write the tags to FILE.");
 789   puts ("-I, --ignore-indentation\n\
 790         Don't rely on indentation quite as much as normal.  Currently,\n\
 791         this means not to assume that a closing brace in the first\n\
 792         column is the final brace of a function or structure\n\
 793         definition in C and C++.");
 794
 795   if (CTAGS)
 796     {
 797       puts ("-t, --typedefs\n\
 798         Generate tag entries for C and Ada typedefs.");
 799       puts ("-T, --typedefs-and-c++\n\
 800         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 801         and C++ member functions.");
 802     }
 803
 804   if (CTAGS)
 805     puts ("-u, --update\n\
 806         Update the tag entries for the given files, leaving tag\n\
 807         entries for other files in place.  Currently, this is\n\
 808         implemented by deleting the existing entries for the given\n\
 809         files and then rewriting the new entries at the end of the\n\
 810         tags file.  It is often faster to simply rebuild the entire\n\
 811         tag file than to use this.");
 812
 813   if (CTAGS)
 814     {
 815       puts ("-v, --vgrind\n\
 816         Generates an index of items intended for human consumption,\n\
 817         similar to the output of vgrind.  The index is sorted, and\n\
 818         gives the page number of each item.");
 819       puts ("-w, --no-warn\n\
 820         Suppress warning messages about entries defined in multiple\n\
 821         files.");
 822       puts ("-x, --cxref\n\
 823         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 824         The output uses line numbers instead of page numbers, but\n\
 825         beyond that the differences are cosmetic; try both to see\n\
 826         which you like.");
 827     }
 828
 829   puts ("-V, --version\n\
 830         Print the version of the program.\n\
 831 -h, --help\n\
 832         Print this help message.");
 833
 834   print_language_names ();
 835
 836   puts ("");
 837   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 838
 839   exit (GOOD);
 840 }
 841
 842 \f
 843 #ifdef VMS                      /* VMS specific functions */
 844
 845 #define EOS     '\0'
 846
 847 /* This is a BUG!  ANY arbitrary limit is a BUG!
 848    Won't someone please fix this?  */
 849 #define MAX_FILE_SPEC_LEN       255
 850 typedef struct  {                       /* file-name buffer that fn_exp fills in */
 851   short   curlen;                       /* fn_exp uses it as the index at which body is terminated */
 852   char    body[MAX_FILE_SPEC_LEN + 1];  /* name text; the extra byte is presumably for the EOS -- TODO confirm */
 853 } vspec;
 854
 855 /*
 856  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
 857  returning in each successive call the next file name matching the input
 858  spec. The function expects that each in_spec passed
 859  to it will be processed to completion; in particular, up to and
 860  including the call following that in which the last matching name
 861  is returned, the function ignores the value of in_spec, and will
 862  only start processing a new spec with the following call.
 863  If an error occurs, on return out_spec contains the value
 864  of in_spec when the error occurred.
 865
 866  With each successive file name returned in out_spec, the
 867  function's return value is one. When there are no more matching
 868  names the function returns zero. If on the first call no file
 869  matches in_spec, or there is any other error, -1 is returned.
 870 */
 871
 872 #include        <rmsdef.h>
 873 #include        <descrip.h>
 874 #define         OUTSIZE MAX_FILE_SPEC_LEN
 875 static short
 876 fn_exp (out, in)
 877      vspec *out;
 878      char *in;
 879 {
 880   static long context = 0;
 881   static struct dsc$descriptor_s o;
 882   static struct dsc$descriptor_s i;
 883   static bool pass1 = TRUE;
 884   long status;
 885   short retval;
 886
 887   if (pass1)
 888     {
 889       pass1 = FALSE;
 890       o.dsc$a_pointer = (char *) out;
 891       o.dsc$w_length = (short)OUTSIZE;
 892       i.dsc$a_pointer = in;
 893       i.dsc$w_length = (short)strlen(in);
 894       i.dsc$b_dtype = DSC$K_DTYPE_T;
 895       i.dsc$b_class = DSC$K_CLASS_S;
 896       o.dsc$b_dtype = DSC$K_DTYPE_VT;
 897       o.dsc$b_class = DSC$K_CLASS_VS;
 898     }
 899   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
 900     {
 901       out->body[out->curlen] = EOS;
 902       return 1;
 903     }
 904   else if (status == RMS$_NMF)
 905     retval = 0;
 906   else
 907     {
 908       strcpy(out->body, in);
 909       retval = -1;
 910     }
 911   lib$find_file_end(&context);
 912   pass1 = TRUE;
 913   return retval;
 914 }
 915
 916 /*
 917   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
 918   name of each file specified by the provided arg expanding wildcards.
 919 */
 920 static char *
 921 gfnames (arg, p_error)
 922      char *arg;
 923      bool *p_error;
 924 {
 925   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 926
 927   switch (fn_exp (&filename, arg))
 928     {
 929     case 1:
 930       *p_error = FALSE;
 931       return filename.body;
 932     case 0:
 933       *p_error = FALSE;
 934       return NULL;
 935     default:
 936       *p_error = TRUE;
 937       return filename.body;
 938     }
 939 }
 940
 941 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
 942 system (cmd)
 943      char *cmd;
 944 {
 945   error ("%s", "system() function not implemented under VMS");
 946 }
 947 #endif
 948
 949 #define VERSION_DELIM   ';'
 950 char *massage_name (s)
 951      char *s;
 952 {
 953   char *start = s;
 954
 955   for ( ; *s; s++)
 956     if (*s == VERSION_DELIM)
 957       {
 958         *s = EOS;
 959         break;
 960       }
 961     else
 962       *s = lowcase (*s);
 963   return start;
 964 }
 965 #endif /* VMS */
 966
 967 \f
 968 int
 969 main (argc, argv)
 970      int argc;
 971      char *argv[];
 972 {
 973   int i;
 974   unsigned int nincluded_files;
 975   char **included_files;
 976   argument *argbuffer;
 977   int current_arg, file_count;
 978   linebuffer filename_lb;
 979 #ifdef VMS
 980   bool got_err;
 981 #endif
 982  char *optstring;
 983  int opt;
 984
 985
 986 #ifdef DOS_NT
 987   _fmode = O_BINARY;   /* all of files are treated as binary files */
 988 #endif /* DOS_NT */
 989
 990   progname = argv[0];
 991   nincluded_files = 0;
 992   included_files = xnew (argc, char *);
 993   current_arg = 0;
 994   file_count = 0;
 995
 996   /* Allocate enough no matter what happens.  Overkill, but each one
 997      is small. */
 998   argbuffer = xnew (argc, argument);
 999
1000 #ifdef ETAGS_REGEXPS
1001   /* Set syntax for regular expression routines. */
1002   re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
1003   /* Translation table for case-insensitive search. */
1004   for (i = 0; i < CHAR_SET_SIZE; i++)
1005     lc_trans[i] = lowcase (i);
1006 #endif /* ETAGS_REGEXPS */
1007
1008   /*
1009    * If etags, always find typedefs and structure tags.  Why not?
1010    * Also default to find macro constants, enum constants and
1011    * global variables.
1012    */
1013   if (!CTAGS)
1014     {
1015       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1016       globals = TRUE;
1017     }
1018
1019   optstring = "-";
1020 #ifdef ETAGS_REGEXPS
1021   optstring = "-r:Rc:";
1022 #endif /* ETAGS_REGEXPS */
1023 #ifndef LONG_OPTIONS
1024   optstring = optstring + 1;
1025 #endif /* LONG_OPTIONS */
1026   optstring = concat (optstring,
1027                       "Cf:Il:o:SVhH",
1028                       (CTAGS) ? "BxdtTuvw" : "aDi:");
1029
1030   while ((opt = getopt_long (argc, argv, optstring, longopts, 0)) != EOF)
1031     switch (opt)
1032       {
1033       case 0:
1034         /* If getopt returns 0, then it has already processed a
1035            long-named option.  We should do nothing.  */
1036         break;
1037
1038       case 1:
1039         /* This means that a file name has been seen.  Record it. */
1040         argbuffer[current_arg].arg_type = at_filename;
1041         argbuffer[current_arg].what     = optarg;
1042         ++current_arg;
1043         ++file_count;
1044         break;
1045
1046       case STDIN:
1047         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1048         argbuffer[current_arg].arg_type = at_stdin;
1049         argbuffer[current_arg].what     = optarg;
1050         ++current_arg;
1051         ++file_count;
1052         parsing_stdin = TRUE;
1053         break;
1054
1055         /* Common options. */
1056       case 'C': cplusplus = TRUE;               break;
1057       case 'f':         /* for compatibility with old makefiles */
1058       case 'o':
1059         if (tagfile)
1060           {
1061             error ("-o option may only be given once.", (char *)NULL);
1062             suggest_asking_for_help ();
1063           }
1064         tagfile = optarg;
1065         break;
1066       case 'I':
1067       case 'S':         /* for backward compatibility */
1068         noindentypedefs = TRUE;
1069         break;
1070       case 'l':
1071         {
1072           language *lang = get_language_from_langname (optarg);
1073           if (lang != NULL)
1074             {
1075               argbuffer[current_arg].lang = lang;
1076               argbuffer[current_arg].arg_type = at_language;
1077               ++current_arg;
1078             }
1079         }
1080         break;
1081       case 'r':
1082         argbuffer[current_arg].arg_type = at_regexp;
1083         argbuffer[current_arg].what = optarg;
1084         ++current_arg;
1085         break;
1086       case 'R':
1087         argbuffer[current_arg].arg_type = at_regexp;
1088         argbuffer[current_arg].what = NULL;
1089         ++current_arg;
1090         break;
1091       case 'c':
1092         argbuffer[current_arg].arg_type = at_icregexp;
1093         argbuffer[current_arg].what = optarg;
1094         ++current_arg;
1095         break;
1096       case 'V':
1097         print_version ();
1098         break;
1099       case 'h':
1100       case 'H':
1101         print_help ();
1102         break;
1103
1104         /* Etags options */
1105       case 'a': append_to_tagfile = TRUE;                       break;
1106       case 'D': constantypedefs = FALSE;                        break;
1107       case 'i': included_files[nincluded_files++] = optarg;     break;
1108
1109         /* Ctags options. */
1110       case 'B': searchar = '?';                                 break;
1111       case 'd': constantypedefs = TRUE;                         break;
1112       case 't': typedefs = TRUE;                                break;
1113       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1114       case 'u': update = TRUE;                                  break;
1115       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1116       case 'x': cxref_style = TRUE;                             break;
1117       case 'w': no_warnings = TRUE;                             break;
1118       default:
1119         suggest_asking_for_help ();
1120       }
1121
1122   for (; optind < argc; ++optind)
1123     {
1124       argbuffer[current_arg].arg_type = at_filename;
1125       argbuffer[current_arg].what = argv[optind];
1126       ++current_arg;
1127       ++file_count;
1128     }
1129
1130   if (nincluded_files == 0 && file_count == 0)
1131     {
1132       error ("no input files specified.", (char *)NULL);
1133       suggest_asking_for_help ();
1134     }
1135
1136   if (tagfile == NULL)
1137     tagfile = CTAGS ? "tags" : "TAGS";
1138   cwd = etags_getcwd ();        /* the current working directory */
1139   if (cwd[strlen (cwd) - 1] != '/')
1140     {
1141       char *oldcwd = cwd;
1142       cwd = concat (oldcwd, "/", "");
1143       free (oldcwd);
1144     }
1145   if (streq (tagfile, "-"))
1146     tagfiledir = cwd;
1147   else
1148     tagfiledir = absolute_dirname (tagfile, cwd);
1149
1150   init ();                      /* set up boolean "functions" */
1151
1152   initbuffer (&lb);
1153   initbuffer (&filename_lb);
1154
1155   if (!CTAGS)
1156     {
1157       if (streq (tagfile, "-"))
1158         {
1159           tagf = stdout;
1160 #ifdef DOS_NT
1161           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1162              doesn't take effect until after `stdout' is already open). */
1163           if (!isatty (fileno (stdout)))
1164             setmode (fileno (stdout), O_BINARY);
1165 #endif /* DOS_NT */
1166         }
1167       else
1168         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1169       if (tagf == NULL)
1170         pfatal (tagfile);
1171     }
1172
1173   /*
1174    * Loop through files finding functions.
1175    */
1176   for (i = 0; i < current_arg; ++i)
1177     {
1178       static language *lang;    /* non-NULL if language is forced */
1179       char *this_file;
1180
1181       switch (argbuffer[i].arg_type)
1182         {
1183         case at_language:
1184           lang = argbuffer[i].lang;
1185           break;
1186 #ifdef ETAGS_REGEXPS
1187         case at_regexp:
1188           analyse_regex (argbuffer[i].what, FALSE);
1189           break;
1190         case at_icregexp:
1191           analyse_regex (argbuffer[i].what, TRUE);
1192           break;
1193 #endif
1194         case at_filename:
1195 #ifdef VMS
1196           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1197             {
1198               if (got_err)
1199                 {
1200                   error ("can't find file %s\n", this_file);
1201                   argc--, argv++;
1202                 }
1203               else
1204                 {
1205                   this_file = massage_name (this_file);
1206                 }
1207 #else
1208               this_file = argbuffer[i].what;
1209 #endif
1210               /* Input file named "-" means read file names from stdin
1211                  (one per line) and use them. */
1212               if (streq (this_file, "-"))
1213                 {
1214                   if (parsing_stdin)
1215                     fatal ("cannot parse standard input AND read file names from it",
1216                            (char *)NULL);
1217                   while (readline_internal (&filename_lb, stdin) > 0)
1218                     process_file_name (filename_lb.buffer, lang);
1219                 }
1220               else
1221                 process_file_name (this_file, lang);
1222 #ifdef VMS
1223             }
1224 #endif
1225           break;
1226         case at_stdin:
1227           this_file = argbuffer[i].what;
1228           process_file (stdin, this_file, lang);
1229           break;
1230         }
1231     }
1232
1233 #ifdef ETAGS_REGEXPS
1234   free_patterns ();
1235 #endif /* ETAGS_REGEXPS */
1236
1237   if (!CTAGS || cxref_style)
1238     {
1239       put_entries (nodehead);
1240       free_tree (nodehead);
1241       nodehead = NULL;
1242       if (!CTAGS)
1243         while (nincluded_files-- > 0)
1244           fprintf (tagf, "\f\n%s,include\n", *included_files++);
1245
1246       if (fclose (tagf) == EOF)
1247         pfatal (tagfile);
1248       exit (GOOD);
1249     }
1250
1251   if (update)
1252     {
1253       char cmd[BUFSIZ];
1254       for (i = 0; i < current_arg; ++i)
1255         {
1256           switch (argbuffer[i].arg_type)
1257             {
1258             case at_filename:
1259             case at_stdin:
1260               break;
1261             default:
1262               continue;         /* the for loop */
1263             }
1264           sprintf (cmd,
1265                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1266                    tagfile, argbuffer[i].what, tagfile);
1267           if (system (cmd) != GOOD)
1268             fatal ("failed to execute shell command", (char *)NULL);
1269         }
1270       append_to_tagfile = TRUE;
1271     }
1272
1273   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1274   if (tagf == NULL)
1275     pfatal (tagfile);
1276   put_entries (nodehead);
1277   free_tree (nodehead);
1278   nodehead = NULL;
1279   if (fclose (tagf) == EOF)
1280     pfatal (tagfile);
1281
1282   if (update)
1283     {
1284       char cmd[2*BUFSIZ+10];
1285       sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1286       exit (system (cmd));
1287     }
1288   return GOOD;
1289 }
1290
1291
1292 /*
1293  * Return a compressor given the file name.  If EXTPTR is non-zero,
1294  * return a pointer into FILE where the compressor-specific
1295  * extension begins.  If no compressor is found, NULL is returned
1296  * and EXTPTR is not significant.
1297  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1298  */
1299 static compressor *
1300 get_compressor_from_suffix (file, extptr)
1301      char *file;
1302      char **extptr;
1303 {
1304   compressor *compr;
1305   char *slash, *suffix;
1306
1307   /* This relies on FN to be after canonicalize_filename,
1308      so we don't need to consider backslashes on DOS_NT.  */
1309   slash = etags_strrchr (file, '/');
1310   suffix = etags_strrchr (file, '.');
1311   if (suffix == NULL || suffix < slash)
1312     return NULL;
1313   if (extptr != NULL)
1314     *extptr = suffix;
1315   suffix += 1;
1316   /* Let those poor souls who live with DOS 8+3 file name limits get
1317      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1318      Only the first do loop is run if not MSDOS */
1319   do
1320     {
1321       for (compr = compressors; compr->suffix != NULL; compr++)
1322         if (streq (compr->suffix, suffix))
1323           return compr;
1324       if (!MSDOS)
1325         break;                  /* do it only once: not really a loop */
1326       if (extptr != NULL)
1327         *extptr = ++suffix;
1328     } while (*suffix != '\0');
1329   return NULL;
1330 }
1331
1332
1333
1334 /*
1335  * Return a language given the name.
1336  */
1337 static language *
1338 get_language_from_langname (name)
1339      const char *name;
1340 {
1341   language *lang;
1342
1343   if (name == NULL)
1344     error ("empty language name", (char *)NULL);
1345   else
1346     {
1347       for (lang = lang_names; lang->name != NULL; lang++)
1348         if (streq (name, lang->name))
1349           return lang;
1350       error ("unknown language \"%s\"", name);
1351     }
1352
1353   return NULL;
1354 }
1355
1356
1357 /*
1358  * Return a language given the interpreter name.
1359  */
1360 static language *
1361 get_language_from_interpreter (interpreter)
1362      char *interpreter;
1363 {
1364   language *lang;
1365   char **iname;
1366
1367   if (interpreter == NULL)
1368     return NULL;
1369   for (lang = lang_names; lang->name != NULL; lang++)
1370     if (lang->interpreters != NULL)
1371       for (iname = lang->interpreters; *iname != NULL; iname++)
1372         if (streq (*iname, interpreter))
1373             return lang;
1374
1375   return NULL;
1376 }
1377
1378
1379
1380 /*
1381  * Return a language given the file name.
1382  */
1383 static language *
1384 get_language_from_filename (file, case_sensitive)
1385      char *file;
1386      bool case_sensitive;
1387 {
1388   language *lang;
1389   char **name, **ext, *suffix;
1390
1391   /* Try whole file name first. */
1392   for (lang = lang_names; lang->name != NULL; lang++)
1393     if (lang->filenames != NULL)
1394       for (name = lang->filenames; *name != NULL; name++)
1395         if ((case_sensitive)
1396             ? streq (*name, file)
1397             : strcaseeq (*name, file))
1398           return lang;
1399
1400   /* If not found, try suffix after last dot. */
1401   suffix = etags_strrchr (file, '.');
1402   if (suffix == NULL)
1403     return NULL;
1404   suffix += 1;
1405   for (lang = lang_names; lang->name != NULL; lang++)
1406     if (lang->suffixes != NULL)
1407       for (ext = lang->suffixes; *ext != NULL; ext++)
1408         if ((case_sensitive)
1409             ? streq (*ext, suffix)
1410             : strcaseeq (*ext, suffix))
1411           return lang;
1412   return NULL;
1413 }
1414
1415 \f
1416 /*
1417  * This routine is called on each file argument.
1418  */
1419 static void
1420 process_file_name (file, lang)
1421      char *file;
1422      language *lang;
1423 {
1424   struct stat stat_buf;
1425   FILE *inf;
1426   fdesc *fdp;
1427   compressor *compr;
1428   char *compressed_name, *uncompressed_name;
1429   char *ext, *real_name;
1430   int retval;
1431
1432   canonicalize_filename (file);
1433   if (streq (file, tagfile) && !streq (tagfile, "-"))
1434     {
1435       error ("skipping inclusion of %s in self.", file);
1436       return;
1437     }
1438   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1439     {
1440       compressed_name = NULL;
1441       real_name = uncompressed_name = savestr (file);
1442     }
1443   else
1444     {
1445       real_name = compressed_name = savestr (file);
1446       uncompressed_name = savenstr (file, ext - file);
1447     }
1448
1449   /* If the canonicalized uncompressed name
1450      has already been dealt with, skip it silently. */
1451   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1452     {
1453       assert (fdp->infname != NULL);
1454       if (streq (uncompressed_name, fdp->infname))
1455         goto cleanup;
1456     }
1457
1458   if (stat (real_name, &stat_buf) != 0)
1459     {
1460       /* Reset real_name and try with a different name. */
1461       real_name = NULL;
1462       if (compressed_name != NULL) /* try with the given suffix */
1463         {
1464           if (stat (uncompressed_name, &stat_buf) == 0)
1465             real_name = uncompressed_name;
1466         }
1467       else                      /* try all possible suffixes */
1468         {
1469           for (compr = compressors; compr->suffix != NULL; compr++)
1470             {
1471               compressed_name = concat (file, ".", compr->suffix);
1472               if (stat (compressed_name, &stat_buf) != 0)
1473                 {
1474                   if (MSDOS)
1475                     {
1476                       char *suf = compressed_name + strlen (file);
1477                       size_t suflen = strlen (compr->suffix) + 1;
1478                       for ( ; suf[1]; suf++, suflen--)
1479                         {
1480                           memmove (suf, suf + 1, suflen);
1481                           if (stat (compressed_name, &stat_buf) == 0)
1482                             {
1483                               real_name = compressed_name;
1484                               break;
1485                             }
1486                         }
1487                       if (real_name != NULL)
1488                         break;
1489                     } /* MSDOS */
1490                   free (compressed_name);
1491                   compressed_name = NULL;
1492                 }
1493               else
1494                 {
1495                   real_name = compressed_name;
1496                   break;
1497                 }
1498             }
1499         }
1500       if (real_name == NULL)
1501         {
1502           perror (file);
1503           goto cleanup;
1504         }
1505     } /* try with a different name */
1506
1507   if (!S_ISREG (stat_buf.st_mode))
1508     {
1509       error ("skipping %s: it is not a regular file.", real_name);
1510       goto cleanup;
1511     }
1512   if (real_name == compressed_name)
1513     {
1514       char *cmd = concat (compr->command, " ", real_name);
1515       inf = (FILE *) popen (cmd, "r");
1516       free (cmd);
1517     }
1518   else
1519     inf = fopen (real_name, "r");
1520   if (inf == NULL)
1521     {
1522       perror (real_name);
1523       goto cleanup;
1524     }
1525
1526   process_file (inf, uncompressed_name, lang);
1527
1528   if (real_name == compressed_name)
1529     retval = pclose (inf);
1530   else
1531     retval = fclose (inf);
1532   if (retval < 0)
1533     pfatal (file);
1534
1535  cleanup:
1536   if (compressed_name) free (compressed_name);
1537   if (uncompressed_name) free (uncompressed_name);
1538   last_node = NULL;
1539   curfdp = NULL;
1540   return;
1541 }
1542
1543 static void
1544 process_file (fh, fn, lang)
1545      FILE *fh;
1546      char *fn;
1547      language *lang;
1548 {
1549   static const fdesc emptyfdesc;
1550   fdesc *fdp;
1551
1552   /* Create a new input file description entry. */
1553   fdp = xnew (1, fdesc);
1554   *fdp = emptyfdesc;
1555   fdp->next = fdhead;
1556   fdp->infname = savestr (fn);
1557   fdp->lang = lang;
1558   fdp->infabsname = absolute_filename (fn, cwd);
1559   fdp->infabsdir = absolute_dirname (fn, cwd);
1560   if (filename_is_absolute (fn))
1561     {
1562       /* An absolute file name.  Canonicalize it. */
1563       fdp->taggedfname = absolute_filename (fn, NULL);
1564     }
1565   else
1566     {
1567       /* A file name relative to cwd.  Make it relative
1568          to the directory of the tags file. */
1569       fdp->taggedfname = relative_filename (fn, tagfiledir);
1570     }
1571   fdp->usecharno = TRUE;        /* use char position when making tags */
1572   fdp->prop = NULL;
1573
1574   fdhead = fdp;
1575   curfdp = fdhead;              /* the current file description */
1576
1577   find_entries (fh);
1578
1579   /* If not Ctags, and if this is not metasource and if it contained no #line
1580      directives, we can write the tags and free all nodes pointing to
1581      curfdp. */
1582   if (!CTAGS
1583       && curfdp->usecharno      /* no #line directives in this file */
1584       && !curfdp->lang->metasource)
1585     {
1586       node *np, *prev;
1587
1588       /* Look for the head of the sublist relative to this file.  See add_node
1589          for the structure of the node tree. */
1590       prev = NULL;
1591       for (np = nodehead; np != NULL; prev = np, np = np->left)
1592         if (np->fdp == curfdp)
1593           break;
1594
1595       /* If we generated tags for this file, write and delete them. */
1596       if (np != NULL)
1597         {
1598           /* This is the head of the last sublist, if any.  The following
1599              instructions depend on this being true. */
1600           assert (np->left == NULL);
1601
1602           assert (fdhead == curfdp);
1603           assert (last_node->fdp == curfdp);
1604           put_entries (np);     /* write tags for file curfdp->taggedfname */
1605           free_tree (np);       /* remove the written nodes */
1606           if (prev == NULL)
1607             nodehead = NULL;    /* no nodes left */
1608           else
1609             prev->left = NULL;  /* delete the pointer to the sublist */
1610         }
1611     }
1612 }
1613
1614 /*
1615  * This routine sets up the boolean pseudo-functions which work
1616  * by setting boolean flags dependent upon the corresponding character.
1617  * Every char which is NOT in that string is not a white char.  Therefore,
1618  * all of the array "_wht" is set to FALSE, and then the elements
1619  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1620  * of a char is TRUE if it is the string "white", else FALSE.
1621  */
1622 static void
1623 init ()
1624 {
1625   register char *sp;
1626   register int i;
1627
1628   for (i = 0; i < CHARS; i++)
1629     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1630   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1631   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1632   notinname('\0') = notinname('\n');
1633   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1634   begtoken('\0') = begtoken('\n');
1635   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1636   intoken('\0') = intoken('\n');
1637   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1638   endtoken('\0') = endtoken('\n');
1639 }
1640
1641 /*
1642  * This routine opens the specified file and calls the function
1643  * which finds the function and type definitions.
1644  */
1645 static void
1646 find_entries (inf)
1647      FILE *inf;
1648 {
1649   char *cp;
1650   node *old_last_node;
1651   language *lang = curfdp->lang;
1652   Lang_function *parser = NULL;
1653
1654   /* If user specified a language, use it. */
1655   if (lang != NULL && lang->function != NULL)
1656     {
1657       parser = lang->function;
1658     }
1659
1660   /* Else try to guess the language given the file name. */
1661   if (parser == NULL)
1662     {
1663       lang = get_language_from_filename (curfdp->infname, TRUE);
1664       if (lang != NULL && lang->function != NULL)
1665         {
1666           curfdp->lang = lang;
1667           parser = lang->function;
1668         }
1669     }
1670
1671   /* Else look for sharp-bang as the first two characters. */
1672   if (parser == NULL
1673       && readline_internal (&lb, inf) > 0
1674       && lb.len >= 2
1675       && lb.buffer[0] == '#'
1676       && lb.buffer[1] == '!')
1677     {
1678       char *lp;
1679
1680       /* Set lp to point at the first char after the last slash in the
1681          line or, if no slashes, at the first nonblank.  Then set cp to
1682          the first successive blank and terminate the string. */
1683       lp = etags_strrchr (lb.buffer+2, '/');
1684       if (lp != NULL)
1685         lp += 1;
1686       else
1687         lp = skip_spaces (lb.buffer + 2);
1688       cp = skip_non_spaces (lp);
1689       *cp = '\0';
1690
1691       if (strlen (lp) > 0)
1692         {
1693           lang = get_language_from_interpreter (lp);
1694           if (lang != NULL && lang->function != NULL)
1695             {
1696               curfdp->lang = lang;
1697               parser = lang->function;
1698             }
1699         }
1700     }
1701
1702   /* We rewind here, even if inf may be a pipe.  We fail if the
1703      length of the first line is longer than the pipe block size,
1704      which is unlikely. */
1705   if (parser == NULL)
1706     rewind (inf);
1707
1708   /* Else try to guess the language given the case insensitive file name. */
1709   if (parser == NULL)
1710     {
1711       lang = get_language_from_filename (curfdp->infname, FALSE);
1712       if (lang != NULL && lang->function != NULL)
1713         {
1714           curfdp->lang = lang;
1715           parser = lang->function;
1716         }
1717     }
1718
1719   if (!no_line_directive
1720       && curfdp->lang != NULL && curfdp->lang->metasource)
1721     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1722        file, or anyway we parsed a file that is automatically generated from
1723        this one.  If this is the case, the bingo.c file contained #line
1724        directives that generated tags pointing to this file.  Let's delete
1725        them all before parsing this file, which is the real source. */
1726     {
1727       fdesc **fdpp = &fdhead;
1728       while (*fdpp != NULL)
1729         if (*fdpp != curfdp
1730             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1731           /* We found one of those!  We must delete both the file description
1732              and all tags referring to it. */
1733           {
1734             fdesc *badfdp = *fdpp;
1735
1736             if (DEBUG)
1737               fprintf (stderr,
1738                        "Removing references to \"%s\" obtained from \"%s\"\n",
1739                        badfdp->taggedfname, badfdp->infname);
1740
1741             /* Delete the tags referring to badfdp. */
1742             invalidate_nodes (badfdp, &nodehead);
1743
1744             *fdpp = badfdp->next; /* remove the bad description from the list */
1745             free_fdesc (badfdp);
1746           }
1747         else
1748           fdpp = &(*fdpp)->next; /* advance the list pointer */
1749     }
1750
1751   if (parser != NULL)
1752     {
1753       parser (inf);
1754       return;
1755     }
1756
1757   /* Else try Fortran. */
1758   old_last_node = last_node;
1759   curfdp->lang = get_language_from_langname ("fortran");
1760   Fortran_functions (inf);
1761
1762   if (old_last_node == last_node)
1763     /* No Fortran entries found.  Try C. */
1764     {
1765       /* We do not tag if rewind fails.
1766          Only the file name will be recorded in the tags file. */
1767       rewind (inf);
1768       curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1769       default_C_entries (inf);
1770     }
1771   return;
1772 }
1773
1774 \f
1775 /* Record a tag. */
1776 static void
1777 pfnote (name, is_func, linestart, linelen, lno, cno)
1778      char *name;                /* tag name, or NULL if unnamed */
1779      bool is_func;              /* tag is a function */
1780      char *linestart;           /* start of the line where tag is */
1781      int linelen;               /* length of the line where tag is */
1782      int lno;                   /* line number */
1783      long cno;                  /* character number */
1784 {
1785   register node *np;
1786
1787   if (CTAGS && name == NULL)
1788     return;
1789
1790   np = xnew (1, node);
1791
1792   /* If ctags mode, change name "main" to M<thisfilename>. */
1793   if (CTAGS && !cxref_style && streq (name, "main"))
1794     {
1795       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
1796       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
1797       fp = etags_strrchr (np->name, '.');
1798       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1799         fp[0] = '\0';
1800     }
1801   else
1802     np->name = name;
1803   np->valid = TRUE;
1804   np->been_warned = FALSE;
1805   np->fdp = curfdp;
1806   np->is_func = is_func;
1807   np->lno = lno;
1808   if (np->fdp->usecharno)
1809     /* Our char numbers are 0-base, because of C language tradition?
1810        ctags compatibility?  old versions compatibility?   I don't know.
1811        Anyway, since emacs's are 1-base we expect etags.el to take care
1812        of the difference.  If we wanted to have 1-based numbers, we would
1813        uncomment the +1 below. */
1814     np->cno = cno /* + 1 */ ;
1815   else
1816     np->cno = invalidcharno;
1817   np->left = np->right = NULL;
1818   if (CTAGS && !cxref_style)
1819     {
1820       if (strlen (linestart) < 50)
1821         np->pat = concat (linestart, "$", "");
1822       else
1823         np->pat = savenstr (linestart, 50);
1824     }
1825   else
1826     np->pat = savenstr (linestart, linelen);
1827
1828   add_node (np, &nodehead);
1829 }
1830
1831 /*
1832  * TAGS format specification
1833  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1834  *
1835  * pfnote should emit the optimized form [unnamed tag] only if:
1836  *  1. name does not contain any of the characters " \t\r\n(),;";
1837  *  2. linestart contains name as either a rightmost, or rightmost but
1838  *     one character, substring;
1839  *  3. the character, if any, immediately before name in linestart must
1840  *     be one of the characters " \t(),;";
1841  *  4. the character, if any, immediately after name in linestart must
1842  *     also be one of the characters " \t(),;".
1843  *
1844  * The real implementation uses the notinname() macro, which recognises
1845  * characters slightly different from " \t\r\n(),;".  See the variable
1846  * `nonam'.
1847  */
1848 #define traditional_tag_style TRUE
1849 static void
1850 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1851      char *name;                /* tag name, or NULL if unnamed */
1852      int namelen;               /* tag length */
1853      bool is_func;              /* tag is a function */
1854      char *linestart;           /* start of the line where tag is */
1855      int linelen;               /* length of the line where tag is */
1856      int lno;                   /* line number */
1857      long cno;                  /* character number */
1858 {
1859   register char *cp;
1860   bool named;
1861
1862   named = TRUE;
1863   if (!CTAGS)
1864     {
1865       for (cp = name; !notinname (*cp); cp++)
1866         continue;
1867       if (*cp == '\0')                          /* rule #1 */
1868         {
1869           cp = linestart + linelen - namelen;
1870           if (notinname (linestart[linelen-1]))
1871             cp -= 1;                            /* rule #4 */
1872           if (cp >= linestart                   /* rule #2 */
1873               && (cp == linestart
1874                   || notinname (cp[-1]))        /* rule #3 */
1875               && strneq (name, cp, namelen))    /* rule #2 */
1876             named = FALSE;      /* use unnamed tag */
1877         }
1878     }
1879
1880   if (named)
1881     name = savenstr (name, namelen);
1882   else
1883     name = NULL;
1884   pfnote (name, is_func, linestart, linelen, lno, cno);
1885 }
1886
1887 /*
1888  * free_tree ()
1889  *      recurse on left children, iterate on right children.
1890  */
1891 static void
1892 free_tree (np)
1893      register node *np;
1894 {
1895   while (np)
1896     {
1897       register node *node_right = np->right;
1898       free_tree (np->left);
1899       if (np->name != NULL)
1900         free (np->name);
1901       free (np->pat);
1902       free (np);
1903       np = node_right;
1904     }
1905 }
1906
1907 /*
1908  * free_fdesc ()
1909  *      delete a file description
1910  */
1911 static void
1912 free_fdesc (fdp)
1913      register fdesc *fdp;
1914 {
1915   if (fdp->infname != NULL) free (fdp->infname);
1916   if (fdp->infabsname != NULL) free (fdp->infabsname);
1917   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
1918   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
1919   if (fdp->prop != NULL) free (fdp->prop);
1920   free (fdp);
1921 }
1922
1923 /*
1924  * add_node ()
1925  *      Adds a node to the tree of nodes.  In etags mode, sort by file
1926  *      name.  In ctags mode, sort by tag name.  Make no attempt at
1927  *      balancing.
1928  *
1929  *      add_node is the only function allowed to add nodes, so it can
1930  *      maintain state.
1931  */
1932 static void
1933 add_node (np, cur_node_p)
1934      node *np, **cur_node_p;
1935 {
1936   register int dif;
1937   register node *cur_node = *cur_node_p;
1938
1939   if (cur_node == NULL)
1940     {
1941       *cur_node_p = np;
1942       last_node = np;
1943       return;
1944     }
1945
1946   if (!CTAGS)
1947     /* Etags Mode */
1948     {
1949       /* For each file name, tags are in a linked sublist on the right
1950          pointer.  The first tags of different files are a linked list
1951          on the left pointer.  last_node points to the end of the last
1952          used sublist. */
1953       if (last_node != NULL && last_node->fdp == np->fdp)
1954         {
1955           /* Let's use the same sublist as the last added node. */
1956           assert (last_node->right == NULL);
1957           last_node->right = np;
1958           last_node = np;
1959         }
1960       else if (cur_node->fdp == np->fdp)
1961         {
1962           /* Scanning the list we found the head of a sublist which is
1963              good for us.  Let's scan this sublist. */
1964           add_node (np, &cur_node->right);
1965         }
1966       else
1967         /* The head of this sublist is not good for us.  Let's try the
1968            next one. */
1969         add_node (np, &cur_node->left);
1970     } /* if ETAGS mode */
1971
1972   else
1973     {
1974       /* Ctags Mode */
1975       dif = strcmp (np->name, cur_node->name);
1976
1977       /*
1978        * If this tag name matches an existing one, then
1979        * do not add the node, but maybe print a warning.
1980        */
1981       if (!dif)
1982         {
1983           if (np->fdp == cur_node->fdp)
1984             {
1985               if (!no_warnings)
1986                 {
1987                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1988                            np->fdp->infname, lineno, np->name);
1989                   fprintf (stderr, "Second entry ignored\n");
1990                 }
1991             }
1992           else if (!cur_node->been_warned && !no_warnings)
1993             {
1994               fprintf
1995                 (stderr,
1996                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
1997                  np->fdp->infname, cur_node->fdp->infname, np->name);
1998               cur_node->been_warned = TRUE;
1999             }
2000           return;
2001         }
2002
2003       /* Actually add the node */
2004       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2005     } /* if CTAGS mode */
2006 }
2007
2008 /*
2009  * invalidate_nodes ()
2010  *      Scan the node tree and invalidate all nodes pointing to the
2011  *      given file description (CTAGS case) or free them (ETAGS case).
2012  */
2013 static void
2014 invalidate_nodes (badfdp, npp)
2015      fdesc *badfdp;
2016      node **npp;
2017 {
2018   node *np = *npp;
2019
2020   if (np == NULL)
2021     return;
2022
2023   if (CTAGS)
2024     {
2025       if (np->left != NULL)
2026         invalidate_nodes (badfdp, &np->left);
2027       if (np->fdp == badfdp)
2028         np-> valid = FALSE;
2029       if (np->right != NULL)
2030         invalidate_nodes (badfdp, &np->right);
2031     }
2032   else
2033     {
2034       node **next = &np->left;
2035       if (np->fdp == badfdp)
2036         {
2037           *npp = *next;         /* detach the sublist from the list */
2038           np->left = NULL;      /* isolate it */
2039           free_tree (np);       /* free it */
2040         }
2041       invalidate_nodes (badfdp, next);
2042     }
2043 }
2044
2045 \f
2046 static int total_size_of_entries __P((node *));
2047 static int number_len __P((long));
2048
/* Return how many decimal digits are needed to print NUM, which is
   expected to be non-negative.  Zero counts as one digit. */
static int
number_len (num)
     long num;
{
  int digits = 1;

  /* Every further division by ten that leaves something adds a digit. */
  for (num /= 10; num > 0; num /= 10)
    digits++;

  return digits;
}
2059
2060 /*
2061  * Return total number of characters that put_entries will output for
2062  * the nodes in the linked list at the right of the specified node.
2063  * This count is irrelevant with etags.el since emacs 19.34 at least,
2064  * but is still supplied for backward compatibility.
2065  */
2066 static int
2067 total_size_of_entries (np)
2068      register node *np;
2069 {
2070   register int total = 0;
2071
2072   for (; np != NULL; np = np->right)
2073     {
2074       total += strlen (np->pat) + 1;            /* pat\177 */
2075       if (np->name != NULL)
2076         total += strlen (np->name) + 1;         /* name\001 */
2077       total += number_len ((long) np->lno) + 1; /* lno, */
2078       if (np->cno != invalidcharno)             /* cno */
2079         total += number_len (np->cno);
2080       total += 1;                               /* newline */
2081     }
2082
2083   return total;
2084 }
2085
2086 static void
2087 put_entries (np)
2088      register node *np;
2089 {
2090   register char *sp;
2091   static fdesc *fdp = NULL;
2092
2093   if (np == NULL)
2094     return;
2095
2096   /* Output subentries that precede this one */
2097   if (CTAGS)
2098     put_entries (np->left);
2099
2100   /* Output this entry */
2101   if (np->valid)
2102     {
2103       if (!CTAGS)
2104         {
2105           /* Etags mode */
2106           if (fdp != np->fdp)
2107             {
2108               fdp = np->fdp;
2109               fprintf (tagf, "\f\n%s,%d\n",
2110                        fdp->taggedfname, total_size_of_entries (np));
2111             }
2112           fputs (np->pat, tagf);
2113           fputc ('\177', tagf);
2114           if (np->name != NULL)
2115             {
2116               fputs (np->name, tagf);
2117               fputc ('\001', tagf);
2118             }
2119           fprintf (tagf, "%d,", np->lno);
2120           if (np->cno != invalidcharno)
2121             fprintf (tagf, "%ld", np->cno);
2122           fputs ("\n", tagf);
2123         }
2124       else
2125         {
2126           /* Ctags mode */
2127           if (np->name == NULL)
2128             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2129
2130           if (cxref_style)
2131             {
2132               if (vgrind_style)
2133                 fprintf (stdout, "%s %s %d\n",
2134                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2135               else
2136                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2137                          np->name, np->lno, np->fdp->taggedfname, np->pat);
2138             }
2139           else
2140             {
2141               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2142
2143               if (np->is_func)
2144                 {               /* function or #define macro with args */
2145                   putc (searchar, tagf);
2146                   putc ('^', tagf);
2147
2148                   for (sp = np->pat; *sp; sp++)
2149                     {
2150                       if (*sp == '\\' || *sp == searchar)
2151                         putc ('\\', tagf);
2152                       putc (*sp, tagf);
2153                     }
2154                   putc (searchar, tagf);
2155                 }
2156               else
2157                 {               /* anything else; text pattern inadequate */
2158                   fprintf (tagf, "%d", np->lno);
2159                 }
2160               putc ('\n', tagf);
2161             }
2162         }
2163     } /* if this node contains a valid tag */
2164
2165   /* Output subentries that follow this one */
2166   put_entries (np->right);
2167   if (!CTAGS)
2168     put_entries (np->left);
2169 }
2170
2171 \f
2172 /* C extensions. */
2173 #define C_EXT   0x00fff         /* C extensions */
2174 #define C_PLAIN 0x00000         /* C */
2175 #define C_PLPL  0x00001         /* C++ */
2176 #define C_STAR  0x00003         /* C* */
2177 #define C_JAVA  0x00005         /* JAVA */
2178 #define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
2179 #define YACC    0x10000         /* yacc file */
2180
/*
 * The C symbol tables.
 *
 * sym_type is the class of a token, as found in the keyword table that
 * follows.  According to that table:
 *   st_none          not a keyword (what C_symtype returns when the
 *                    lookup fails or the language mask does not match)
 *   st_C_objprot     @interface, @protocol
 *   st_C_objimpl     @implementation
 *   st_C_objend      @end
 *   st_C_gnumacro    DEFUN, SYSCALL, ENTRY, PSEUDO
 *   st_C_ignore      if, for, while, switch, return, import, package,
 *                    friend
 *   st_C_javastruct  extends, implements
 *   st_C_operator    operator
 *   st_C_class       class
 *   st_C_template    template
 *   st_C_struct      interface, namespace, domain, union, struct
 *   st_C_extern, st_C_enum, st_C_define, st_C_typedef
 *                    the keyword with the same name
 *   st_C_typespec    bool, long, short, int, char, float, double, signed,
 *                    unsigned, auto, void, static, const, volatile,
 *                    explicit, mutable, typename
 */
enum sym_type
{
  st_none,
  st_C_objprot, st_C_objimpl, st_C_objend,
  st_C_gnumacro,
  st_C_ignore,
  st_C_javastruct,
  st_C_operator,
  st_C_class, st_C_template,
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
};
2195
2196 static unsigned int hash __P((const char *, unsigned int));
2197 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2198 static enum sym_type C_symtype __P((char *, int, int));
2199
2200 /* Feed stuff between (but not including) %[ and %] lines to:
2201       gperf -c -k 1,3 -o -p -r -t
2202 %[
2203 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2204 %%
2205 if,             0,      st_C_ignore
2206 for,            0,      st_C_ignore
2207 while,          0,      st_C_ignore
2208 switch,         0,      st_C_ignore
2209 return,         0,      st_C_ignore
2210 @interface,     0,      st_C_objprot
2211 @protocol,      0,      st_C_objprot
2212 @implementation,0,      st_C_objimpl
2213 @end,           0,      st_C_objend
2214 import,         C_JAVA, st_C_ignore
2215 package,        C_JAVA, st_C_ignore
2216 friend,         C_PLPL, st_C_ignore
2217 extends,        C_JAVA, st_C_javastruct
2218 implements,     C_JAVA, st_C_javastruct
2219 interface,      C_JAVA, st_C_struct
2220 class,          0,      st_C_class
2221 namespace,      C_PLPL, st_C_struct
2222 domain,         C_STAR, st_C_struct
2223 union,          0,      st_C_struct
2224 struct,         0,      st_C_struct
2225 extern,         0,      st_C_extern
2226 enum,           0,      st_C_enum
2227 typedef,        0,      st_C_typedef
2228 define,         0,      st_C_define
2229 operator,       C_PLPL, st_C_operator
2230 template,       0,      st_C_template
2231 bool,           C_PLPL, st_C_typespec
2232 long,           0,      st_C_typespec
2233 short,          0,      st_C_typespec
2234 int,            0,      st_C_typespec
2235 char,           0,      st_C_typespec
2236 float,          0,      st_C_typespec
2237 double,         0,      st_C_typespec
2238 signed,         0,      st_C_typespec
2239 unsigned,       0,      st_C_typespec
2240 auto,           0,      st_C_typespec
2241 void,           0,      st_C_typespec
2242 static,         0,      st_C_typespec
2243 const,          0,      st_C_typespec
2244 volatile,       0,      st_C_typespec
2245 explicit,       C_PLPL, st_C_typespec
2246 mutable,        C_PLPL, st_C_typespec
2247 typename,       C_PLPL, st_C_typespec
2248 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2249 DEFUN,          0,      st_C_gnumacro
2250 SYSCALL,        0,      st_C_gnumacro
2251 ENTRY,          0,      st_C_gnumacro
2252 PSEUDO,         0,      st_C_gnumacro
2253 # These are defined inside C functions, so currently they are not met.
2254 # EXFUN used in glibc, DEFVAR_* in emacs.
2255 #EXFUN,         0,      st_C_gnumacro
2256 #DEFVAR_,       0,      st_C_gnumacro
2257 %]
2258 and replace lines between %< and %> with its output,
2259 then make in_word_set and C_stab_entry static. */
2260 /*%<*/
2261 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
2262 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
2263 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2264
2265 #define TOTAL_KEYWORDS 47
2266 #define MIN_WORD_LENGTH 2
2267 #define MAX_WORD_LENGTH 15
2268 #define MIN_HASH_VALUE 18
2269 #define MAX_HASH_VALUE 138
2270 /* maximum key range = 121, duplicates = 0 */
2271
/* Hash function for the keyword table (gperf output, key positions 1
   and 3): the length of the word plus the values associated with its
   first character and, unless the length is 1 or 2, its third one. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  static unsigned char asso_values[] =
    {
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139,  63, 139, 139, 139,  33,  44,
       62, 139, 139, 139, 139, 139, 139, 139, 139, 139,
       42, 139, 139,  12,  32, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139,  34,  59,  37,
       24,  58,  33,   3, 139,  16, 139, 139,  42,  60,
       18,  11,  39, 139,  23,  57,   4,  63,   6,  20,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139, 139, 139, 139, 139,
      139, 139, 139, 139, 139, 139
    };
  register int hval = len;

  /* Only words of length 1 or 2 leave the third character out. */
  if (hval != 1 && hval != 2)
    hval += asso_values[(unsigned char)str[2]];
  hval += asso_values[(unsigned char)str[0]];

  return hval;
}
2323
2324 #ifdef __GNUC__
2325 __inline
2326 #endif
2327 static struct C_stab_entry *
2328 in_word_set (str, len)
2329      register const char *str;
2330      register unsigned int len;
2331 {
2332   static struct C_stab_entry wordlist[] =
2333     {
2334       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2335       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2336       {"if",            0,      st_C_ignore},
2337       {""}, {""}, {""}, {""},
2338       {"int",           0,      st_C_typespec},
2339       {""}, {""},
2340       {"void",          0,      st_C_typespec},
2341       {""}, {""},
2342       {"interface",     C_JAVA, st_C_struct},
2343       {""},
2344       {"SYSCALL",       0,      st_C_gnumacro},
2345       {""},
2346       {"return",                0,      st_C_ignore},
2347       {""}, {""}, {""}, {""}, {""}, {""}, {""},
2348       {"while",         0,      st_C_ignore},
2349       {"auto",          0,      st_C_typespec},
2350       {""}, {""}, {""}, {""}, {""}, {""},
2351       {"float",         0,      st_C_typespec},
2352       {"typedef",       0,      st_C_typedef},
2353       {"typename",      C_PLPL, st_C_typespec},
2354       {""}, {""}, {""},
2355       {"friend",                C_PLPL, st_C_ignore},
2356       {"volatile",      0,      st_C_typespec},
2357       {""}, {""},
2358       {"for",           0,      st_C_ignore},
2359       {"const",         0,      st_C_typespec},
2360       {"import",                C_JAVA, st_C_ignore},
2361       {""},
2362       {"define",        0,      st_C_define},
2363       {"long",          0,      st_C_typespec},
2364       {"implements",    C_JAVA, st_C_javastruct},
2365       {"signed",        0,      st_C_typespec},
2366       {""},
2367       {"extern",        0,      st_C_extern},
2368       {"extends",       C_JAVA, st_C_javastruct},
2369       {""},
2370       {"mutable",       C_PLPL, st_C_typespec},
2371       {"template",      0,      st_C_template},
2372       {"short",         0,      st_C_typespec},
2373       {"bool",          C_PLPL, st_C_typespec},
2374       {"char",          0,      st_C_typespec},
2375       {"class",         0,      st_C_class},
2376       {"operator",      C_PLPL, st_C_operator},
2377       {""},
2378       {"switch",                0,      st_C_ignore},
2379       {""},
2380       {"ENTRY",         0,      st_C_gnumacro},
2381       {""},
2382       {"package",       C_JAVA, st_C_ignore},
2383       {"union",         0,      st_C_struct},
2384       {"@end",          0,      st_C_objend},
2385       {"struct",        0,      st_C_struct},
2386       {"namespace",     C_PLPL, st_C_struct},
2387       {""}, {""},
2388       {"domain",        C_STAR, st_C_struct},
2389       {"@interface",    0,      st_C_objprot},
2390       {"PSEUDO",                0,      st_C_gnumacro},
2391       {"double",        0,      st_C_typespec},
2392       {""},
2393       {"@protocol",     0,      st_C_objprot},
2394       {""},
2395       {"static",        0,      st_C_typespec},
2396       {""}, {""},
2397       {"DEFUN",         0,      st_C_gnumacro},
2398       {""}, {""}, {""}, {""},
2399       {"explicit",      C_PLPL, st_C_typespec},
2400       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2401       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2402       {""},
2403       {"enum",          0,      st_C_enum},
2404       {""}, {""},
2405       {"unsigned",      0,      st_C_typespec},
2406       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2407       {"@implementation",0,     st_C_objimpl}
2408     };
2409
2410   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2411     {
2412       register int key = hash (str, len);
2413
2414       if (key <= MAX_HASH_VALUE && key >= 0)
2415         {
2416           register const char *s = wordlist[key].name;
2417
2418           if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2419             return &wordlist[key];
2420         }
2421     }
2422   return 0;
2423 }
2424 /*%>*/
2425
2426 static enum sym_type
2427 C_symtype (str, len, c_ext)
2428      char *str;
2429      int len;
2430      int c_ext;
2431 {
2432   register struct C_stab_entry *se = in_word_set (str, len);
2433
2434   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2435     return st_none;
2436   return se->type;
2437 }
2438
2439 \f
2440 /*
2441  * C functions and variables are recognized using a simple
2442  * finite automaton.  fvdef is its state variable.
2443  */
2444 static enum
2445 {
2446   fvnone,                       /* nothing seen */
2447   fdefunkey,                    /* Emacs DEFUN keyword seen */
2448   fdefunname,                   /* Emacs DEFUN name seen */
2449   foperator,                    /* func: operator keyword seen (cplpl) */
2450   fvnameseen,                   /* function or variable name seen */
2451   fstartlist,                   /* func: just after open parenthesis */
2452   finlist,                      /* func: in parameter list */
2453   flistseen,                    /* func: after parameter list */
2454   fignore,                      /* func: before open brace */
2455   vignore                       /* var-like: ignore until ';' */
2456 } fvdef;
2457
2458 static bool fvextern;           /* func or var: extern keyword seen; */
2459
2460 /*
2461  * typedefs are recognized using a simple finite automaton.
2462  * typdef is its state variable.
2463  */
2464 static enum
2465 {
2466   tnone,                        /* nothing seen */
2467   tkeyseen,                     /* typedef keyword seen */
2468   ttypeseen,                    /* defined type seen */
2469   tinbody,                      /* inside typedef body */
2470   tend,                         /* just before typedef tag */
2471   tignore                       /* junk after typedef tag */
2472 } typdef;
2473
2474 /*
2475  * struct-like structures (enum, struct and union) are recognized
2476  * using another simple finite automaton.  `structdef' is its state
2477  * variable.
2478  */
2479 static enum
2480 {
2481   snone,                        /* nothing seen yet,
2482                                    or in struct body if cblev > 0 */
2483   skeyseen,                     /* struct-like keyword seen */
2484   stagseen,                     /* struct-like tag seen */
2485   sintemplate,                  /* inside template (ignore) */
2486   scolonseen                    /* colon seen after struct-like tag */
2487 } structdef;
2488
2489 /*
2490  * When objdef is different from onone, objtag is the name of the class.
2491  */
2492 static char *objtag = "<uninited>";
2493
2494 /*
2495  * Yet another little state machine to deal with preprocessor lines.
2496  */
2497 static enum
2498 {
2499   dnone,                        /* nothing seen */
2500   dsharpseen,                   /* '#' seen as first char on line */
2501   ddefineseen,                  /* '#' and 'define' seen */
2502   dignorerest                   /* ignore rest of line */
2503 } definedef;
2504
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 * objdef is its state variable.
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;
2524
2525
2526 /*
2527  * Use this structure to keep info about the token read, and how it
2528  * should be tagged.  Used by the make_C_tag function to build a tag.
2529  */
2530 static struct tok
2531 {
2532   bool valid;
2533   bool named;
2534   int offset;
2535   int length;
2536   int lineno;
2537   long linepos;
2538   char *line;
2539 } token;                        /* latest token read */
2540 static linebuffer token_name;   /* its name */
2541
2542 /*
2543  * Variables and functions for dealing with nested structures.
2544  * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
2545  */
2546 static void pushclass_above __P((int, char *, int));
2547 static void popclass_above __P((int));
2548 static void write_classname __P((linebuffer *, char *qualifier));
2549
2550 static struct {
2551   char **cname;                 /* nested class names */
2552   int *cblev;                   /* nested class curly brace level */
2553   int nl;                       /* class nesting level (elements used) */
2554   int size;                     /* length of the array */
2555 } cstack;                       /* stack for nested declaration tags */
2556 /* Current struct nesting depth (namespace, class, struct, union, enum). */
2557 #define nestlev         (cstack.nl)
2558 /* After struct keyword or in struct body, not inside an nested function. */
2559 #define instruct        (structdef == snone && nestlev > 0                      \
2560                          && cblev == cstack.cblev[nestlev-1] + 1)
2561
2562 static void
2563 pushclass_above (cblev, str, len)
2564      int cblev;
2565      char *str;
2566      int len;
2567 {
2568   int nl;
2569
2570   popclass_above (cblev);
2571   nl = cstack.nl;
2572   if (nl >= cstack.size)
2573     {
2574       int size = cstack.size *= 2;
2575       xrnew (cstack.cname, size, char *);
2576       xrnew (cstack.cblev, size, int);
2577     }
2578   assert (nl == 0 || cstack.cblev[nl-1] < cblev);
2579   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2580   cstack.cblev[nl] = cblev;
2581   cstack.nl = nl + 1;
2582 }
2583
2584 static void
2585 popclass_above (cblev)
2586      int cblev;
2587 {
2588   int nl;
2589
2590   for (nl = cstack.nl - 1;
2591        nl >= 0 && cstack.cblev[nl] >= cblev;
2592        nl--)
2593     {
2594       if (cstack.cname[nl] != NULL)
2595         free (cstack.cname[nl]);
2596       cstack.nl = nl;
2597     }
2598 }
2599
2600 static void
2601 write_classname (cn, qualifier)
2602      linebuffer *cn;
2603      char *qualifier;
2604 {
2605   int i, len;
2606   int qlen = strlen (qualifier);
2607
2608   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2609     {
2610       len = 0;
2611       cn->len = 0;
2612       cn->buffer[0] = '\0';
2613     }
2614   else
2615     {
2616       len = strlen (cstack.cname[0]);
2617       linebuffer_setlen (cn, len);
2618       strcpy (cn->buffer, cstack.cname[0]);
2619     }
2620   for (i = 1; i < cstack.nl; i++)
2621     {
2622       char *s;
2623       int slen;
2624
2625       s = cstack.cname[i];
2626       if (s == NULL)
2627         continue;
2628       slen = strlen (s);
2629       len += slen + qlen;
2630       linebuffer_setlen (cn, len);
2631       strncat (cn->buffer, qualifier, qlen);
2632       strncat (cn->buffer, s, slen);
2633     }
2634 }
2635
2636 \f
2637 static bool consider_token __P((char *, int, int, int *, int, int, bool *));
2638 static void make_C_tag __P((bool));
2639
2640 /*
2641  * consider_token ()
2642  *      checks to see if the current token is at the start of a
2643  *      function or variable, or corresponds to a typedef, or
2644  *      is a struct/union/enum tag, or #define, or an enum constant.
2645  *
2646  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2647  *      with args.  C_EXTP points to which language we are looking at.
2648  *
2649  * Globals
2650  *      fvdef                   IN OUT
2651  *      structdef               IN OUT
2652  *      definedef               IN OUT
2653  *      typdef                  IN OUT
2654  *      objdef                  IN OUT
2655  */
2656
2657 static bool
2658 consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
2659      register char *str;        /* IN: token pointer */
2660      register int len;          /* IN: token length */
2661      register int c;            /* IN: first char after the token */
2662      int *c_extp;               /* IN, OUT: C extensions mask */
2663      int cblev;                 /* IN: curly brace level */
2664      int parlev;                /* IN: parenthesis level */
2665      bool *is_func_or_var;      /* OUT: function or variable found */
2666 {
2667   /* When structdef is stagseen, scolonseen, or snone with cblev > 0,
2668      structtype is the type of the preceding struct-like keyword, and
2669      structcblev is the curly brace level where it has been seen. */
2670   static enum sym_type structtype;
2671   static int structcblev;
2672   static enum sym_type toktype;
2673
2674
2675   toktype = C_symtype (str, len, *c_extp);
2676
2677   /*
2678    * Advance the definedef state machine.
2679    */
2680   switch (definedef)
2681     {
2682     case dnone:
2683       /* We're not on a preprocessor line. */
2684       if (toktype == st_C_gnumacro)
2685         {
2686           fvdef = fdefunkey;
2687           return FALSE;
2688         }
2689       break;
2690     case dsharpseen:
2691       if (toktype == st_C_define)
2692         {
2693           definedef = ddefineseen;
2694         }
2695       else
2696         {
2697           definedef = dignorerest;
2698         }
2699       return FALSE;
2700     case ddefineseen:
2701       /*
2702        * Make a tag for any macro, unless it is a constant
2703        * and constantypedefs is FALSE.
2704        */
2705       definedef = dignorerest;
2706       *is_func_or_var = (c == '(');
2707       if (!*is_func_or_var && !constantypedefs)
2708         return FALSE;
2709       else
2710         return TRUE;
2711     case dignorerest:
2712       return FALSE;
2713     default:
2714       error ("internal error: definedef value.", (char *)NULL);
2715     }
2716
2717   /*
2718    * Now typedefs
2719    */
2720   switch (typdef)
2721     {
2722     case tnone:
2723       if (toktype == st_C_typedef)
2724         {
2725           if (typedefs)
2726             typdef = tkeyseen;
2727           fvextern = FALSE;
2728           fvdef = fvnone;
2729           return FALSE;
2730         }
2731       break;
2732     case tkeyseen:
2733       switch (toktype)
2734         {
2735         case st_none:
2736         case st_C_typespec:
2737         case st_C_class:
2738         case st_C_struct:
2739         case st_C_enum:
2740           typdef = ttypeseen;
2741           break;
2742         }
2743       break;
2744     case ttypeseen:
2745       if (structdef == snone && fvdef == fvnone)
2746         {
2747           fvdef = fvnameseen;
2748           return TRUE;
2749         }
2750       break;
2751     case tend:
2752       switch (toktype)
2753         {
2754         case st_C_typespec:
2755         case st_C_class:
2756         case st_C_struct:
2757         case st_C_enum:
2758           return FALSE;
2759         }
2760       return TRUE;
2761     }
2762
2763   /*
2764    * This structdef business is NOT invoked when we are ctags and the
2765    * file is plain C.  This is because a struct tag may have the same
2766    * name as another tag, and this loses with ctags.
2767    */
2768   switch (toktype)
2769     {
2770     case st_C_javastruct:
2771       if (structdef == stagseen)
2772         structdef = scolonseen;
2773       return FALSE;
2774     case st_C_template:
2775     case st_C_class:
2776       if (cblev == 0
2777           && (*c_extp & C_AUTO) /* automatic detection of C++ language */
2778           && definedef == dnone && structdef == snone
2779           && typdef == tnone && fvdef == fvnone)
2780         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
2781       if (toktype == st_C_template)
2782         break;
2783       /* FALLTHRU */
2784     case st_C_struct:
2785     case st_C_enum:
2786       if (parlev == 0
2787           && fvdef != vignore
2788           && (typdef == tkeyseen
2789               || (typedefs_or_cplusplus && structdef == snone)))
2790         {
2791           structdef = skeyseen;
2792           structtype = toktype;
2793           structcblev = cblev;
2794         }
2795       return FALSE;
2796     }
2797
2798   if (structdef == skeyseen)
2799     {
2800       structdef = stagseen;
2801       return TRUE;
2802     }
2803
2804   if (typdef != tnone)
2805     definedef = dnone;
2806
2807   /* Detect Objective C constructs. */
2808   switch (objdef)
2809     {
2810     case onone:
2811       switch (toktype)
2812         {
2813         case st_C_objprot:
2814           objdef = oprotocol;
2815           return FALSE;
2816         case st_C_objimpl:
2817           objdef = oimplementation;
2818           return FALSE;
2819         }
2820       break;
2821     case oimplementation:
2822       /* Save the class tag for functions or variables defined inside. */
2823       objtag = savenstr (str, len);
2824       objdef = oinbody;
2825       return FALSE;
2826     case oprotocol:
2827       /* Save the class tag for categories. */
2828       objtag = savenstr (str, len);
2829       objdef = otagseen;
2830       *is_func_or_var = TRUE;
2831       return TRUE;
2832     case oparenseen:
2833       objdef = ocatseen;
2834       *is_func_or_var = TRUE;
2835       return TRUE;
2836     case oinbody:
2837       break;
2838     case omethodsign:
2839       if (parlev == 0)
2840         {
2841           objdef = omethodtag;
2842           linebuffer_setlen (&token_name, len);
2843           strncpy (token_name.buffer, str, len);
2844           token_name.buffer[len] = '\0';
2845           return TRUE;
2846         }
2847       return FALSE;
2848     case omethodcolon:
2849       if (parlev == 0)
2850         objdef = omethodparm;
2851       return FALSE;
2852     case omethodparm:
2853       if (parlev == 0)
2854         {
2855           objdef = omethodtag;
2856           linebuffer_setlen (&token_name, token_name.len + len);
2857           strncat (token_name.buffer, str, len);
2858           return TRUE;
2859         }
2860       return FALSE;
2861     case oignore:
2862       if (toktype == st_C_objend)
2863         {
2864           /* Memory leakage here: the string pointed by objtag is
2865              never released, because many tests would be needed to
2866              avoid breaking on incorrect input code.  The amount of
2867              memory leaked here is the sum of the lengths of the
2868              class tags.
2869           free (objtag); */
2870           objdef = onone;
2871         }
2872       return FALSE;
2873     }
2874
2875   /* A function, variable or enum constant? */
2876   switch (toktype)
2877     {
2878     case st_C_extern:
2879       fvextern = TRUE;
2880       /* FALLTHRU */
2881     case st_C_typespec:
2882       if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2883         fvdef = fvnone;         /* should be useless */
2884       return FALSE;
2885     case st_C_ignore:
2886       fvextern = FALSE;
2887       fvdef = vignore;
2888       return FALSE;
2889     case st_C_operator:
2890       fvdef = foperator;
2891       *is_func_or_var = TRUE;
2892       return TRUE;
2893     case st_none:
2894       if (constantypedefs
2895           && structdef == snone
2896           && structtype == st_C_enum && cblev > structcblev)
2897         return TRUE;            /* enum constant */
2898       switch (fvdef)
2899         {
2900         case fdefunkey:
2901           if (cblev > 0)
2902             break;
2903           fvdef = fdefunname;   /* GNU macro */
2904           *is_func_or_var = TRUE;
2905           return TRUE;
2906         case fvnone:
2907           if ((strneq (str, "asm", 3) && endtoken (str[3]))
2908               || (strneq (str, "__asm__", 7) && endtoken (str[7])))
2909             {
2910               fvdef = vignore;
2911               return FALSE;
2912             }
2913           if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
2914             {
2915               fvdef = foperator;
2916               *is_func_or_var = TRUE;
2917               return TRUE;
2918             }
2919           if (cblev > 0 && !instruct)
2920             break;
2921           fvdef = fvnameseen;   /* function or variable */
2922           *is_func_or_var = TRUE;
2923           return TRUE;
2924         }
2925       break;
2926     }
2927
2928   return FALSE;
2929 }
2930
2931 \f
2932 /*
2933  * C_entries often keeps pointers to tokens or lines which are older than
2934  * the line currently read.  By keeping two line buffers, and switching
2935  * them at end of line, it is possible to use those pointers.
2936  */
2937 static struct
2938 {
2939   long linepos;                /* value of charno just before the line was read */
2940   linebuffer lb;               /* the line itself, filled in by readline */
2941 } lbs[2];
2942
     /* The macros below are not self-contained: they use curndx, newndx,
        lp, inf, quotednl and savetoken, which are not defined here and must
        be in scope wherever the macros are expanded (C_entries declares
        variables with these names). */
2943 #define current_lb_is_new (newndx == curndx)
     /* Flip curndx between 0 and 1, i.e. make the other buffer current. */
2944 #define switch_line_buffers() (curndx = 1 - curndx)
2945
     /* Shorthands for the current and the new line buffer and for their
        recorded character positions. */
2946 #define curlb (lbs[curndx].lb)
2947 #define newlb (lbs[newndx].lb)
2948 #define curlinepos (lbs[curndx].linepos)
2949 #define newlinepos (lbs[newndx].linepos)
2950
     /* Go to the next input line while leaving definedef untouched:
        record charno as the position of the line, increment lineno, read
        the line into the current buffer (advancing charno by what readline
        returns), point lp at the start of the buffer, clear quotednl and
        mark the current buffer as the new one. */
2951 #define CNL_SAVE_DEFINEDEF()                                            \
2952 do {                                                                    \
2953   curlinepos = charno;                                                  \
2954   lineno++;                                                             \
2955   linecharno = charno;                                                  \
2956   charno += readline (&curlb, inf);                                     \
2957   lp = curlb.buffer;                                                    \
2958   quotednl = FALSE;                                                     \
2959   newndx = curndx;                                                      \
2960 } while (0)
2961
     /* Like CNL_SAVE_DEFINEDEF, but in addition restore token from
        savetoken when a token has been saved there, and reset definedef
        to dnone. */
2962 #define CNL()                                                           \
2963 do {                                                                    \
2964   CNL_SAVE_DEFINEDEF();                                                 \
2965   if (savetoken.valid)                                                  \
2966     {                                                                   \
2967       token = savetoken;                                                \
2968       savetoken.valid = FALSE;                                          \
2969     }                                                                   \
2970   definedef = dnone;                                                    \
2971 } while (0)
2972
2973
2974 static void
2975 make_C_tag (isfun)
2976      bool isfun;
2977 {
2978   /* This function should never be called when token.valid is FALSE, but
2979      we must protect against invalid input or internal errors. */
2980   if (DEBUG || token.valid)
2981     {
2982       if (traditional_tag_style)
2983         {
2984           /* This was the original code.  Now we call new_pfnote instead,
2985              which uses the new method for naming tags (see new_pfnote). */
2986           char *name = NULL;
2987
2988           if (CTAGS || token.named)
2989             name = savestr (token_name.buffer);
2990           if (DEBUG && !token.valid)
2991             {
2992               if (token.named)
2993                 name = concat (name, "##invalid##", "");
2994               else
2995                 name = savestr ("##invalid##");
2996             }
2997           pfnote (name, isfun, token.line,
2998                   token.offset+token.length+1, token.lineno, token.linepos);
2999         }
3000       else
3001         new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
3002                     token.offset+token.length+1, token.lineno, token.linepos);
3003       token.valid = FALSE;
3004     }
3005 }
3006
3007
3008 /*
3009  * C_entries ()
3010  *      This routine finds functions, variables, typedefs,
3011  *      #define's, enum constants and struct/union/enum definitions in
3012  *      C syntax and adds them to the list.
3013  */
3014 static void
3015 C_entries (c_ext, inf)
3016      int c_ext;                 /* extension of C */
3017      FILE *inf;                 /* input file */
3018 {
3019   register char c;              /* latest char read; '\0' for end of line */
3020   register char *lp;            /* pointer one beyond the character `c' */
3021   int curndx, newndx;           /* indices for current and new lb */
3022   register int tokoff;          /* offset in line of start of current token */
3023   register int toklen;          /* length of current token */
3024   char *qualifier;              /* string used to qualify names */
3025   int qlen;                     /* length of qualifier */
3026   int cblev;                    /* current curly brace level */
3027   int parlev;                   /* current parenthesis level */
3028   int typdefcblev;              /* cblev where a typedef struct body begun */
3029   bool incomm, inquote, inchar, quotednl, midtoken;
3030   bool cplpl, cjava;
3031   bool yacc_rules;              /* in the rules part of a yacc file */
3032   struct tok savetoken;         /* token saved during preprocessor handling */
3033
3034
3035   initbuffer (&token_name);
3036   initbuffer (&lbs[0].lb);
3037   initbuffer (&lbs[1].lb);
3038   if (cstack.size == 0)
3039     {
3040       cstack.size = (DEBUG) ? 1 : 4;
3041       cstack.nl = 0;
3042       cstack.cname = xnew (cstack.size, char *);
3043       cstack.cblev = xnew (cstack.size, int);
3044     }
3045
3046   tokoff = toklen = typdefcblev = 0; /* keep compiler quiet */
3047   curndx = newndx = 0;
3048   lineno = 0;
3049   charno = 0;
3050   lp = curlb.buffer;
3051   *lp = 0;
3052
3053   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3054   structdef = snone; definedef = dnone; objdef = onone;
3055   yacc_rules = FALSE;
3056   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3057   token.valid = savetoken.valid = FALSE;
3058   cblev = 0;
3059   parlev = 0;
3060   cplpl = (c_ext & C_PLPL) == C_PLPL;
3061   cjava = (c_ext & C_JAVA) == C_JAVA;
3062   if (cjava)
3063     { qualifier = "."; qlen = 1; }
3064   else
3065     { qualifier = "::"; qlen = 2; }
3066
3067
3068   while (!feof (inf))
3069     {
3070       c = *lp++;
3071       if (c == '\\')
3072         {
3073           /* If we're at the end of the line, the next character is a
3074              '\0'; don't skip it, because it's the thing that tells us
3075              to read the next line.  */
3076           if (*lp == '\0')
3077             {
3078               quotednl = TRUE;
3079               continue;
3080             }
3081           lp++;
3082           c = ' ';
3083         }
3084       else if (incomm)
3085         {
3086           switch (c)
3087             {
3088             case '*':
3089               if (*lp == '/')
3090                 {
3091                   c = *lp++;
3092                   incomm = FALSE;
3093                 }
3094               break;
3095             case '\0':
3096               /* Newlines inside comments do not end macro definitions in
3097                  traditional cpp. */
3098               CNL_SAVE_DEFINEDEF ();
3099               break;
3100             }
3101           continue;
3102         }
3103       else if (inquote)
3104         {
3105           switch (c)
3106             {
3107             case '"':
3108               inquote = FALSE;
3109               break;
3110             case '\0':
3111               /* Newlines inside strings do not end macro definitions
3112                  in traditional cpp, even though compilers don't
3113                  usually accept them. */
3114               CNL_SAVE_DEFINEDEF ();
3115               break;
3116             }
3117           continue;
3118         }
3119       else if (inchar)
3120         {
3121           switch (c)
3122             {
3123             case '\0':
3124               /* Hmmm, something went wrong. */
3125               CNL ();
3126               /* FALLTHRU */
3127             case '\'':
3128               inchar = FALSE;
3129               break;
3130             }
3131           continue;
3132         }
3133       else
3134         switch (c)
3135           {
3136           case '"':
3137             inquote = TRUE;
3138             switch (fvdef)
3139               {
3140               case fdefunkey:
3141               case fstartlist:
3142               case finlist:
3143               case fignore:
3144               case vignore:
3145                 break;
3146               default:
3147                 fvextern = FALSE;
3148                 fvdef = fvnone;
3149               }
3150             continue;
3151           case '\'':
3152             inchar = TRUE;
3153             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3154               {
3155                 fvextern = FALSE;
3156                 fvdef = fvnone;
3157               }
3158             continue;
3159           case '/':
3160             if (*lp == '*')
3161               {
3162                 lp++;
3163                 incomm = TRUE;
3164                 continue;
3165               }
3166             else if (/* cplpl && */ *lp == '/')
3167               {
3168                 c = '\0';
3169                 break;
3170               }
3171             else
3172               break;
3173           case '%':
3174             if ((c_ext & YACC) && *lp == '%')
3175               {
3176                 /* Entering or exiting rules section in yacc file. */
3177                 lp++;
3178                 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3179                 typdef = tnone; structdef = snone;
3180                 midtoken = inquote = inchar = incomm = quotednl = FALSE;
3181                 cblev = 0;
3182                 yacc_rules = !yacc_rules;
3183                 continue;
3184               }
3185             else
3186               break;
3187           case '#':
3188             if (definedef == dnone)
3189               {
3190                 char *cp;
3191                 bool cpptoken = TRUE;
3192
3193                 /* Look back on this line.  If all blanks, or nonblanks
3194                    followed by an end of comment, this is a preprocessor
3195                    token. */
3196                 for (cp = newlb.buffer; cp < lp-1; cp++)
3197                   if (!iswhite (*cp))
3198                     {
3199                       if (*cp == '*' && *(cp+1) == '/')
3200                         {
3201                           cp++;
3202                           cpptoken = TRUE;
3203                         }
3204                       else
3205                         cpptoken = FALSE;
3206                     }
3207                 if (cpptoken)
3208                   definedef = dsharpseen;
3209               } /* if (definedef == dnone) */
3210
3211             continue;
3212           } /* switch (c) */
3213
3214
3215       /* Consider token only if some involved conditions are satisfied. */
3216       if (typdef != tignore
3217           && definedef != dignorerest
3218           && fvdef != finlist
3219           && structdef != sintemplate
3220           && (definedef != dnone
3221               || structdef != scolonseen))
3222         {
3223           if (midtoken)
3224             {
3225               if (endtoken (c))
3226                 {
3227                   if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
3228                     {
3229                       /*
3230                        * This handles :: in the middle, but not at the
3231                        * beginning of an identifier.  Also, space-separated
3232                        * :: is not recognised.
3233                        */
3234                       lp += 2;
3235                       toklen += 2;
3236                       c = lp[-1];
3237                       goto still_in_token;
3238                     }
3239                   else
3240                     {
3241                       bool funorvar = FALSE;
3242
3243                       if (yacc_rules
3244                           || consider_token (newlb.buffer + tokoff, toklen, c,
3245                                              &c_ext, cblev, parlev, &funorvar))
3246                         {
3247                           if (fvdef == foperator)
3248                             {
3249                               char *oldlp = lp;
3250                               lp = skip_spaces (lp-1);
3251                               if (*lp != '\0')
3252                                 lp += 1;
3253                               while (*lp != '\0'
3254                                      && !iswhite (*lp) && *lp != '(')
3255                                 lp += 1;
3256                               c = *lp++;
3257                               toklen += lp - oldlp;
3258                             }
3259                           token.named = FALSE;
3260                           if ((c_ext & C_EXT)   /* not pure C */
3261                               && nestlev > 0 && definedef == dnone)
3262                             /* in struct body */
3263                             {
3264                               write_classname (&token_name, qualifier);
3265                               linebuffer_setlen (&token_name,
3266                                                  token_name.len+qlen+toklen);
3267                               strcat (token_name.buffer, qualifier);
3268                               strncat (token_name.buffer,
3269                                        newlb.buffer + tokoff, toklen);
3270                               token.named = TRUE;
3271                             }
3272                           else if (objdef == ocatseen)
3273                             /* Objective C category */
3274                             {
3275                               int len = strlen (objtag) + 2 + toklen;
3276                               linebuffer_setlen (&token_name, len);
3277                               strcpy (token_name.buffer, objtag);
3278                               strcat (token_name.buffer, "(");
3279                               strncat (token_name.buffer,
3280                                        newlb.buffer + tokoff, toklen);
3281                               strcat (token_name.buffer, ")");
3282                               token.named = TRUE;
3283                             }
3284                           else if (objdef == omethodtag
3285                                    || objdef == omethodparm)
3286                             /* Objective C method */
3287                             {
3288                               token.named = TRUE;
3289                             }
3290                           else if (fvdef == fdefunname)
3291                             /* GNU DEFUN and similar macros */
3292                             {
3293                               bool defun = (newlb.buffer[tokoff] == 'F');
3294                               int off = tokoff;
3295                               int len = toklen;
3296
3297                               /* Rewrite the tag so that emacs lisp DEFUNs
3298                                  can be found by their elisp name */
3299                               if (defun)
3300                                 {
3301                                   off += 1;
3302                                   len -= 1;
3303                                 }
3304                               len = toklen;
3305                               linebuffer_setlen (&token_name, len);
3306                               strncpy (token_name.buffer,
3307                                        newlb.buffer + off, len);
3308                               token_name.buffer[len] = '\0';
3309                               if (defun)
3310                                 while (--len >= 0)
3311                                   if (token_name.buffer[len] == '_')
3312                                     token_name.buffer[len] = '-';
3313                               token.named = defun;
3314                             }
3315                           else
3316                             {
3317                               linebuffer_setlen (&token_name, toklen);
3318                               strncpy (token_name.buffer,
3319                                        newlb.buffer + tokoff, toklen);
3320                               token_name.buffer[toklen] = '\0';
3321                               /* Name macros and members. */
3322                               token.named = (structdef == stagseen
3323                                              || typdef == ttypeseen
3324                                              || typdef == tend
3325                                              || (funorvar
3326                                                  && definedef == dignorerest)
3327                                              || (funorvar
3328                                                  && definedef == dnone
3329                                                  && structdef == snone
3330                                                  && cblev > 0));
3331                             }
3332                           token.lineno = lineno;
3333                           token.offset = tokoff;
3334                           token.length = toklen;
3335                           token.line = newlb.buffer;
3336                           token.linepos = newlinepos;
3337                           token.valid = TRUE;
3338
3339                           if (definedef == dnone
3340                               && (fvdef == fvnameseen
3341                                   || fvdef == foperator
3342                                   || structdef == stagseen
3343                                   || typdef == tend
3344                                   || typdef == ttypeseen
3345                                   || objdef != onone))
3346                             {
3347                               if (current_lb_is_new)
3348                                 switch_line_buffers ();
3349                             }
3350                           else if (definedef != dnone
3351                                    || fvdef == fdefunname
3352                                    || instruct)
3353                             make_C_tag (funorvar);
3354                         }
3355                       midtoken = FALSE;
3356                     }
3357                 } /* if (endtoken (c)) */
3358               else if (intoken (c))
3359                 still_in_token:
3360                 {
3361                   toklen++;
3362                   continue;
3363                 }
3364             } /* if (midtoken) */
3365           else if (begtoken (c))
3366             {
3367               switch (definedef)
3368                 {
3369                 case dnone:
3370                   switch (fvdef)
3371                     {
3372                     case fstartlist:
3373                       fvdef = finlist;
3374                       continue;
3375                     case flistseen:
3376                       make_C_tag (TRUE); /* a function */
3377                       fvdef = fignore;
3378                       break;
3379                     case fvnameseen:
3380                       fvdef = fvnone;
3381                       break;
3382                     }
3383                   if (structdef == stagseen && !cjava)
3384                     {
3385                       popclass_above (cblev);
3386                       structdef = snone;
3387                     }
3388                   break;
3389                 case dsharpseen:
3390                   savetoken = token;
3391                   break;
3392                 }
3393               if (!yacc_rules || lp == newlb.buffer + 1)
3394                 {
3395                   tokoff = lp - 1 - newlb.buffer;
3396                   toklen = 1;
3397                   midtoken = TRUE;
3398                 }
3399               continue;
3400             } /* if (begtoken) */
3401         } /* if must look at token */
3402
3403
3404       /* Detect end of line, colon, comma, semicolon and various braces
3405          after having handled a token.*/
3406       switch (c)
3407         {
3408         case ':':
3409           if (yacc_rules && token.offset == 0 && token.valid)
3410             {
3411               make_C_tag (FALSE); /* a yacc function */
3412               break;
3413             }
3414           if (definedef != dnone)
3415             break;
3416           switch (objdef)
3417             {
3418             case  otagseen:
3419               objdef = oignore;
3420               make_C_tag (TRUE); /* an Objective C class */
3421               break;
3422             case omethodtag:
3423             case omethodparm:
3424               objdef = omethodcolon;
3425               linebuffer_setlen (&token_name, token_name.len + 1);
3426               strcat (token_name.buffer, ":");
3427               break;
3428             }
3429           if (structdef == stagseen)
3430             structdef = scolonseen;
3431           break;
3432         case ';':
3433           if (definedef != dnone)
3434             break;
3435           switch (typdef)
3436             {
3437             case tend:
3438             case ttypeseen:
3439               make_C_tag (FALSE); /* a typedef */
3440               typdef = tnone;
3441               fvdef = fvnone;
3442               break;
3443             case tnone:
3444             case tinbody:
3445             case tignore:
3446               switch (fvdef)
3447                 {
3448                 case fignore:
3449                   if (typdef == tignore)
3450                     fvdef = fvnone;
3451                   break;
3452                 case fvnameseen:
3453                   if ((globals && cblev == 0 && (!fvextern || declarations))
3454                       || (members && instruct))
3455                     make_C_tag (FALSE); /* a variable */
3456                   fvextern = FALSE;
3457                   fvdef = fvnone;
3458                   token.valid = FALSE;
3459                   break;
3460                 case flistseen:
3461                   if ((declarations && typdef == tnone && !instruct)
3462                       || (members && typdef != tignore && instruct))
3463                     make_C_tag (TRUE);  /* a function declaration */
3464                   /* FALLTHRU */
3465                 default:
3466                   fvextern = FALSE;
3467                   fvdef = fvnone;
3468                   if (declarations
3469                       && structdef == stagseen && (c_ext & C_PLPL))
3470                     make_C_tag (FALSE); /* forward declaration */
3471                   else
3472                     /* The following instruction invalidates the token.
3473                        Probably the token should be invalidated in all other
3474                        cases where some state machine is reset prematurely. */
3475                     token.valid = FALSE;
3476                 } /* switch (fvdef) */
3477               /* FALLTHRU */
3478             default:
3479               if (!instruct)
3480                 typdef = tnone;
3481             }
3482           if (structdef == stagseen)
3483             structdef = snone;
3484           break;
3485         case ',':
3486           if (definedef != dnone)
3487             break;
3488           switch (objdef)
3489             {
3490             case omethodtag:
3491             case omethodparm:
3492               make_C_tag (TRUE); /* an Objective C method */
3493               objdef = oinbody;
3494               break;
3495             }
3496           switch (fvdef)
3497             {
3498             case fdefunkey:
3499             case foperator:
3500             case fstartlist:
3501             case finlist:
3502             case fignore:
3503             case vignore:
3504               break;
3505             case fdefunname:
3506               fvdef = fignore;
3507               break;
3508             case fvnameseen:    /* a variable */
3509               if ((globals && cblev == 0 && (!fvextern || declarations))
3510                   || (members && instruct))
3511                 make_C_tag (FALSE);
3512               break;
3513             case flistseen:     /* a function */
3514               if ((declarations && typdef == tnone && !instruct)
3515                   || (members && typdef != tignore && instruct))
3516                 {
3517                   make_C_tag (TRUE); /* a function declaration */
3518                   fvdef = fvnameseen;
3519                 }
3520               else if (!declarations)
3521                 fvdef = fvnone;
3522               token.valid = FALSE;
3523               break;
3524             default:
3525               fvdef = fvnone;
3526             }
3527           if (structdef == stagseen)
3528             structdef = snone;
3529           break;
3530         case '[':
3531           if (definedef != dnone)
3532             break;
3533           if (structdef == stagseen)
3534             structdef = snone;
3535           switch (typdef)
3536             {
3537             case ttypeseen:
3538             case tend:
3539               typdef = tignore;
3540               make_C_tag (FALSE);       /* a typedef */
3541               break;
3542             case tnone:
3543             case tinbody:
3544               switch (fvdef)
3545                 {
3546                 case foperator:
3547                 case finlist:
3548                 case fignore:
3549                 case vignore:
3550                   break;
3551                 case fvnameseen:
3552                   if ((members && cblev == 1)
3553                       || (globals && cblev == 0
3554                           && (!fvextern || declarations)))
3555                     make_C_tag (FALSE); /* a variable */
3556                   /* FALLTHRU */
3557                 default:
3558                   fvdef = fvnone;
3559                 }
3560               break;
3561             }
3562           break;
3563         case '(':
3564           if (definedef != dnone)
3565             break;
3566           if (objdef == otagseen && parlev == 0)
3567             objdef = oparenseen;
3568           switch (fvdef)
3569             {
3570             case fvnameseen:
3571               if (typdef == ttypeseen
3572                   && *lp != '*'
3573                   && !instruct)
3574                 {
3575                   /* This handles constructs like:
3576                      typedef void OperatorFun (int fun); */
3577                   make_C_tag (FALSE);
3578                   typdef = tignore;
3579                   fvdef = fignore;
3580                   break;
3581                 }
3582               /* FALLTHRU */
3583             case foperator:
3584               fvdef = fstartlist;
3585               break;
3586             case flistseen:
3587               fvdef = finlist;
3588               break;
3589             }
3590           parlev++;
3591           break;
3592         case ')':
3593           if (definedef != dnone)
3594             break;
3595           if (objdef == ocatseen && parlev == 1)
3596             {
3597               make_C_tag (TRUE); /* an Objective C category */
3598               objdef = oignore;
3599             }
3600           if (--parlev == 0)
3601             {
3602               switch (fvdef)
3603                 {
3604                 case fstartlist:
3605                 case finlist:
3606                   fvdef = flistseen;
3607                   break;
3608                 }
3609               if (!instruct
3610                   && (typdef == tend
3611                       || typdef == ttypeseen))
3612                 {
3613                   typdef = tignore;
3614                   make_C_tag (FALSE); /* a typedef */
3615                 }
3616             }
3617           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3618             parlev = 0;
3619           break;
3620         case '{':
3621           if (definedef != dnone)
3622             break;
3623           if (typdef == ttypeseen)
3624             {
3625               /* Whenever typdef is set to tinbody (currently only
3626                  here), typdefcblev should be set to cblev. */
3627               typdef = tinbody;
3628               typdefcblev = cblev;
3629             }
3630           switch (fvdef)
3631             {
3632             case flistseen:
3633               make_C_tag (TRUE);    /* a function */
3634               /* FALLTHRU */
3635             case fignore:
3636               fvdef = fvnone;
3637               break;
3638             case fvnone:
3639               switch (objdef)
3640                 {
3641                 case otagseen:
3642                   make_C_tag (TRUE); /* an Objective C class */
3643                   objdef = oignore;
3644                   break;
3645                 case omethodtag:
3646                 case omethodparm:
3647                   make_C_tag (TRUE); /* an Objective C method */
3648                   objdef = oinbody;
3649                   break;
3650                 default:
3651                   /* Neutralize `extern "C" {' grot. */
3652                   if (cblev == 0 && structdef == snone && nestlev == 0
3653                       && typdef == tnone)
3654                     cblev = -1;
3655                 }
3656               break;
3657             }
3658           switch (structdef)
3659             {
3660             case skeyseen:         /* unnamed struct */
3661               pushclass_above (cblev, NULL, 0);
3662               structdef = snone;
3663               break;
3664             case stagseen:         /* named struct or enum */
3665             case scolonseen:       /* a class */
3666               pushclass_above (cblev, token.line+token.offset, token.length);
3667               structdef = snone;
3668               make_C_tag (FALSE);  /* a struct or enum */
3669               break;
3670             }
3671           cblev++;
3672           break;
3673         case '*':
3674           if (definedef != dnone)
3675             break;
3676           if (fvdef == fstartlist)
3677             fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
3678           break;
3679         case '}':
3680           if (definedef != dnone)
3681             break;
3682           if (!noindentypedefs && lp == newlb.buffer + 1)
3683             {
3684               cblev = 0;        /* reset curly brace level if first column */
3685               parlev = 0;       /* also reset paren level, just in case... */
3686             }
3687           else if (cblev > 0)
3688             cblev--;
3689           popclass_above (cblev);
3690           structdef = snone;
3691           /* Only if typdef == tinbody is typdefcblev significant. */
3692           if (typdef == tinbody && cblev <= typdefcblev)
3693             {
3694               assert (cblev == typdefcblev);
3695               typdef = tend;
3696             }
3697           break;
3698         case '=':
3699           if (definedef != dnone)
3700             break;
3701           switch (fvdef)
3702             {
3703             case foperator:
3704             case finlist:
3705             case fignore:
3706             case vignore:
3707               break;
3708             case fvnameseen:
3709               if ((members && cblev == 1)
3710                   || (globals && cblev == 0 && (!fvextern || declarations)))
3711                 make_C_tag (FALSE); /* a variable */
3712               /* FALLTHRU */
3713             default:
3714               fvdef = vignore;
3715             }
3716           break;
3717         case '<':
3718           if (cplpl && structdef == stagseen)
3719             {
3720               structdef = sintemplate;
3721               break;
3722             }
3723           goto resetfvdef;
3724         case '>':
3725           if (structdef == sintemplate)
3726             {
3727               structdef = stagseen;
3728               break;
3729             }
3730           goto resetfvdef;
3731         case '+':
3732         case '-':
3733           if (objdef == oinbody && cblev == 0)
3734             {
3735               objdef = omethodsign;
3736               break;
3737             }
3738           /* FALLTHRU */
3739         resetfvdef:
3740         case '#': case '~': case '&': case '%': case '/': case '|':
3741         case '^': case '!': case '.': case '?': case ']':
3742           if (definedef != dnone)
3743             break;
3744           /* These surely cannot follow a function tag in C. */
3745           switch (fvdef)
3746             {
3747             case foperator:
3748             case finlist:
3749             case fignore:
3750             case vignore:
3751               break;
3752             default:
3753               fvdef = fvnone;
3754             }
3755           break;
3756         case '\0':
3757           if (objdef == otagseen)
3758             {
3759               make_C_tag (TRUE); /* an Objective C class */
3760               objdef = oignore;
3761             }
3762           /* If a macro spans multiple lines don't reset its state. */
3763           if (quotednl)
3764             CNL_SAVE_DEFINEDEF ();
3765           else
3766             CNL ();
3767           break;
3768         } /* switch (c) */
3769
3770     } /* while not eof */
3771
3772   free (token_name.buffer);
3773   free (lbs[0].lb.buffer);
3774   free (lbs[1].lb.buffer);
3775 }
3776
3777 /*
3778  * Process either a C++ file or a C file depending on the setting
3779  * of a global flag.
3780  */
3781 static void
3782 default_C_entries (inf)
3783      FILE *inf;
3784 {
3785   C_entries (cplusplus ? C_PLPL : C_AUTO, inf);
3786 }
3787
/* Parse INF as plain C: C_entries gets an empty set of extension flags. */
static void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;                /* no language extension requested */

  C_entries (c_ext, inf);
}
3795
3796 /* Always do C++. */
3797 static void
3798 Cplusplus_entries (inf)
3799      FILE *inf;
3800 {
3801   C_entries (C_PLPL, inf);
3802 }
3803
3804 /* Always do Java. */
3805 static void
3806 Cjava_entries (inf)
3807      FILE *inf;
3808 {
3809   C_entries (C_JAVA, inf);
3810 }
3811
3812 /* Always do C*. */
3813 static void
3814 Cstar_entries (inf)
3815      FILE *inf;
3816 {
3817   C_entries (C_STAR, inf);
3818 }
3819
3820 /* Always do Yacc. */
3821 static void
3822 Yacc_entries (inf)
3823      FILE *inf;
3824 {
3825   C_entries (YACC, inf);
3826 }
3827
3828 \f
3829 /* Useful macros. */
/* Loop header that walks FILE_POINTER line by line; follow it with the
   statement to run for each line, as with a plain `for'.
   It first zeroes the globals `lineno' and `charno'.  Before every
   iteration it stops if feof (FILE_POINTER) is true; otherwise it
   increments `lineno', saves `charno' in `linecharno', reads one line
   into LINE_BUFFER with readline (adding the returned count to
   `charno') and points CHAR_POINTER at LINE_BUFFER's text.
   CHAR_POINTER is taken from the LINE_BUFFER argument itself, not from
   the global `lb', so the macro is correct for any line buffer.  */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
/* True if CP points at KEYWORD (a constant string, since sizeof is
   applied to it) and the character right after it satisfies notinname.
   On success CP is moved past the keyword and through skip_spaces; on
   failure CP is left alone.  CP is evaluated several times, so it must
   be a plain lvalue without side effects.  */
#define LOOKING_AT(cp, keyword)                                         \
  (strneq ((cp), keyword, sizeof (keyword) - 1)                         \
   && notinname ((cp)[sizeof (keyword) - 1])                            \
   && ((cp) = skip_spaces ((cp) + sizeof (keyword) - 1)) != NULL)
3843
3844 /*
3845  * Read a file, but do no processing.  This is used to do regexp
3846  * matching on files that have no language defined.
3847  */
3848 static void
3849 just_read_file (inf)
3850      FILE *inf;
3851 {
3852   register char *dummy;
3853
3854   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3855     continue;
3856 }
3857
3858 \f
3859 /* Fortran parsing */
3860
3861 static void F_takeprec __P((void));
3862 static void F_getit __P((FILE *));
3863
3864 static void
3865 F_takeprec ()
3866 {
3867   dbp = skip_spaces (dbp);
3868   if (*dbp != '*')
3869     return;
3870   dbp++;
3871   dbp = skip_spaces (dbp);
3872   if (strneq (dbp, "(*)", 3))
3873     {
3874       dbp += 3;
3875       return;
3876     }
3877   if (!ISDIGIT (*dbp))
3878     {
3879       --dbp;                    /* force failure */
3880       return;
3881     }
3882   do
3883     dbp++;
3884   while (ISDIGIT (*dbp));
3885 }
3886
3887 static void
3888 F_getit (inf)
3889      FILE *inf;
3890 {
3891   register char *cp;
3892
3893   dbp = skip_spaces (dbp);
3894   if (*dbp == '\0')
3895     {
3896       lineno++;
3897       linecharno = charno;
3898       charno += readline (&lb, inf);
3899       dbp = lb.buffer;
3900       if (dbp[5] != '&')
3901         return;
3902       dbp += 6;
3903       dbp = skip_spaces (dbp);
3904     }
3905   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
3906     return;
3907   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3908     continue;
3909   pfnote (savenstr (dbp, cp-dbp), TRUE,
3910           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3911 }
3912
3913
3914 static void
3915 Fortran_functions (inf)
3916      FILE *inf;
3917 {
3918   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3919     {
3920       if (*dbp == '%')
3921         dbp++;                  /* Ratfor escape to fortran */
3922       dbp = skip_spaces (dbp);
3923       if (*dbp == '\0')
3924         continue;
3925       switch (lowcase (*dbp))
3926         {
3927         case 'i':
3928           if (nocase_tail ("integer"))
3929             F_takeprec ();
3930           break;
3931         case 'r':
3932           if (nocase_tail ("real"))
3933             F_takeprec ();
3934           break;
3935         case 'l':
3936           if (nocase_tail ("logical"))
3937             F_takeprec ();
3938           break;
3939         case 'c':
3940           if (nocase_tail ("complex") || nocase_tail ("character"))
3941             F_takeprec ();
3942           break;
3943         case 'd':
3944           if (nocase_tail ("double"))
3945             {
3946               dbp = skip_spaces (dbp);
3947               if (*dbp == '\0')
3948                 continue;
3949               if (nocase_tail ("precision"))
3950                 break;
3951               continue;
3952             }
3953           break;
3954         }
3955       dbp = skip_spaces (dbp);
3956       if (*dbp == '\0')
3957         continue;
3958       switch (lowcase (*dbp))
3959         {
3960         case 'f':
3961           if (nocase_tail ("function"))
3962             F_getit (inf);
3963           continue;
3964         case 's':
3965           if (nocase_tail ("subroutine"))
3966             F_getit (inf);
3967           continue;
3968         case 'e':
3969           if (nocase_tail ("entry"))
3970             F_getit (inf);
3971           continue;
3972         case 'b':
3973           if (nocase_tail ("blockdata") || nocase_tail ("block data"))
3974             {
3975               dbp = skip_spaces (dbp);
3976               if (*dbp == '\0') /* assume un-named */
3977                 pfnote (savestr ("blockdata"), TRUE,
3978                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
3979               else
3980                 F_getit (inf);  /* look for name */
3981             }
3982           continue;
3983         }
3984     }
3985 }
3986
3987 \f
3988 /*
3989  * Ada parsing
3990  * Original code by
3991  * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
3992  */
3993
3994 static void Ada_getit __P((FILE *, char *));
3995
3996 /* Once we are positioned after an "interesting" keyword, let's get
3997    the real tag value necessary. */
3998 static void
3999 Ada_getit (inf, name_qualifier)
4000      FILE *inf;
4001      char *name_qualifier;
4002 {
4003   register char *cp;
4004   char *name;
4005   char c;
4006
4007   while (!feof (inf))
4008     {
4009       dbp = skip_spaces (dbp);
4010       if (*dbp == '\0'
4011           || (dbp[0] == '-' && dbp[1] == '-'))
4012         {
4013           lineno++;
4014           linecharno = charno;
4015           charno += readline (&lb, inf);
4016           dbp = lb.buffer;
4017         }
4018       switch (lowcase(*dbp))
4019         {
4020         case 'b':
4021           if (nocase_tail ("body"))
4022             {
4023               /* Skipping body of   procedure body   or   package body or ....
4024                  resetting qualifier to body instead of spec. */
4025               name_qualifier = "/b";
4026               continue;
4027             }
4028           break;
4029         case 't':
4030           /* Skipping type of   task type   or   protected type ... */
4031           if (nocase_tail ("type"))
4032             continue;
4033           break;
4034         }
4035       if (*dbp == '"')
4036         {
4037           dbp += 1;
4038           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
4039             continue;
4040         }
4041       else
4042         {
4043           dbp = skip_spaces (dbp);
4044           for (cp = dbp;
4045                (*cp != '\0'
4046                 && (ISALPHA (*cp) || ISDIGIT (*cp) || *cp == '_' || *cp == '.'));
4047                cp++)
4048             continue;
4049           if (cp == dbp)
4050             return;
4051         }
4052       c = *cp;
4053       *cp = '\0';
4054       name = concat (dbp, name_qualifier, "");
4055       *cp = c;
4056       pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4057       if (c == '"')
4058         dbp = cp + 1;
4059       return;
4060     }
4061 }
4062
4063 static void
4064 Ada_funcs (inf)
4065      FILE *inf;
4066 {
4067   bool inquote = FALSE;
4068
4069   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4070     {
4071       while (*dbp != '\0')
4072         {
4073           /* Skip a string i.e. "abcd". */
4074           if (inquote || (*dbp == '"'))
4075             {
4076               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
4077               if (dbp != NULL)
4078                 {
4079                   inquote = FALSE;
4080                   dbp += 1;
4081                   continue;     /* advance char */
4082                 }
4083               else
4084                 {
4085                   inquote = TRUE;
4086                   break;        /* advance line */
4087                 }
4088             }
4089
4090           /* Skip comments. */
4091           if (dbp[0] == '-' && dbp[1] == '-')
4092             break;              /* advance line */
4093
4094           /* Skip character enclosed in single quote i.e. 'a'
4095              and skip single quote starting an attribute i.e. 'Image. */
4096           if (*dbp == '\'')
4097             {
4098               dbp++ ;
4099               if (*dbp != '\0')
4100                 dbp++;
4101               continue;
4102             }
4103
4104           /* Search for beginning of a token.  */
4105           if (!begtoken (*dbp))
4106             {
4107               dbp++;
4108               continue;         /* advance char */
4109             }
4110
4111           /* We are at the beginning of a token. */
4112           switch (lowcase(*dbp))
4113             {
4114             case 'f':
4115               if (!packages_only && nocase_tail ("function"))
4116                 Ada_getit (inf, "/f");
4117               else
4118                 break;          /* from switch */
4119               continue;         /* advance char */
4120             case 'p':
4121               if (!packages_only && nocase_tail ("procedure"))
4122                 Ada_getit (inf, "/p");
4123               else if (nocase_tail ("package"))
4124                 Ada_getit (inf, "/s");
4125               else if (nocase_tail ("protected")) /* protected type */
4126                 Ada_getit (inf, "/t");
4127               else
4128                 break;          /* from switch */
4129               continue;         /* advance char */
4130             case 't':
4131               if (!packages_only && nocase_tail ("task"))
4132                 Ada_getit (inf, "/k");
4133               else if (typedefs && !packages_only && nocase_tail ("type"))
4134                 {
4135                   Ada_getit (inf, "/t");
4136                   while (*dbp != '\0')
4137                     dbp += 1;
4138                 }
4139               else
4140                 break;          /* from switch */
4141               continue;         /* advance char */
4142             }
4143
4144           /* Look for the end of the token. */
4145           while (!endtoken (*dbp))
4146             dbp++;
4147
4148         } /* advance char */
4149     } /* advance line */
4150 }
4151
4152 \f
4153 /*
4154  * Unix and microcontroller assembly tag handling
4155  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4156  * Idea by Bob Weiner, Motorola Inc. (1994)
4157  */
4158 static void
4159 Asm_labels (inf)
4160      FILE *inf;
4161 {
4162   register char *cp;
4163
4164   LOOP_ON_INPUT_LINES (inf, lb, cp)
4165     {
4166       /* If first char is alphabetic or one of [_.$], test for colon
4167          following identifier. */
4168       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4169         {
4170           /* Read past label. */
4171           cp++;
4172           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4173             cp++;
4174           if (*cp == ':' || iswhite (*cp))
4175             {
4176               /* Found end of label, so copy it and add it to the table. */
4177               pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
4178                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4179             }
4180         }
4181     }
4182 }
4183
4184 \f
4185 /*
4186  * Perl support
4187  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4188  * Perl variable names: /^(my|local).../
4189  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4190  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4191  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4192  */
4193 static void
4194 Perl_functions (inf)
4195      FILE *inf;
4196 {
4197   char *package = savestr ("main"); /* current package name */
4198   register char *cp;
4199
4200   LOOP_ON_INPUT_LINES (inf, lb, cp)
4201     {
4202       skip_spaces(cp);
4203
4204       if (LOOKING_AT (cp, "package"))
4205         {
4206           free (package);
4207           package = get_tag (cp);
4208           if (package == NULL)  /* can't parse package name */
4209             package = savestr ("");
4210           else
4211             package = savestr(package); /* make a copy */
4212         }
4213       else if (LOOKING_AT (cp, "sub"))
4214         {
4215           char *name, *fullname, *pos;
4216           char *sp = cp;
4217
4218           while (!notinname (*cp))
4219             cp++;
4220           if (cp == sp)
4221             continue;
4222           name = savenstr (sp, cp-sp);
4223           if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
4224             fullname = name;
4225           else
4226             fullname = concat (package, "::", name);
4227           pfnote (fullname, TRUE,
4228                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4229           if (name != fullname)
4230             free (name);
4231         }
4232        else if (globals         /* only if tagging global vars is enabled */
4233                 && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
4234         {
4235           /* After "my" or "local", but before any following paren or space. */
4236           char *varname = NULL;
4237
4238           if (*cp == '$' || *cp == '@' || *cp == '%')
4239             {
4240               char* varstart = ++cp;
4241               while (ISALNUM (*cp) || *cp == '_')
4242                 cp++;
4243               varname = savenstr (varstart, cp-varstart);
4244             }
4245           else
4246             {
4247               /* Should be examining a variable list at this point;
4248                  could insist on seeing an open parenthesis. */
4249               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4250                 cp++;
4251             }
4252
4253           /* Perhaps I should back cp up one character, so the TAGS table
4254              doesn't mention (and so depend upon) the following char. */
4255           pfnote (varname, FALSE,
4256                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4257         }
4258     }
4259 }
4260
4261
4262 /*
4263  * Python support
4264  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4265  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4266  * More ideas by seb bacon <seb@jamkit.com> (2002)
4267  */
4268 static void
4269 Python_functions (inf)
4270      FILE *inf;
4271 {
4272   register char *cp;
4273
4274   LOOP_ON_INPUT_LINES (inf, lb, cp)
4275     {
4276       cp = skip_spaces (cp);
4277       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4278         {
4279           char *name = cp;
4280           while (!notinname (*cp) && *cp != ':')
4281             cp++;
4282           pfnote (savenstr (name, cp-name), TRUE,
4283                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4284         }
4285     }
4286 }
4287
4288 \f
4289 /*
4290  * PHP support
4291  * Look for:
4292  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4293  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4294  *  - /^[ \t]*define\(\"[^\"]+/
4295  * Only with --members:
4296  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4297  * Idea by Diez B. Roggisch (2001)
4298  */
4299 static void
4300 PHP_functions (inf)
4301      FILE *inf;
4302 {
4303   register char *cp, *name;
4304   bool search_identifier = FALSE;
4305
4306   LOOP_ON_INPUT_LINES (inf, lb, cp)
4307     {
4308       cp = skip_spaces (cp);
4309       name = cp;
4310       if (search_identifier
4311           && *cp != '\0')
4312         {
4313           while (!notinname (*cp))
4314             cp++;
4315           pfnote (savenstr (name, cp-name), TRUE,
4316                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4317           search_identifier = FALSE;
4318         }
4319       else if (LOOKING_AT (cp, "function"))
4320         {
4321           if(*cp == '&')
4322             cp = skip_spaces (cp+1);
4323           if(*cp != '\0')
4324             {
4325               name = cp;
4326               while (!notinname (*cp))
4327                 cp++;
4328               pfnote (savenstr (name, cp-name), TRUE,
4329                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4330             }
4331           else
4332             search_identifier = TRUE;
4333         }
4334       else if (LOOKING_AT (cp, "class"))
4335         {
4336           if (*cp != '\0')
4337             {
4338               name = cp;
4339               while (*cp != '\0' && !iswhite (*cp))
4340                 cp++;
4341               pfnote (savenstr (name, cp-name), FALSE,
4342                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4343             }
4344           else
4345             search_identifier = TRUE;
4346         }
4347       else if (strneq (cp, "define", 6)
4348                && (cp = skip_spaces (cp+6))
4349                && *cp++ == '('
4350                && (*cp == '"' || *cp == '\''))
4351         {
4352           char quote = *cp++;
4353           name = cp;
4354           while (*cp != quote && *cp != '\0')
4355             cp++;
4356           pfnote (savenstr (name, cp-name), FALSE,
4357                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4358         }
4359       else if (members
4360                && LOOKING_AT (cp, "var")
4361                && *cp == '$')
4362         {
4363           name = cp;
4364           while (!notinname(*cp))
4365             cp++;
4366           pfnote (savenstr (name, cp-name), FALSE,
4367                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4368         }
4369     }
4370 }
4371
4372 \f
4373 /*
4374  * Cobol tag functions
4375  * We could look for anything that could be a paragraph name.
4376  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4377  * Idea by Corny de Souza (1993)
4378  */
4379 static void
4380 Cobol_paragraphs (inf)
4381      FILE *inf;
4382 {
4383   register char *bp, *ep;
4384
4385   LOOP_ON_INPUT_LINES (inf, lb, bp)
4386     {
4387       if (lb.len < 9)
4388         continue;
4389       bp += 8;
4390
4391       /* If eoln, compiler option or comment ignore whole line. */
4392       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4393         continue;
4394
4395       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4396         continue;
4397       if (*ep++ == '.')
4398         pfnote (savenstr (bp, ep-bp), TRUE,
4399                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4400     }
4401 }
4402
4403 \f
4404 /*
4405  * Makefile support
4406  * Idea by Assar Westerlund <assar@sics.se> (2001)
4407  */
4408 static void
4409 Makefile_targets (inf)
4410      FILE *inf;
4411 {
4412   register char *bp;
4413
4414   LOOP_ON_INPUT_LINES (inf, lb, bp)
4415     {
4416       if (*bp == '\t' || *bp == '#')
4417         continue;
4418       while (*bp != '\0' && *bp != '=' && *bp != ':')
4419         bp++;
4420       if (*bp == ':')
4421         pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
4422                 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4423     }
4424 }
4425
4426 \f
4427 /*
4428  * Pascal parsing
4429  * Original code by Mosur K. Mohan (1989)
4430  *
4431  *  Locates tags for procedures & functions.  Doesn't do any type- or
4432  *  var-definitions.  It does look for the keyword "extern" or
4433  *  "forward" immediately following the procedure statement; if found,
4434  *  the tag is skipped.
4435  */
4436 static void
4437 Pascal_functions (inf)
4438      FILE *inf;
4439 {
4440   linebuffer tline;             /* mostly copied from C_entries */
4441   long save_lcno;
4442   int save_lineno, save_len;
4443   char c, *cp, *namebuf;
4444
4445   bool                          /* each of these flags is TRUE iff: */
4446     incomment,                  /* point is inside a comment */
4447     inquote,                    /* point is inside '..' string */
4448     get_tagname,                /* point is after PROCEDURE/FUNCTION
4449                                    keyword, so next item = potential tag */
4450     found_tag,                  /* point is after a potential tag */
4451     inparms,                    /* point is within parameter-list */
4452     verify_tag;                 /* point has passed the parm-list, so the
4453                                    next token will determine whether this
4454                                    is a FORWARD/EXTERN to be ignored, or
4455                                    whether it is a real tag */
4456
4457   save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
4458   namebuf = NULL;               /* keep compiler quiet */
4459   lineno = 0;
4460   charno = 0;
4461   dbp = lb.buffer;
4462   *dbp = '\0';
4463   initbuffer (&tline);
4464
4465   incomment = inquote = FALSE;
4466   found_tag = FALSE;            /* have a proc name; check if extern */
4467   get_tagname = FALSE;          /* have found "procedure" keyword    */
4468   inparms = FALSE;              /* found '(' after "proc"            */
4469   verify_tag = FALSE;           /* check if "extern" is ahead        */
4470
4471
4472   while (!feof (inf))           /* long main loop to get next char */
4473     {
4474       c = *dbp++;
4475       if (c == '\0')            /* if end of line */
4476         {
4477           lineno++;
4478           linecharno = charno;
4479           charno += readline (&lb, inf);
4480           dbp = lb.buffer;
4481           if (*dbp == '\0')
4482             continue;
4483           if (!((found_tag && verify_tag)
4484                 || get_tagname))
4485             c = *dbp++;         /* only if don't need *dbp pointing
4486                                    to the beginning of the name of
4487                                    the procedure or function */
4488         }
4489       if (incomment)
4490         {
4491           if (c == '}')         /* within { } comments */
4492             incomment = FALSE;
4493           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4494             {
4495               dbp++;
4496               incomment = FALSE;
4497             }
4498           continue;
4499         }
4500       else if (inquote)
4501         {
4502           if (c == '\'')
4503             inquote = FALSE;
4504           continue;
4505         }
4506       else
4507         switch (c)
4508           {
4509           case '\'':
4510             inquote = TRUE;     /* found first quote */
4511             continue;
4512           case '{':             /* found open { comment */
4513             incomment = TRUE;
4514             continue;
4515           case '(':
4516             if (*dbp == '*')    /* found open (* comment */
4517               {
4518                 incomment = TRUE;
4519                 dbp++;
4520               }
4521             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4522               inparms = TRUE;
4523             continue;
4524           case ')':             /* end of parms list */
4525             if (inparms)
4526               inparms = FALSE;
4527             continue;
4528           case ';':
4529             if (found_tag && !inparms) /* end of proc or fn stmt */
4530               {
4531                 verify_tag = TRUE;
4532                 break;
4533               }
4534             continue;
4535           }
4536       if (found_tag && verify_tag && (*dbp != ' '))
4537         {
4538           /* check if this is an "extern" declaration */
4539           if (*dbp == '\0')
4540             continue;
4541           if (lowcase (*dbp == 'e'))
4542             {
4543               if (nocase_tail ("extern")) /* superfluous, really! */
4544                 {
4545                   found_tag = FALSE;
4546                   verify_tag = FALSE;
4547                 }
4548             }
4549           else if (lowcase (*dbp) == 'f')
4550             {
4551               if (nocase_tail ("forward")) /*  check for forward reference */
4552                 {
4553                   found_tag = FALSE;
4554                   verify_tag = FALSE;
4555                 }
4556             }
4557           if (found_tag && verify_tag) /* not external proc, so make tag */
4558             {
4559               found_tag = FALSE;
4560               verify_tag = FALSE;
4561               pfnote (namebuf, TRUE,
4562                       tline.buffer, save_len, save_lineno, save_lcno);
4563               continue;
4564             }
4565         }
4566       if (get_tagname)          /* grab name of proc or fn */
4567         {
4568           if (*dbp == '\0')
4569             continue;
4570
4571           /* save all values for later tagging */
4572           linebuffer_setlen (&tline, lb.len);
4573           strcpy (tline.buffer, lb.buffer);
4574           save_lineno = lineno;
4575           save_lcno = linecharno;
4576
4577           /* grab block name */
4578           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4579             continue;
4580           namebuf = savenstr (dbp, cp-dbp);
4581           dbp = cp;             /* set dbp to e-o-token */
4582           save_len = dbp - lb.buffer + 1;
4583           get_tagname = FALSE;
4584           found_tag = TRUE;
4585           continue;
4586
4587           /* and proceed to check for "extern" */
4588         }
4589       else if (!incomment && !inquote && !found_tag)
4590         {
4591           /* check for proc/fn keywords */
4592           switch (lowcase (c))
4593             {
4594             case 'p':
4595               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4596                 get_tagname = TRUE;
4597               continue;
4598             case 'f':
4599               if (nocase_tail ("unction"))
4600                 get_tagname = TRUE;
4601               continue;
4602             }
4603         }
4604     }                           /* while not eof */
4605
4606   free (tline.buffer);
4607 }
4608
4609 \f
4610 /*
4611  * Lisp tag functions
4612  *  look for (def or (DEF, quote or QUOTE
4613  */
4614
4615 static void L_getit __P((void));
4616
4617 static void
4618 L_getit ()
4619 {
4620   if (*dbp == '\'')             /* Skip prefix quote */
4621     dbp++;
4622   else if (*dbp == '(')
4623   {
4624     dbp++;
4625     /* Try to skip "(quote " */
4626     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4627       /* Ok, then skip "(" before name in (defstruct (foo)) */
4628       dbp = skip_spaces (dbp);
4629   }
4630   get_tag (dbp);
4631 }
4632
4633 static void
4634 Lisp_functions (inf)
4635      FILE *inf;
4636 {
4637   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4638     {
4639       if (dbp[0] != '(')
4640         continue;
4641
4642       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4643         {
4644           dbp = skip_non_spaces (dbp);
4645           dbp = skip_spaces (dbp);
4646           L_getit ();
4647         }
4648       else
4649         {
4650           /* Check for (foo::defmumble name-defined ... */
4651           do
4652             dbp++;
4653           while (!notinname (*dbp) && *dbp != ':');
4654           if (*dbp == ':')
4655             {
4656               do
4657                 dbp++;
4658               while (*dbp == ':');
4659
4660               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4661                 {
4662                   dbp = skip_non_spaces (dbp);
4663                   dbp = skip_spaces (dbp);
4664                   L_getit ();
4665                 }
4666             }
4667         }
4668     }
4669 }
4670
4671 \f
4672 /*
4673  * Postscript tag functions
4674  * Just look for lines where the first character is '/'
4675  * Also look at "defineps" for PSWrap
4676  * Ideas by:
4677  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4678  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4679  */
4680 static void
4681 Postscript_functions (inf)
4682      FILE *inf;
4683 {
4684   register char *bp, *ep;
4685
4686   LOOP_ON_INPUT_LINES (inf, lb, bp)
4687     {
4688       if (bp[0] == '/')
4689         {
4690           for (ep = bp+1;
4691                *ep != '\0' && *ep != ' ' && *ep != '{';
4692                ep++)
4693             continue;
4694           pfnote (savenstr (bp, ep-bp), TRUE,
4695                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4696         }
4697       else if (LOOKING_AT (bp, "defineps"))
4698         get_tag (bp);
4699     }
4700 }
4701
4702 \f
4703 /*
4704  * Scheme tag functions
4705  * look for (def... xyzzy
4706  *          (def... (xyzzy
4707  *          (def ... ((...(xyzzy ....
4708  *          (set! xyzzy
4709  * Original code by Ken Haase (1985?)
4710  */
4711
4712 static void
4713 Scheme_functions (inf)
4714      FILE *inf;
4715 {
4716   register char *bp;
4717
4718   LOOP_ON_INPUT_LINES (inf, lb, bp)
4719     {
4720       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
4721         {
4722           bp = skip_non_spaces (bp+4);
4723           /* Skip over open parens and white space */
4724           while (notinname (*bp))
4725             bp++;
4726           get_tag (bp);
4727         }
4728       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
4729         get_tag (bp);
4730     }
4731 }
4732
4733 \f
4734 /* Find tags in TeX and LaTeX input files.  */
4735
4736 /* TEX_toktab is a table of TeX control sequences that define tags.
4737  * Each entry records one such control sequence.
4738  *
4739  * Original code from who knows whom.
4740  * Ideas by:
4741  *   Stefan Monnier (2002)
4742  */
4743
/* Table with tag tokens.  It is built by TEX_decode_env the first time
   TeX_commands runs, and its last entry has a NULL buffer. */
static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab, written as a
   colon-separated list.
   The value of environment var TEXTAGS is prepended to this.  */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape character and group delimiters in effect for the file being
   scanned; TEX_mode resets all three for every file. */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
4759
4760 /*
4761  * TeX/LaTeX scanning loop.
4762  */
4763 static void
4764 TeX_commands (inf)
4765      FILE *inf;
4766 {
4767   char *cp;
4768   linebuffer *key;
4769
4770   /* Select either \ or ! as escape character.  */
4771   TEX_mode (inf);
4772
4773   /* Initialize token table once from environment. */
4774   if (TEX_toktab == NULL)
4775     TEX_decode_env ("TEXTAGS", TEX_defenv);
4776
4777   LOOP_ON_INPUT_LINES (inf, lb, cp)
4778     {
4779       /* Look at each TEX keyword in line. */
4780       for (;;)
4781         {
4782           /* Look for a TEX escape. */
4783           while (*cp++ != TEX_esc)
4784             if (cp[-1] == '\0' || cp[-1] == '%')
4785               goto tex_next_line;
4786
4787           for (key = TEX_toktab; key->buffer != NULL; key++)
4788             if (strneq (cp, key->buffer, key->len))
4789               {
4790                 register char *p;
4791                 char *name;
4792                 int linelen;
4793                 bool opgrp = FALSE;
4794
4795                 cp = skip_spaces (cp + key->len);
4796                 if (*cp == TEX_opgrp)
4797                   {
4798                     opgrp = TRUE;
4799                     cp++;
4800                   }
4801                 for (p = cp;
4802                      (!iswhite (*p) && *p != '#' &&
4803                       *p != TEX_opgrp && *p != TEX_clgrp);
4804                      p++)
4805                   continue;
4806                 name = savenstr (cp, p-cp);
4807                 linelen = lb.len;
4808                 if (!opgrp || *p == TEX_clgrp)
4809                   {
4810                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
4811                       *p++;
4812                     linelen = p - lb.buffer + 1;
4813                   }
4814                 pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
4815                 goto tex_next_line; /* We only tag a line once */
4816               }
4817         }
4818     tex_next_line:
4819       ;
4820     }
4821 }
4822
4823 #define TEX_LESC '\\'
4824 #define TEX_SESC '!'
4825
4826 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4827    chars accordingly. */
4828 static void
4829 TEX_mode (inf)
4830      FILE *inf;
4831 {
4832   int c;
4833
4834   while ((c = getc (inf)) != EOF)
4835     {
4836       /* Skip to next line if we hit the TeX comment char. */
4837       if (c == '%')
4838         while (c != '\n')
4839           c = getc (inf);
4840       else if (c == TEX_LESC || c == TEX_SESC )
4841         break;
4842     }
4843
4844   if (c == TEX_LESC)
4845     {
4846       TEX_esc = TEX_LESC;
4847       TEX_opgrp = '{';
4848       TEX_clgrp = '}';
4849     }
4850   else
4851     {
4852       TEX_esc = TEX_SESC;
4853       TEX_opgrp = '<';
4854       TEX_clgrp = '>';
4855     }
4856   /* If the input file is compressed, inf is a pipe, and rewind may fail.
4857      No attempt is made to correct the situation. */
4858   rewind (inf);
4859 }
4860
4861 /* Read environment and prepend it to the default string.
4862    Build token table. */
4863 static void
4864 TEX_decode_env (evarname, defenv)
4865      char *evarname;
4866      char *defenv;
4867 {
4868   register char *env, *p;
4869   int i, len;
4870
4871   /* Append default string to environment. */
4872   env = getenv (evarname);
4873   if (!env)
4874     env = defenv;
4875   else
4876     {
4877       char *oldenv = env;
4878       env = concat (oldenv, defenv, "");
4879     }
4880
4881   /* Allocate a token table */
4882   for (len = 1, p = env; p;)
4883     if ((p = etags_strchr (p, ':')) && *++p != '\0')
4884       len++;
4885   TEX_toktab = xnew (len, linebuffer);
4886
4887   /* Unpack environment string into token table. Be careful about */
4888   /* zero-length strings (leading ':', "::" and trailing ':') */
4889   for (i = 0; *env != '\0';)
4890     {
4891       p = etags_strchr (env, ':');
4892       if (!p)                   /* End of environment string. */
4893         p = env + strlen (env);
4894       if (p - env > 0)
4895         {                       /* Only non-zero strings. */
4896           TEX_toktab[i].buffer = savenstr (env, p - env);
4897           TEX_toktab[i].len = p - env;
4898           i++;
4899         }
4900       if (*p)
4901         env = p + 1;
4902       else
4903         {
4904           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
4905           TEX_toktab[i].len = 0;
4906           break;
4907         }
4908     }
4909 }
4910
4911 \f
4912 /* Texinfo support.  Dave Love, Mar. 2000.  */
4913 static void
4914 Texinfo_nodes (inf)
4915      FILE * inf;
4916 {
4917   char *cp, *start;
4918   LOOP_ON_INPUT_LINES (inf, lb, cp)
4919     if (LOOKING_AT (cp, "@node"))
4920       {
4921         start = cp;
4922         while (*cp != '\0' && *cp != ',')
4923           cp++;
4924         pfnote (savenstr (start, cp - start), TRUE,
4925                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4926       }
4927 }
4928
4929 \f
4930 /*
4931  * Prolog support
4932  *
4933  * Assumes that the predicate or rule starts at column 0.
4934  * Only the first clause of a predicate or rule is added.
4935  * Original code by Sunichirou Sugou (1989)
4936  * Rewritten by Anders Lindgren (1996)
4937  */
4938 static int prolog_pr __P((char *, char *));
4939 static void prolog_skip_comment __P((linebuffer *, FILE *));
4940 static int prolog_atom __P((char *, int));
4941
4942 static void
4943 Prolog_functions (inf)
4944      FILE *inf;
4945 {
4946   char *cp, *last;
4947   int len;
4948   int allocated;
4949
4950   allocated = 0;
4951   len = 0;
4952   last = NULL;
4953
4954   LOOP_ON_INPUT_LINES (inf, lb, cp)
4955     {
4956       if (cp[0] == '\0')        /* Empty line */
4957         continue;
4958       else if (iswhite (cp[0])) /* Not a predicate */
4959         continue;
4960       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
4961         prolog_skip_comment (&lb, inf);
4962       else if ((len = prolog_pr (cp, last)) > 0)
4963         {
4964           /* Predicate or rule.  Store the function name so that we
4965              only generate a tag for the first clause.  */
4966           if (last == NULL)
4967             last = xnew(len + 1, char);
4968           else if (len + 1 > allocated)
4969             xrnew (last, len + 1, char);
4970           allocated = len + 1;
4971           strncpy (last, cp, len);
4972           last[len] = '\0';
4973         }
4974     }
4975 }
4976
4977
4978 static void
4979 prolog_skip_comment (plb, inf)
4980      linebuffer *plb;
4981      FILE *inf;
4982 {
4983   char *cp;
4984
4985   do
4986     {
4987       for (cp = plb->buffer; *cp != '\0'; cp++)
4988         if (cp[0] == '*' && cp[1] == '/')
4989           return;
4990       lineno++;
4991       linecharno += readline (plb, inf);
4992     }
4993   while (!feof(inf));
4994 }
4995
4996 /*
4997  * A predicate or rule definition is added if it matches:
4998  *     <beginning of line><Prolog Atom><whitespace>(
4999  * or  <beginning of line><Prolog Atom><whitespace>:-
5000  *
5001  * It is added to the tags database if it doesn't match the
5002  * name of the previous clause header.
5003  *
5004  * Return the size of the name of the predicate or rule, or 0 if no
5005  * header was found.
5006  */
5007 static int
5008 prolog_pr (s, last)
5009      char *s;
5010      char *last;                /* Name of last clause. */
5011 {
5012   int pos;
5013   int len;
5014
5015   pos = prolog_atom (s, 0);
5016   if (pos < 1)
5017     return 0;
5018
5019   len = pos;
5020   pos = skip_spaces (s + pos) - s;
5021
5022   if ((s[pos] == '.'
5023        || (s[pos] == '(' && (pos += 1))
5024        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5025       && (last == NULL          /* save only the first clause */
5026           || len != strlen (last)
5027           || !strneq (s, last, len)))
5028         {
5029           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5030           return len;
5031         }
5032   else
5033     return 0;
5034 }
5035
/*
 * Consume a Prolog atom starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - A sequence of alphanumerics and underscores, starting with a lower
 *   case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.  The count includes both quotes.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (s[pos] == '\'')
    {
      /* The atom is quoted. */
      pos++;
      for (;;)
        switch (s[pos])
          {
          case '\'':
            if (s[pos + 1] != '\'')
              return pos + 1 - start;   /* closing quote */
            pos += 2;                   /* A double quote */
            break;
          case '\0':
            /* Multiline quoted atoms are ignored. */
            return -1;
          case '\\':
            if (s[pos + 1] == '\0')
              return -1;
            pos += 2;
            break;
          default:
            pos++;
            break;
          }
    }

  if (!ISLOWER(s[pos]) && s[pos] != '_')
    return -1;

  /* The atom is unquoted. */
  do
    pos++;
  while (ISALNUM(s[pos]) || s[pos] == '_');
  return pos - start;
}
5094
5095 \f
5096 /*
5097  * Support for Erlang
5098  *
5099  * Generates tags for functions, defines, and records.
5100  * Assumes that Erlang functions start at column 0.
5101  * Original code by Anders Lindgren (1996)
5102  */
5103 static int erlang_func __P((char *, char *));
5104 static void erlang_attribute __P((char *));
5105 static int erlang_atom __P((char *, int));
5106
5107 static void
5108 Erlang_functions (inf)
5109      FILE *inf;
5110 {
5111   char *cp, *last;
5112   int len;
5113   int allocated;
5114
5115   allocated = 0;
5116   len = 0;
5117   last = NULL;
5118
5119   LOOP_ON_INPUT_LINES (inf, lb, cp)
5120     {
5121       if (cp[0] == '\0')        /* Empty line */
5122         continue;
5123       else if (iswhite (cp[0])) /* Not function nor attribute */
5124         continue;
5125       else if (cp[0] == '%')    /* comment */
5126         continue;
5127       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5128         continue;
5129       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5130         {
5131           erlang_attribute (cp);
5132           last = NULL;
5133         }
5134       else if ((len = erlang_func (cp, last)) > 0)
5135         {
5136           /*
5137            * Function.  Store the function name so that we only
5138            * generates a tag for the first clause.
5139            */
5140           if (last == NULL)
5141             last = xnew (len + 1, char);
5142           else if (len + 1 > allocated)
5143             xrnew (last, len + 1, char);
5144           allocated = len + 1;
5145           strncpy (last, cp, len);
5146           last[len] = '\0';
5147         }
5148     }
5149 }
5150
5151
5152 /*
5153  * A function definition is added if it matches:
5154  *     <beginning of line><Erlang Atom><whitespace>(
5155  *
5156  * It is added to the tags database if it doesn't match the
5157  * name of the previous clause header.
5158  *
5159  * Return the size of the name of the function, or 0 if no function
5160  * was found.
5161  */
5162 static int
5163 erlang_func (s, last)
5164      char *s;
5165      char *last;                /* Name of last clause. */
5166 {
5167   int pos;
5168   int len;
5169
5170   pos = erlang_atom (s, 0);
5171   if (pos < 1)
5172     return 0;
5173
5174   len = pos;
5175   pos = skip_spaces (s + pos) - s;
5176
5177   /* Save only the first clause. */
5178   if (s[pos++] == '('
5179       && (last == NULL
5180           || len != (int)strlen (last)
5181           || !strneq (s, last, len)))
5182         {
5183           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
5184           return len;
5185         }
5186
5187   return 0;
5188 }
5189
5190
5191 /*
5192  * Handle attributes.  Currently, tags are generated for defines
5193  * and records.
5194  *
5195  * They are on the form:
5196  * -define(foo, bar).
5197  * -define(Foo(M, N), M+N).
5198  * -record(graph, {vtab = notable, cyclic = true}).
5199  */
5200 static void
5201 erlang_attribute (s)
5202      char *s;
5203 {
5204   int pos;
5205   int len;
5206
5207   if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
5208     {
5209       if (s[pos++] == '(')
5210         {
5211           pos = skip_spaces (s + pos) - s;
5212           len = erlang_atom (s, pos);
5213           if (len != 0)
5214             pfnote (savenstr (& s[pos], len), TRUE,
5215                     s, pos + len, lineno, linecharno);
5216         }
5217     }
5218   return;
5219 }
5220
5221
/*
 * Consume an Erlang atom (or variable) starting at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * An unquoted name is a letter or underscore followed by alphanumerics
 * and underscores.  A quoted name runs to the next single quote, with
 * backslash quoting the character after it; the count includes both
 * quotes.
 */
static int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int start = pos;

  if (s[pos] == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      for (pos++; s[pos] != '\''; )
        {
          if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
          if (s[pos] == '\\')
            {
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
            }
          else
            pos++;
        }
      return pos + 1 - start;
    }

  if (!ISALPHA (s[pos]) && s[pos] != '_')
    return -1;

  /* The atom is unquoted. */
  do
    pos++;
  while (ISALNUM (s[pos]) || s[pos] == '_');
  return pos - start;
}
5271
5272 \f
5273 #ifdef ETAGS_REGEXPS
5274
5275 static char *scan_separators __P((char *));
5276 static void analyse_regex __P((char *, bool));
5277 static void add_regex __P((char *, bool, language *));
5278 static char *substitute __P((char *, char *, struct re_registers *));
5279
5280 /* Take a string like "/blah/" and turn it into "blah", making sure
5281    that the first and last characters are the same, and handling
5282    quoted separator characters.  Actually, stops on the occurrence of
5283    an unquoted separator.  Also turns "\t" into a Tab character.
5284    Returns pointer to terminating separator.  Works in place.  Null
5285    terminates name string. */
5286 static char *
5287 scan_separators (name)
5288      char *name;
5289 {
5290   char sep = name[0];
5291   char *copyto = name;
5292   bool quoted = FALSE;
5293
5294   for (++name; *name != '\0'; ++name)
5295     {
5296       if (quoted)
5297         {
5298           if (*name == 't')
5299             *copyto++ = '\t';
5300           else if (*name == sep)
5301             *copyto++ = sep;
5302           else
5303             {
5304               /* Something else is quoted, so preserve the quote. */
5305               *copyto++ = '\\';
5306               *copyto++ = *name;
5307             }
5308           quoted = FALSE;
5309         }
5310       else if (*name == '\\')
5311         quoted = TRUE;
5312       else if (*name == sep)
5313         break;
5314       else
5315         *copyto++ = *name;
5316     }
5317
5318   /* Terminate copied string. */
5319   *copyto = '\0';
5320   return name;
5321 }
5322
5323 /* Look at the argument of --regex or --no-regex and do the right
5324    thing.  Same for each line of a regexp file. */
5325 static void
5326 analyse_regex (regex_arg, ignore_case)
5327      char *regex_arg;
5328      bool ignore_case;
5329 {
5330   if (regex_arg == NULL)
5331     {
5332       free_patterns ();         /* --no-regex: remove existing regexps */
5333       return;
5334     }
5335
5336   /* A real --regexp option or a line in a regexp file. */
5337   switch (regex_arg[0])
5338     {
5339       /* Comments in regexp file or null arg to --regex. */
5340     case '\0':
5341     case ' ':
5342     case '\t':
5343       break;
5344
5345       /* Read a regex file.  This is recursive and may result in a
5346          loop, which will stop when the file descriptors are exhausted. */
5347     case '@':
5348       {
5349         FILE *regexfp;
5350         linebuffer regexbuf;
5351         char *regexfile = regex_arg + 1;
5352
5353         /* regexfile is a file containing regexps, one per line. */
5354         regexfp = fopen (regexfile, "r");
5355         if (regexfp == NULL)
5356           {
5357             pfatal (regexfile);
5358             return;
5359           }
5360         initbuffer (&regexbuf);
5361         while (readline_internal (&regexbuf, regexfp) > 0)
5362           analyse_regex (regexbuf.buffer, ignore_case);
5363         free (regexbuf.buffer);
5364         fclose (regexfp);
5365       }
5366       break;
5367
5368       /* Regexp to be used for a specific language only. */
5369     case '{':
5370       {
5371         language *lang;
5372         char *lang_name = regex_arg + 1;
5373         char *cp;
5374
5375         for (cp = lang_name; *cp != '}'; cp++)
5376           if (*cp == '\0')
5377             {
5378               error ("unterminated language name in regex: %s", regex_arg);
5379               return;
5380             }
5381         *cp = '\0';
5382         lang = get_language_from_langname (lang_name);
5383         if (lang == NULL)
5384           return;
5385         add_regex (cp + 1, ignore_case, lang);
5386       }
5387       break;
5388
5389       /* Regexp to be used for any language. */
5390     default:
5391       add_regex (regex_arg, ignore_case, NULL);
5392       break;
5393     }
5394 }
5395
5396 /* Turn a name, which is an ed-style (but Emacs syntax) regular
5397    expression, into a real regular expression by compiling it. */
5398 static void
5399 add_regex (regexp_pattern, ignore_case, lang)
5400      char *regexp_pattern;
5401      bool ignore_case;
5402      language *lang;
5403 {
5404   static struct re_pattern_buffer zeropattern;
5405   char *name;
5406   const char *err;
5407   struct re_pattern_buffer *patbuf;
5408   pattern *pp;
5409
5410
5411   if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
5412     {
5413       error ("%s: unterminated regexp", regexp_pattern);
5414       return;
5415     }
5416   name = scan_separators (regexp_pattern);
5417   if (regexp_pattern[0] == '\0')
5418     {
5419       error ("null regexp", (char *)NULL);
5420       return;
5421     }
5422   (void) scan_separators (name);
5423
5424   patbuf = xnew (1, struct re_pattern_buffer);
5425   *patbuf = zeropattern;
5426   if (ignore_case)
5427     patbuf->translate = lc_trans;       /* translation table to fold case  */
5428
5429   err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
5430   if (err != NULL)
5431     {
5432       error ("%s while compiling pattern", err);
5433       return;
5434     }
5435
5436   pp = p_head;
5437   p_head = xnew (1, pattern);
5438   p_head->regex = savestr (regexp_pattern);
5439   p_head->p_next = pp;
5440   p_head->lang = lang;
5441   p_head->pat = patbuf;
5442   p_head->name_pattern = savestr (name);
5443   p_head->error_signaled = FALSE;
5444   p_head->ignore_case = ignore_case;
5445 }
5446
5447 /*
5448  * Do the substitutions indicated by the regular expression and
5449  * arguments.
5450  */
5451 static char *
5452 substitute (in, out, regs)
5453      char *in, *out;
5454      struct re_registers *regs;
5455 {
5456   char *result, *t;
5457   int size, dig, diglen;
5458
5459   result = NULL;
5460   size = strlen (out);
5461
5462   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5463   if (out[size - 1] == '\\')
5464     fatal ("pattern error in \"%s\"", out);
5465   for (t = etags_strchr (out, '\\');
5466        t != NULL;
5467        t = etags_strchr (t + 2, '\\'))
5468     if (ISDIGIT (t[1]))
5469       {
5470         dig = t[1] - '0';
5471         diglen = regs->end[dig] - regs->start[dig];
5472         size += diglen - 2;
5473       }
5474     else
5475       size -= 1;
5476
5477   /* Allocate space and do the substitutions. */
5478   result = xnew (size + 1, char);
5479
5480   for (t = result; *out != '\0'; out++)
5481     if (*out == '\\' && ISDIGIT (*++out))
5482       {
5483         dig = *out - '0';
5484         diglen = regs->end[dig] - regs->start[dig];
5485         strncpy (t, in + regs->start[dig], diglen);
5486         t += diglen;
5487       }
5488     else
5489       *t++ = *out;
5490   *t = '\0';
5491
5492   assert (t <= result + size && t - result == (int)strlen (result));
5493
5494   return result;
5495 }
5496
5497 /* Deallocate all patterns. */
5498 static void
5499 free_patterns ()
5500 {
5501   pattern *pp;
5502   while (p_head != NULL)
5503     {
5504       pp = p_head->p_next;
5505       free (p_head->regex);
5506       free (p_head->name_pattern);
5507       free (p_head);
5508       p_head = pp;
5509     }
5510   return;
5511 }
5512 #endif /* ETAGS_REGEXPS */
5513
5514 \f
5515 static bool
5516 nocase_tail (cp)
5517      char *cp;
5518 {
5519   register int len = 0;
5520
5521   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
5522     cp++, len++;
5523   if (*cp == '\0' && !intoken (dbp[len]))
5524     {
5525       dbp += len;
5526       return TRUE;
5527     }
5528   return FALSE;
5529 }
5530
5531 static char *
5532 get_tag (bp)
5533      register char *bp;
5534 {
5535   register char *cp, *name;
5536
5537   if (*bp == '\0')
5538     return NULL;
5539   /* Go till you get to white space or a syntactic break */
5540   for (cp = bp + 1; !notinname (*cp); cp++)
5541     continue;
5542   name = savenstr (bp, cp-bp);
5543   pfnote (name, TRUE,
5544           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5545   return name;
5546 }
5547
5548 /* Initialize a linebuffer for use */
5549 static void
5550 initbuffer (lbp)
5551      linebuffer *lbp;
5552 {
5553   lbp->size = (DEBUG) ? 3 : 200;
5554   lbp->buffer = xnew (lbp->size, char);
5555   lbp->buffer[0] = '\0';
5556   lbp->len = 0;
5557 }
5558
5559 /*
5560  * Read a line of text from `stream' into `lbp', excluding the
5561  * newline or CR-NL, if any.  Return the number of characters read from
5562  * `stream', which is the length of the line including the newline.
5563  *
5564  * On DOS or Windows we do not count the CR character, if any, before the
5565  * NL, in the returned length; this mirrors the behavior of emacs on those
5566  * platforms (for text files, it translates CR-NL to NL as it reads in the
5567  * file).
5568  */
5569 static long
5570 readline_internal (lbp, stream)
5571      linebuffer *lbp;
5572      register FILE *stream;
5573 {
5574   char *buffer = lbp->buffer;
5575   register char *p = lbp->buffer;
5576   register char *pend;
5577   int chars_deleted;
5578
5579   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
5580
5581   for (;;)
5582     {
5583       register int c = getc (stream);
5584       if (p == pend)
5585         {
5586           /* We're at the end of linebuffer: expand it. */
5587           lbp->size *= 2;
5588           xrnew (buffer, lbp->size, char);
5589           p += buffer - lbp->buffer;
5590           pend = buffer + lbp->size;
5591           lbp->buffer = buffer;
5592         }
5593       if (c == EOF)
5594         {
5595           *p = '\0';
5596           chars_deleted = 0;
5597           break;
5598         }
5599       if (c == '\n')
5600         {
5601           if (p > buffer && p[-1] == '\r')
5602             {
5603               p -= 1;
5604 #ifdef DOS_NT
5605              /* Assume CRLF->LF translation will be performed by Emacs
5606                 when loading this file, so CRs won't appear in the buffer.
5607                 It would be cleaner to compensate within Emacs;
5608                 however, Emacs does not know how many CRs were deleted
5609                 before any given point in the file.  */
5610               chars_deleted = 1;
5611 #else
5612               chars_deleted = 2;
5613 #endif
5614             }
5615           else
5616             {
5617               chars_deleted = 1;
5618             }
5619           *p = '\0';
5620           break;
5621         }
5622       *p++ = c;
5623     }
5624   lbp->len = p - buffer;
5625
5626   return lbp->len + chars_deleted;
5627 }
5628
5629 /*
5630  * Like readline_internal, above, but in addition try to match the
5631  * input line against relevant regular expressions.
5632  */
5633 static long
5634 readline (lbp, stream)
5635      linebuffer *lbp;
5636      FILE *stream;
5637 {
5638   /* Read new line. */
5639   long result = readline_internal (lbp, stream);
5640
5641   /* Honour #line directives. */
5642   if (!no_line_directive)
5643     {
5644       static bool discard_until_line_directive;
5645
5646       /* Check whether this is a #line directive. */
5647       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
5648         {
5649           int start, lno;
5650
5651           if (DEBUG) start = 0; /* shut up the compiler */
5652           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
5653             {
5654               char *endp = lbp->buffer + start;
5655
5656               assert (start > 0);
5657               while ((endp = etags_strchr (endp, '"')) != NULL
5658                      && endp[-1] == '\\')
5659                 endp++;
5660               if (endp != NULL)
5661                 /* Ok, this is a real #line directive.  Let's deal with it. */
5662                 {
5663                   char *taggedabsname;  /* absolute name of original file */
5664                   char *taggedfname;    /* name of original file as given */
5665                   char *name;           /* temp var */
5666
5667                   discard_until_line_directive = FALSE; /* found it */
5668                   name = lbp->buffer + start;
5669                   *endp = '\0';
5670                   canonicalize_filename (name); /* for DOS */
5671                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
5672                   if (filename_is_absolute (name)
5673                       || filename_is_absolute (curfdp->infname))
5674                     taggedfname = savestr (taggedabsname);
5675                   else
5676                     taggedfname = relative_filename (taggedabsname,tagfiledir);
5677
5678                   if (streq (curfdp->taggedfname, taggedfname))
5679                     /* The #line directive is only a line number change.  We
5680                        deal with this afterwards. */
5681                     free (taggedfname);
5682                   else
5683                     /* The tags following this #line directive should be
5684                        attributed to taggedfname.  In order to do this, set
5685                        curfdp accordingly. */
5686                     {
5687                       fdesc *fdp; /* file description pointer */
5688
5689                       /* Go look for a file description already set up for the
5690                          file indicated in the #line directive.  If there is
5691                          one, use it from now until the next #line
5692                          directive. */
5693                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5694                         if (streq (fdp->infname, curfdp->infname)
5695                             && streq (fdp->taggedfname, taggedfname))
5696                           /* If we remove the second test above (after the &&)
5697                              then all entries pertaining to the same file are
5698                              coalesced in the tags file.  If we use it, then
5699                              entries pertaining to the same file but generated
5700                              from different files (via #line directives) will
5701                              go into separate sections in the tags file.  These
5702                              alternatives look equivalent.  The first one
5703                              destroys some apparently useless information. */
5704                           {
5705                             curfdp = fdp;
5706                             free (taggedfname);
5707                             break;
5708                           }
5709                       /* Else, if we already tagged the real file, skip all
5710                          input lines until the next #line directive. */
5711                       if (fdp == NULL) /* not found */
5712                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
5713                           if (streq (fdp->infabsname, taggedabsname))
5714                             {
5715                               discard_until_line_directive = TRUE;
5716                               free (taggedfname);
5717                               break;
5718                             }
5719                       /* Else create a new file description and use that from
5720                          now on, until the next #line directive. */
5721                       if (fdp == NULL) /* not found */
5722                         {
5723                           fdp = fdhead;
5724                           fdhead = xnew (1, fdesc);
5725                           *fdhead = *curfdp; /* copy curr. file description */
5726                           fdhead->next = fdp;
5727                           fdhead->infname = savestr (curfdp->infname);
5728                           fdhead->infabsname = savestr (curfdp->infabsname);
5729                           fdhead->infabsdir = savestr (curfdp->infabsdir);
5730                           fdhead->taggedfname = taggedfname;
5731                           fdhead->usecharno = FALSE;
5732                           curfdp = fdhead;
5733                         }
5734                     }
5735                   free (taggedabsname);
5736                   lineno = lno;
5737                   return readline (lbp, stream);
5738                 } /* if a real #line directive */
5739             } /* if #line is followed by a a number */
5740         } /* if line begins with "#line " */
5741
5742       /* If we are here, no #line directive was found. */
5743       if (discard_until_line_directive)
5744         {
5745           if (result > 0)
5746             /* Do a tail recursion on ourselves, thus discarding the contents
5747                of the line buffer. */
5748             return readline (lbp, stream);
5749           /* End of file. */
5750           discard_until_line_directive = FALSE;
5751           return 0;
5752         }
5753     } /* if #line directives should be considered */
5754
5755 #ifdef ETAGS_REGEXPS
5756   {
5757     int match;
5758     pattern *pp;
5759
5760     /* Match against relevant patterns. */
5761     if (lbp->len > 0)
5762       for (pp = p_head; pp != NULL; pp = pp->p_next)
5763         {
5764           /* Only use generic regexps or those for the current language. */
5765           if (pp->lang != NULL && pp->lang != fdhead->lang)
5766             continue;
5767
5768           match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
5769           switch (match)
5770             {
5771             case -2:
5772               /* Some error. */
5773               if (!pp->error_signaled)
5774                 {
5775                   error ("error while matching \"%s\"", pp->regex);
5776                   pp->error_signaled = TRUE;
5777                 }
5778               break;
5779             case -1:
5780               /* No match. */
5781               break;
5782             default:
5783               /* Match occurred.  Construct a tag. */
5784               if (pp->name_pattern[0] != '\0')
5785                 {
5786                   /* Make a named tag. */
5787                   char *name = substitute (lbp->buffer,
5788                                            pp->name_pattern, &pp->regs);
5789                   if (name != NULL)
5790                     pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5791                 }
5792               else
5793                 {
5794                   /* Make an unnamed tag. */
5795                   pfnote ((char *)NULL, TRUE,
5796                           lbp->buffer, match, lineno, linecharno);
5797                 }
5798               break;
5799             }
5800         }
5801   }
5802 #endif /* ETAGS_REGEXPS */
5803
5804   return result;
5805 }
5806
5807 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is made by savenstr
 * over the full length of CP, so it occupies strlen(cp)+1 newly allocated
 * bytes.  Return a pointer to the copy.
 */
static char *
savestr (cp)
     char *cp;
{
  int length = strlen (cp);

  return savenstr (cp, length);
}
5818
5819 /*
5820  * Return a pointer to a space of size LEN+1 allocated with xnew where
5821  * the string CP has been copied for at most the first LEN characters.
5822  */
5823 static char *
5824 savenstr (cp, len)
5825      char *cp;
5826      int len;
5827 {
5828   register char *dp;
5829
5830   dp = xnew (len + 1, char);
5831   strncpy (dp, cp, len);
5832   dp[len] = '\0';
5833   return dp;
5834 }
5835
/*
 * Find the last occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The terminating
 * NUL takes part in the search, so looking for '\0' yields the end of
 * the string.
 *
 * Identical to POSIX strrchr, included for portability.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;              /* best candidate so far */
      if (*sp == '\0')
        break;
    }
  return (char *)last;
}
5857
/*
 * Find the first occurrence of the character C in the string SP and
 * return a pointer to it, or NULL if C does not occur.  The terminating
 * NUL takes part in the search, so looking for '\0' yields the end of
 * the string.
 *
 * Identical to POSIX strchr, included for portability.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return (char *)sp;
      if (*sp == '\0')
        return NULL;            /* reached the end without finding C */
    }
}
5876
5877 /*
5878  * Return TRUE if the two strings are equal, ignoring case for alphabetic
5879  * characters.
5880  *
5881  * Analogous to BSD's strcasecmp, included for portability.
5882  */
5883 static bool
5884 strcaseeq (s1, s2)
5885      register const char *s1;
5886      register const char *s2;
5887 {
5888   while (*s1 != '\0'
5889          && (ISALPHA (*s1) && ISALPHA (*s2)
5890              ? lowcase (*s1) == lowcase (*s2)
5891              : *s1 == *s2))
5892     s1++, s2++;
5893
5894   return (*s1 == *s2);
5895 }
5896
/*
 * Return a pointer to the first character at or after CP for which
 * `iswhite' is false.
 * NOTE(review): nothing here tests for the end of the string, so this
 * assumes `iswhite' is false for NUL -- confirm against its definition.
 */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;
  return cp;
}
5906
/*
 * Return a pointer to the first character at or after CP that either
 * satisfies `iswhite' or is the NUL ending the string.
 */
static char *
skip_non_spaces (cp)
     char *cp;
{
  for (; *cp != '\0'; cp++)
    if (iswhite (*cp))
      break;
  return cp;
}
5916
5917 /* Print error message and exit.  */
5918 void
5919 fatal (s1, s2)
5920      char *s1, *s2;
5921 {
5922   error (s1, s2);
5923   exit (BAD);
5924 }
5925
5926 static void
5927 pfatal (s1)
5928      char *s1;
5929 {
5930   perror (s1);
5931   exit (BAD);
5932 }
5933
5934 static void
5935 suggest_asking_for_help ()
5936 {
5937   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5938            progname,
5939 #ifdef LONG_OPTIONS
5940            "--help"
5941 #else
5942            "-h"
5943 #endif
5944            );
5945   exit (BAD);
5946 }
5947
5948 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
5949 static void
5950 error (s1, s2)
5951      const char *s1, *s2;
5952 {
5953   fprintf (stderr, "%s: ", progname);
5954   fprintf (stderr, s1, s2);
5955   fprintf (stderr, "\n");
5956 }
5957
5958 /* Return a newly-allocated string whose contents
5959    concatenate those of s1, s2, s3.  */
5960 static char *
5961 concat (s1, s2, s3)
5962      char *s1, *s2, *s3;
5963 {
5964   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5965   char *result = xnew (len1 + len2 + len3 + 1, char);
5966
5967   strcpy (result, s1);
5968   strcpy (result + len1, s2);
5969   strcpy (result + len1 + len2, s3);
5970   result[len1 + len2 + len3] = '\0';
5971
5972   return result;
5973 }
5974
5975 \f
5976 /* Does the same work as the system V getcwd, but does not need to
5977    guess the buffer size in advance. */
5978 static char *
5979 etags_getcwd ()
5980 {
5981 #ifdef HAVE_GETCWD
5982   int bufsize = 200;
5983   char *path = xnew (bufsize, char);
5984
5985   while (getcwd (path, bufsize) == NULL)
5986     {
5987       if (errno != ERANGE)
5988         pfatal ("getcwd");
5989       bufsize *= 2;
5990       free (path);
5991       path = xnew (bufsize, char);
5992     }
5993
5994   canonicalize_filename (path);
5995   return path;
5996
5997 #else /* not HAVE_GETCWD */
5998 #if MSDOS
5999
6000   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6001
6002   getwd (path);
6003
6004   for (p = path; *p != '\0'; p++)
6005     if (*p == '\\')
6006       *p = '/';
6007     else
6008       *p = lowcase (*p);
6009
6010   return strdup (path);
6011 #else /* not MSDOS */
6012   linebuffer path;
6013   FILE *pipe;
6014
6015   initbuffer (&path);
6016   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6017   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6018     pfatal ("pwd");
6019   pclose (pipe);
6020
6021   return path.buffer;
6022 #endif /* not MSDOS */
6023 #endif /* not HAVE_GETCWD */
6024 }
6025
6026 /* Return a newly allocated string containing the file name of FILE
6027    relative to the absolute directory DIR (which should end with a slash). */
6028 static char *
6029 relative_filename (file, dir)
6030      char *file, *dir;
6031 {
6032   char *fp, *dp, *afn, *res;
6033   int i;
6034
6035   /* Find the common root of file and dir (with a trailing slash). */
6036   afn = absolute_filename (file, cwd);
6037   fp = afn;
6038   dp = dir;
6039   while (*fp++ == *dp++)
6040     continue;
6041   fp--, dp--;                   /* back to the first differing char */
6042 #ifdef DOS_NT
6043   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6044     return afn;
6045 #endif
6046   do                            /* look at the equal chars until '/' */
6047     fp--, dp--;
6048   while (*fp != '/');
6049
6050   /* Build a sequence of "../" strings for the resulting relative file name. */
6051   i = 0;
6052   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6053     i += 1;
6054   res = xnew (3*i + strlen (fp + 1) + 1, char);
6055   res[0] = '\0';
6056   while (i-- > 0)
6057     strcat (res, "../");
6058
6059   /* Add the file name relative to the common root of file and dir. */
6060   strcat (res, fp + 1);
6061   free (afn);
6062
6063   return res;
6064 }
6065
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   If FILE is already absolute it is just copied, else it is appended to
   DIR.  Then every "/." component is removed, and every "/.." component
   is removed together with the component before it; a "/.." at the very
   beginning is simply dropped.  If nothing is left, "/" is returned.  */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings.  The tail of the string
     is moved down over the deleted part with memmove, because source and
     destination overlap and strcpy is undefined in that case.  */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to the beginning of the previous component. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
              /* Look again from the same place: what moved down may
                 begin with another "/." or "/..". */
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted.  Do not leak the empty string. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
6126
/* Return a newly allocated string containing the absolute name of
   the directory where FILE resides, given DIR (which should end with a
   slash).  FILE is canonicalized in place.  If it contains no slash, the
   result is a copy of DIR; else it is the absolute file name of FILE cut
   right after its last slash.  */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *cut, *dirname;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    return savestr (dir);

  /* Temporarily end FILE after its last slash, and restore the
     overwritten character once the directory name has been computed. */
  cut = last_slash + 1;
  saved = *cut;
  *cut = '\0';
  dirname = absolute_filename (file, dir);
  *cut = saved;

  return dirname;
}
6148
/* Tell whether FN is an absolute file name, that is one beginning
   with a slash or, under DOS_NT, with a letter followed by ":/".  FN must
   have been canonicalized with canonicalize_filename.  */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6161
/* Bring the file name FN to canonical form.  Works in place.
   Under DOS_NT a lower case drive letter is made upper case and
   backslashes are translated into slashes.  Elsewhere FN is left
   as it is.  */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);

  /* Convert backslashes to slashes.  */
  for (p = fn; *p != '\0'; p++)
    {
      if (*p == '\\')
        *p = '/';
    }
#else
  /* Nothing to do. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
6180
6181 /* Set the minimum size of a string contained in a linebuffer. */
6182 static void
6183 linebuffer_setlen (lbp, toksize)
6184      linebuffer *lbp;
6185      int toksize;
6186 {
6187   while (lbp->size <= toksize)
6188     {
6189       lbp->size *= 2;
6190       xrnew (lbp->buffer, lbp->size, char);
6191     }
6192   lbp->len = toksize;
6193 }
6194
6195 /* Like malloc but get fatal error if memory is exhausted.  */
6196 static PTR
6197 xmalloc (size)
6198      unsigned int size;
6199 {
6200   PTR result = (PTR) malloc (size);
6201   if (result == NULL)
6202     fatal ("virtual memory exhausted", (char *)NULL);
6203   return result;
6204 }
6205
6206 static PTR
6207 xrealloc (ptr, size)
6208      char *ptr;
6209      unsigned int size;
6210 {
6211   PTR result = (PTR) realloc (ptr, size);
6212   if (result == NULL)
6213     fatal ("virtual memory exhausted", (char *)NULL);
6214   return result;
6215 }
6216
6217 /*
6218  * Local Variables:
6219  * c-indentation-style: gnu
6220  * indent-tabs-mode: t
6221  * tab-width: 8
6222  * fill-column: 79
6223  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
6224  * End:
6225  */