lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs           -*- coding: latin-1 -*-
   2    Copyright (C) 1984, 1987, 1988, 1989, 1993, 1994, 1995,
   3                  1998, 1999, 2000, 2001, 2002, 2003, 2004,
   4                  2005 Free Software Foundation, Inc. and Ken Arnold
   5
   6  This file is not considered part of GNU Emacs.
   7
   8  This program is free software; you can redistribute it and/or modify
   9  it under the terms of the GNU General Public License as published by
  10  the Free Software Foundation; either version 2 of the License, or
  11  (at your option) any later version.
  12
  13  This program is distributed in the hope that it will be useful,
  14  but WITHOUT ANY WARRANTY; without even the implied warranty of
  15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16  GNU General Public License for more details.
  17
  18  You should have received a copy of the GNU General Public License
  19  along with this program; if not, write to the Free Software Foundation,
  20  Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
  21
  22 /*
  23  * Authors:
  24  *      Ctags originally by Ken Arnold.
  25  *      Fortran added by Jim Kleckner.
  26  *      Ed Pelegri-Llopart added C typedefs.
  27  *      Gnu Emacs TAGS format and modifications by RMS?
  28  * 1989 Sam Kendall added C++.
  29  * 1992 Joseph B. Wells improved C and C++ parsing.
  30  * 1993 Francesco Potortì reorganised C and C++.
  31  * 1994 Line-by-line regexp tags by Tom Tromey.
  32  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  33  * 2002 #line directives by Francesco Potortì.
  34  *
  35  * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
  36  */
  37
  38 /*
  39  * If you want to add support for a new language, start by looking at the LUA
  40  * language, which is the simplest.  Alternatively, consider shipping a
  41  * configuration file containing regexp definitions for etags.
  42  */
  43
  44 char pot_etags_version[] = "@(#) pot revision number is 17.14";
  45
  46 #define TRUE    1
  47 #define FALSE   0
  48
  49 #ifdef DEBUG
  50 #  undef DEBUG
  51 #  define DEBUG TRUE
  52 #else
  53 #  define DEBUG  FALSE
  54 #  define NDEBUG                /* disable assert */
  55 #endif
  56
  57 #ifdef HAVE_CONFIG_H
  58 # include <config.h>
  59   /* On some systems, Emacs defines static as nothing for the sake
  60      of unexec.  We don't want that here since we don't use unexec. */
  61 # undef static
  62 # define ETAGS_REGEXPS          /* use the regexp features */
  63 # define LONG_OPTIONS           /* accept long options */
  64 # ifndef PTR                    /* for Xemacs */
  65 #   define PTR void *
  66 # endif
  67 # ifndef __P                    /* for Xemacs */
  68 #   define __P(args) args
  69 # endif
  70 #else  /* no config.h */
  71 # if defined(__STDC__) && (__STDC__ || defined(__SUNPRO_C))
  72 #   define __P(args) args       /* use prototypes */
  73 #   define PTR void *           /* for generic pointers */
  74 # else /* not standard C */
  75 #   define __P(args) ()         /* no prototypes */
  76 #   define const                /* remove const for old compilers' sake */
  77 #   define PTR long *           /* don't use void* */
  78 # endif
  79 #endif /* !HAVE_CONFIG_H */
  80
  81 #ifndef _GNU_SOURCE
  82 # define _GNU_SOURCE 1          /* enables some compiler checks on GNU */
  83 #endif
  84
  85 #ifdef LONG_OPTIONS
  86 #  undef LONG_OPTIONS
  87 #  define LONG_OPTIONS TRUE
  88 #else
  89 #  define LONG_OPTIONS  FALSE
  90 #endif
  91
  92 /* WIN32_NATIVE is for Xemacs.
  93    MSDOS, WINDOWSNT, DOS_NT are for Emacs. */
  94 #ifdef WIN32_NATIVE
  95 # undef MSDOS
  96 # undef  WINDOWSNT
  97 # define WINDOWSNT
  98 #endif /* WIN32_NATIVE */
  99
 100 #ifdef MSDOS
 101 # undef MSDOS
 102 # define MSDOS TRUE
 103 # include <fcntl.h>
 104 # include <sys/param.h>
 105 # include <io.h>
 106 # ifndef HAVE_CONFIG_H
 107 #   define DOS_NT
 108 #   include <sys/config.h>
 109 # endif
 110 #else
 111 # define MSDOS FALSE
 112 #endif /* MSDOS */
 113
 114 #ifdef WINDOWSNT
 115 # include <stdlib.h>
 116 # include <fcntl.h>
 117 # include <string.h>
 118 # include <direct.h>
 119 # include <io.h>
 120 # define MAXPATHLEN _MAX_PATH
 121 # undef HAVE_NTGUI
 122 # undef  DOS_NT
 123 # define DOS_NT
 124 # ifndef HAVE_GETCWD
 125 #   define HAVE_GETCWD
 126 # endif /* undef HAVE_GETCWD */
 127 #else /* not WINDOWSNT */
 128 # ifdef STDC_HEADERS
 129 #  include <stdlib.h>
 130 #  include <string.h>
 131 # else /* no standard C headers */
 132     extern char *getenv ();
 133 #  ifdef VMS
 134 #   define EXIT_SUCCESS 1
 135 #   define EXIT_FAILURE 0
 136 #  else /* no VMS */
 137 #   define EXIT_SUCCESS 0
 138 #   define EXIT_FAILURE 1
 139 #  endif
 140 # endif
 141 #endif /* !WINDOWSNT */
 142
 143 #ifdef HAVE_UNISTD_H
 144 # include <unistd.h>
 145 #else
 146 # if defined (HAVE_GETCWD) && !defined (WINDOWSNT)
 147     extern char *getcwd (char *buf, size_t size);
 148 # endif
 149 #endif /* HAVE_UNISTD_H */
 150
 151 #include <stdio.h>
 152 #include <ctype.h>
 153 #include <errno.h>
 154 #ifndef errno
 155   extern int errno;
 156 #endif
 157 #include <sys/types.h>
 158 #include <sys/stat.h>
 159
 160 #include <assert.h>
 161 #ifdef NDEBUG
 162 # undef  assert                 /* some systems have a buggy assert.h */
 163 # define assert(x) ((void) 0)
 164 #endif
 165
 166 #if !defined (S_ISREG) && defined (S_IFREG)
 167 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 168 #endif
 169
 170 #if LONG_OPTIONS
 171 # include <getopt.h>
 172 #else
 173 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 174   extern char *optarg;
 175   extern int optind, opterr;
 176 #endif /* LONG_OPTIONS */
 177
 178 #ifdef ETAGS_REGEXPS
 179 # ifndef HAVE_CONFIG_H          /* this is a standalone compilation */
 180 #   ifdef __CYGWIN__            /* compiling on Cygwin */
 181                              !!! NOTICE !!!
 182  the regex.h distributed with Cygwin is not compatible with etags, alas!
 183 If you want regular expression support, you should delete this notice and
 184               arrange to use the GNU regex.h and regex.c.
 185 #   endif
 186 # endif
 187 # include <regex.h>
 188 #endif /* ETAGS_REGEXPS */
 189
 190 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 191  Leave it undefined to make the program "etags", which makes emacs-style
 192  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 193 #ifdef CTAGS
 194 # undef  CTAGS
 195 # define CTAGS TRUE
 196 #else
 197 # define CTAGS FALSE
 198 #endif
 199
 200 #define streq(s,t)      (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
 201 #define strcaseeq(s,t)  (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 202 #define strneq(s,t,n)   (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
 203 #define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 204
  205 #define CHARS 256               /* 2^CHAR_BIT, assuming 8-bit chars */
  206 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1)) /* mask X into the range 0..CHARS-1 */
 207 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
 208 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
 209 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
 210 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
 211 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 212
 213 #define ISALNUM(c)      isalnum (CHAR(c))
 214 #define ISALPHA(c)      isalpha (CHAR(c))
 215 #define ISDIGIT(c)      isdigit (CHAR(c))
 216 #define ISLOWER(c)      islower (CHAR(c))
 217
 218 #define lowcase(c)      tolower (CHAR(c))
 219 #define upcase(c)       toupper (CHAR(c))
 220
 221
 222 /*
 223  *      xnew, xrnew -- allocate, reallocate storage
 224  *
 225  * SYNOPSIS:    Type *xnew (int n, Type);
 226  *              void xrnew (OldPointer, int n, Type);
 227  */
 228 #if DEBUG
 229 # include "chkmalloc.h"
 230 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 231                                                   (n) * sizeof (Type)))
 232 # define xrnew(op,n,Type) ((op) = (Type *) trace_realloc (__FILE__, __LINE__, \
 233                                         (char *) (op), (n) * sizeof (Type)))
 234 #else
 235 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 236 # define xrnew(op,n,Type) ((op) = (Type *) xrealloc ( \
 237                                         (char *) (op), (n) * sizeof (Type)))
 238 #endif
 239
 240 #define bool int
 241
 242 typedef void Lang_function __P((FILE *));
 243
 244 typedef struct
 245 {
 246   char *suffix;                 /* file name suffix for this compressor */
 247   char *command;                /* takes one arg and decompresses to stdout */
 248 } compressor;
 249
 250 typedef struct
 251 {
 252   char *name;                   /* language name */
 253   char *help;                   /* detailed help for the language */
 254   Lang_function *function;      /* parse function */
 255   char **suffixes;              /* name suffixes of this language's files */
 256   char **filenames;             /* names of this language's files */
 257   char **interpreters;          /* interpreters for this language */
 258   bool metasource;              /* source used to generate other sources */
 259 } language;
 260
 261 typedef struct fdesc
 262 {
 263   struct fdesc *next;           /* for the linked list */
 264   char *infname;                /* uncompressed input file name */
 265   char *infabsname;             /* absolute uncompressed input file name */
 266   char *infabsdir;              /* absolute dir of input file */
 267   char *taggedfname;            /* file name to write in tagfile */
 268   language *lang;               /* language of file */
 269   char *prop;                   /* file properties to write in tagfile */
 270   bool usecharno;               /* etags tags shall contain char number */
 271   bool written;                 /* entry written in the tags file */
 272 } fdesc;
 273
 274 typedef struct node_st
 275 {                               /* sorting structure */
 276   struct node_st *left, *right; /* left and right sons */
 277   fdesc *fdp;                   /* description of file to whom tag belongs */
 278   char *name;                   /* tag name */
 279   char *regex;                  /* search regexp */
 280   bool valid;                   /* write this tag on the tag file */
 281   bool is_func;                 /* function tag: use regexp in CTAGS mode */
 282   bool been_warned;             /* warning already given for duplicated tag */
 283   int lno;                      /* line number tag is on */
 284   long cno;                     /* character number line starts on */
 285 } node;
 286
 287 /*
 288  * A `linebuffer' is a structure which holds a line of text.
 289  * `readline_internal' reads a line from a stream into a linebuffer
 290  * and works regardless of the length of the line.
 291  * SIZE is the size of BUFFER, LEN is the length of the string in
 292  * BUFFER after readline reads it.
 293  */
  294 typedef struct
  295 {
  296   long size;                    /* size of BUFFER */
  297   int len;                      /* length of the string in BUFFER */
  298   char *buffer;                 /* storage holding the line text */
  299 } linebuffer;
 300
 301 /* Used to support mixing of --lang and file names. */
 302 typedef struct
 303 {
 304   enum {
 305     at_language,                /* a language specification */
 306     at_regexp,                  /* a regular expression */
 307     at_filename,                /* a file name */
 308     at_stdin,                   /* read from stdin here */
 309     at_end                      /* stop parsing the list */
 310   } arg_type;                   /* argument type */
 311   language *lang;               /* language associated with the argument */
 312   char *what;                   /* the argument itself */
 313 } argument;
 314
 315 #ifdef ETAGS_REGEXPS
 316 /* Structure defining a regular expression. */
  317 typedef struct regexp
  318 {
  319   struct regexp *p_next;        /* pointer to next in list */
  320   language *lang;               /* if set, use only for this language */
  321   char *pattern;                /* the regexp pattern */
  322   char *name;                   /* tag name */
  323   struct re_pattern_buffer *pat; /* the compiled pattern */
  324   struct re_registers regs;     /* re registers */
  325   bool error_signaled;          /* already signaled for this regexp */
  326   bool force_explicit_name;     /* do not allow implicit tag name */
  327   bool ignore_case;             /* ignore case when matching */
  328   bool multi_line;              /* do a multi-line match on the whole file */
  329 } regexp;
 330 #endif /* ETAGS_REGEXPS */
 331
 332
 333 /* Many compilers barf on this:
 334         Lang_function Ada_funcs;
 335    so let's write it this way */
 336 static void Ada_funcs __P((FILE *));
 337 static void Asm_labels __P((FILE *));
 338 static void C_entries __P((int c_ext, FILE *));
 339 static void default_C_entries __P((FILE *));
 340 static void plain_C_entries __P((FILE *));
 341 static void Cjava_entries __P((FILE *));
 342 static void Cobol_paragraphs __P((FILE *));
 343 static void Cplusplus_entries __P((FILE *));
 344 static void Cstar_entries __P((FILE *));
 345 static void Erlang_functions __P((FILE *));
 346 static void Forth_words __P((FILE *));
 347 static void Fortran_functions __P((FILE *));
 348 static void HTML_labels __P((FILE *));
 349 static void Lisp_functions __P((FILE *));
 350 static void Lua_functions __P((FILE *));
 351 static void Makefile_targets __P((FILE *));
 352 static void Pascal_functions __P((FILE *));
 353 static void Perl_functions __P((FILE *));
 354 static void PHP_functions __P((FILE *));
 355 static void PS_functions __P((FILE *));
 356 static void Prolog_functions __P((FILE *));
 357 static void Python_functions __P((FILE *));
 358 static void Scheme_functions __P((FILE *));
 359 static void TeX_commands __P((FILE *));
 360 static void Texinfo_nodes __P((FILE *));
 361 static void Yacc_entries __P((FILE *));
 362 static void just_read_file __P((FILE *));
 363
 364 static void print_language_names __P((void));
 365 static void print_version __P((void));
 366 static void print_help __P((argument *));
 367 int main __P((int, char **));
 368
 369 static compressor *get_compressor_from_suffix __P((char *, char **));
 370 static language *get_language_from_langname __P((const char *));
 371 static language *get_language_from_interpreter __P((char *));
 372 static language *get_language_from_filename __P((char *, bool));
 373 static void readline __P((linebuffer *, FILE *));
 374 static long readline_internal __P((linebuffer *, FILE *));
 375 static bool nocase_tail __P((char *));
 376 static void get_tag __P((char *, char **));
 377
 378 #ifdef ETAGS_REGEXPS
 379 static void analyse_regex __P((char *));
 380 static void free_regexps __P((void));
 381 static void regex_tag_multiline __P((void));
 382 #endif /* ETAGS_REGEXPS */
 383 static void error __P((const char *, const char *));
 384 static void suggest_asking_for_help __P((void));
 385 void fatal __P((char *, char *));
 386 static void pfatal __P((char *));
 387 static void add_node __P((node *, node **));
 388
 389 static void init __P((void));
 390 static void process_file_name __P((char *, language *));
 391 static void process_file __P((FILE *, char *, language *));
 392 static void find_entries __P((FILE *));
 393 static void free_tree __P((node *));
 394 static void free_fdesc __P((fdesc *));
 395 static void pfnote __P((char *, bool, char *, int, int, long));
 396 static void make_tag __P((char *, int, bool, char *, int, int, long));
 397 static void invalidate_nodes __P((fdesc *, node **));
 398 static void put_entries __P((node *));
 399
 400 static char *concat __P((char *, char *, char *));
 401 static char *skip_spaces __P((char *));
 402 static char *skip_non_spaces __P((char *));
 403 static char *savenstr __P((char *, int));
 404 static char *savestr __P((char *));
 405 static char *etags_strchr __P((const char *, int));
 406 static char *etags_strrchr __P((const char *, int));
 407 static int etags_strcasecmp __P((const char *, const char *));
 408 static int etags_strncasecmp __P((const char *, const char *, int));
 409 static char *etags_getcwd __P((void));
 410 static char *relative_filename __P((char *, char *));
 411 static char *absolute_filename __P((char *, char *));
 412 static char *absolute_dirname __P((char *, char *));
 413 static bool filename_is_absolute __P((char *f));
 414 static void canonicalize_filename __P((char *));
 415 static void linebuffer_init __P((linebuffer *));
 416 static void linebuffer_setlen __P((linebuffer *, int));
 417 static PTR xmalloc __P((unsigned int));
 418 static PTR xrealloc __P((char *, unsigned int));
 419
 420 \f
 421 static char searchar = '/';     /* use /.../ searches */
 422
 423 static char *tagfile;           /* output file */
 424 static char *progname;          /* name this program was invoked with */
 425 static char *cwd;               /* current working directory */
 426 static char *tagfiledir;        /* directory of tagfile */
 427 static FILE *tagf;              /* ioptr for tags file */
 428
 429 static fdesc *fdhead;           /* head of file description list */
 430 static fdesc *curfdp;           /* current file description */
 431 static int lineno;              /* line number of current line */
 432 static long charno;             /* current character number */
 433 static long linecharno;         /* charno of start of current line */
 434 static char *dbp;               /* pointer to start of current tag */
 435
 436 static const int invalidcharno = -1;
 437
 438 static node *nodehead;          /* the head of the binary tree of tags */
 439 static node *last_node;         /* the last node created */
 440
 441 static linebuffer lb;           /* the current line */
 442 static linebuffer filebuf;      /* a buffer containing the whole file */
 443 static linebuffer token_name;   /* a buffer containing a tag name */
 444
 445 /* boolean "functions" (see init)       */
 446 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 447 static char
 448   /* white chars */
 449   *white = " \f\t\n\r\v",
 450   /* not in a name */
 451   *nonam = " \f\t\n\r()=,;",    /* look at make_tag before modifying! */
 452   /* token ending chars */
 453   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 454   /* token starting chars */
 455   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 456   /* valid in-token chars */
 457   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 458
 459 static bool append_to_tagfile;  /* -a: append to tags */
 460 /* The next four default to TRUE for etags, but to FALSE for ctags.  */
 461 static bool typedefs;           /* -t: create tags for C and Ada typedefs */
 462 static bool typedefs_or_cplusplus; /* -T: create tags for C typedefs, level */
 463                                 /* 0 struct/enum/union decls, and C++ */
 464                                 /* member functions. */
 465 static bool constantypedefs;    /* -d: create tags for C #define, enum */
 466                                 /* constants and variables. */
 467                                 /* -D: opposite of -d.  Default under ctags. */
 468 static bool globals;            /* create tags for global variables */
 469 static bool declarations;       /* --declarations: tag them and extern in C&Co*/
 470 static bool members;            /* create tags for C member variables */
 471 static bool no_line_directive;  /* ignore #line directives (undocumented) */
 472 static bool update;             /* -u: update tags */
 473 static bool vgrind_style;       /* -v: create vgrind style index output */
 474 static bool no_warnings;        /* -w: suppress warnings */
 475 static bool cxref_style;        /* -x: create cxref style output */
 476 static bool cplusplus;          /* .[hc] means C++, not C */
 477 static bool ignoreindent;       /* -I: ignore indentation in C */
 478 static bool packages_only;      /* --packages-only: in Ada, only tag packages*/
 479
 480 /* STDIN is defined in LynxOS system headers */
 481 #ifdef STDIN
 482 #undef STDIN
 483 #endif
 484
 485 #define STDIN 0x1001            /* returned by getopt_long on --parse-stdin */
 486 static bool parsing_stdin;      /* --parse-stdin used */
 487
 488 #ifdef ETAGS_REGEXPS
 489 static regexp *p_head;          /* list of all regexps */
 490 static bool need_filebuf;       /* some regexes are multi-line */
 491 #else
 492 # define need_filebuf FALSE
 493 #endif /* ETAGS_REGEXPS */
 494
 495 #if LONG_OPTIONS
  496 static struct option longopts[] = /* long-option table (struct option from <getopt.h>) */
  497 {
  498   { "append",             no_argument,       NULL,               'a'   },
  499   { "packages-only",      no_argument,       &packages_only,     TRUE  },
  500   { "c++",                no_argument,       NULL,               'C'   },
  501   { "declarations",       no_argument,       &declarations,      TRUE  },
  502   { "no-line-directive",  no_argument,       &no_line_directive, TRUE  },
  503   { "help",               no_argument,       NULL,               'h'   },
  504   { "help",               no_argument,       NULL,               'H'   }, /* NOTE(review): repeats the name "help" from the entry above with a different value -- confirm this entry is ever selected */
  505   { "ignore-indentation", no_argument,       NULL,               'I'   },
  506   { "language",           required_argument, NULL,               'l'   },
  507   { "members",            no_argument,       &members,           TRUE  },
  508   { "no-members",         no_argument,       &members,           FALSE },
  509   { "output",             required_argument, NULL,               'o'   },
  510 #ifdef ETAGS_REGEXPS
  511   { "regex",              required_argument, NULL,               'r'   },
  512   { "no-regex",           no_argument,       NULL,               'R'   },
  513   { "ignore-case-regex",  required_argument, NULL,               'c'   },
  514 #endif /* ETAGS_REGEXPS */
  515   { "parse-stdin",        required_argument, NULL,               STDIN },
  516   { "version",            no_argument,       NULL,               'V'   },
  517
  518 #if CTAGS /* Ctags options */
  519   { "backward-search",    no_argument,       NULL,               'B'   },
  520   { "cxref",              no_argument,       NULL,               'x'   },
  521   { "defines",            no_argument,       NULL,               'd'   },
  522   { "globals",            no_argument,       &globals,           TRUE  },
  523   { "typedefs",           no_argument,       NULL,               't'   },
  524   { "typedefs-and-c++",   no_argument,       NULL,               'T'   },
  525   { "update",             no_argument,       NULL,               'u'   },
  526   { "vgrind",             no_argument,       NULL,               'v'   },
  527   { "no-warn",            no_argument,       NULL,               'w'   },
  528
  529 #else /* Etags options */
  530   { "no-defines",         no_argument,       NULL,               'D'   },
  531   { "no-globals",         no_argument,       &globals,           FALSE },
  532   { "include",            required_argument, NULL,               'i'   },
  533 #endif
  534   { NULL }                      /* zero-filled entry terminating the table */
  535 };
 536 #endif /* LONG_OPTIONS */
 537
 538 static compressor compressors[] =
 539 {
 540   { "z", "gzip -d -c"},
 541   { "Z", "gzip -d -c"},
 542   { "gz", "gzip -d -c"},
 543   { "GZ", "gzip -d -c"},
 544   { "bz2", "bzip2 -d -c" },
 545   { NULL }
 546 };
 547
 548 /*
 549  * Language stuff.
 550  */
 551
 552 /* Ada code */
 553 static char *Ada_suffixes [] =
 554   { "ads", "adb", "ada", NULL };
 555 static char Ada_help [] =
 556 "In Ada code, functions, procedures, packages, tasks and types are\n\
 557 tags.  Use the `--packages-only' option to create tags for\n\
 558 packages only.\n\
 559 Ada tag names have suffixes indicating the type of entity:\n\
 560         Entity type:    Qualifier:\n\
 561         ------------    ----------\n\
 562         function        /f\n\
 563         procedure       /p\n\
 564         package spec    /s\n\
 565         package body    /b\n\
 566         type            /t\n\
 567         task            /k\n\
 568 Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
 569 body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
 570 will just search for any tag `bidule'.";
 571
 572 /* Assembly code */
 573 static char *Asm_suffixes [] =
 574   { "a",        /* Unix assembler */
 575     "asm", /* Microcontroller assembly */
 576     "def", /* BSO/Tasking definition includes  */
 577     "inc", /* Microcontroller include files */
 578     "ins", /* Microcontroller include files */
 579     "s", "sa", /* Unix assembler */
 580     "S",   /* cpp-processed Unix assembler */
 581     "src", /* BSO/Tasking C compiler output */
 582     NULL
 583   };
 584 static char Asm_help [] =
 585 "In assembler code, labels appearing at the beginning of a line,\n\
 586 followed by a colon, are tags.";
 587
 588
  589 /* Note that .c and .h can be considered C++, if the --c++ flag was
  590    given, or if the `class' or `template' keywords are met inside the file.
  591    That is why default_C_entries is called for these. */
 592 static char *default_C_suffixes [] =
 593   { "c", "h", NULL };
 594 static char default_C_help [] =
 595 "In C code, any C function or typedef is a tag, and so are\n\
 596 definitions of `struct', `union' and `enum'.  `#define' macro\n\
 597 definitions and `enum' constants are tags unless you specify\n\
 598 `--no-defines'.  Global variables are tags unless you specify\n\
 599 `--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
 600 can make the tags table file much smaller.\n\
 601 You can tag function declarations and external variables by\n\
 602 using `--declarations', and struct members by using `--members'.";
 603
 604 static char *Cplusplus_suffixes [] =
 605   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
 606     "M",                        /* Objective C++ */
 607     "pdb",                      /* Postscript with C syntax */
 608     NULL };
 609 static char Cplusplus_help [] =
 610 "In C++ code, all the tag constructs of C code are tagged.  (Use\n\
 611 --help --lang=c --lang=c++ for full help.)\n\
 612 In addition to C tags, member functions are also recognized, and\n\
 613 optionally member variables if you use the `--members' option.\n\
 614 Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
 615 and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
 616 `operator+'.";
 617
 618 static char *Cjava_suffixes [] =
 619   { "java", NULL };
 620 static char Cjava_help [] =
 621 "In Java code, all the tags constructs of C and C++ code are\n\
 622 tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
 623
 624
 625 static char *Cobol_suffixes [] =
 626   { "COB", "cob", NULL };
 627 static char Cobol_help [] =
 628 "In Cobol code, tags are paragraph names; that is, any word\n\
 629 starting in column 8 and followed by a period.";
 630
 631 static char *Cstar_suffixes [] =
 632   { "cs", "hs", NULL };
 633
 634 static char *Erlang_suffixes [] =
 635   { "erl", "hrl", NULL };
 636 static char Erlang_help [] =
 637 "In Erlang code, the tags are the functions, records and macros\n\
 638 defined in the file.";
 639
 640 char *Forth_suffixes [] =
 641   { "fth", "tok", NULL };
 642 static char Forth_help [] =
 643 "In Forth code, tags are words defined by `:',\n\
 644 constant, code, create, defer, value, variable, buffer:, field.";
 645
 646 static char *Fortran_suffixes [] =
 647   { "F", "f", "f90", "for", NULL };
 648 static char Fortran_help [] =
 649 "In Fortran code, functions, subroutines and block data are tags.";
 650
 651 static char *HTML_suffixes [] =
 652   { "htm", "html", "shtml", NULL };
 653 static char HTML_help [] =
 654 "In HTML input files, the tags are the `title' and the `h1', `h2',\n\
 655 `h3' headers.  Also, tags are `name=' in anchors and all\n\
 656 occurrences of `id='.";
 657
 658 static char *Lisp_suffixes [] =
 659   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
 660 static char Lisp_help [] =
 661 "In Lisp code, any function defined with `defun', any variable\n\
 662 defined with `defvar' or `defconst', and in general the first\n\
 663 argument of any expression that starts with `(def' in column zero\n\
 664 is a tag.";
 665
 666 static char *Lua_suffixes [] =
 667   { "lua", "LUA", NULL };
 668 static char Lua_help [] =
 669 "In Lua scripts, all functions are tags.";
 670
 671 static char *Makefile_filenames [] =
 672   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
 673 static char Makefile_help [] =
 674 "In makefiles, targets are tags; additionally, variables are tags\n\
 675 unless you specify `--no-globals'.";
 676
 677 static char *Objc_suffixes [] =
 678   { "lm",                       /* Objective lex file */
 679     "m",                        /* Objective C file */
 680      NULL };
 681 static char Objc_help [] =
 682 "In Objective C code, tags include Objective C definitions for classes,\n\
 683 class categories, methods and protocols.  Tags for variables and\n\
 684 functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 685
 686 static char *Pascal_suffixes [] =
 687   { "p", "pas", NULL };
 688 static char Pascal_help [] =
 689 "In Pascal code, the tags are the functions and procedures defined\n\
 690 in the file.";
 691
 692 static char *Perl_suffixes [] =
 693   { "pl", "pm", NULL };
 694 static char *Perl_interpreters [] =
 695   { "perl", "@PERL@", NULL };
 696 static char Perl_help [] =
 697 "In Perl code, the tags are the packages, subroutines and variables\n\
 698 defined by the `package', `sub', `my' and `local' keywords.  Use\n\
 699 `--globals' if you want to tag global variables.  Tags for\n\
 700 subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
 701 defined in the default package is `main::SUB'.";
 702
 703 static char *PHP_suffixes [] =
 704   { "php", "php3", "php4", NULL };
 705 static char PHP_help [] =
 706 "In PHP code, tags are functions, classes and defines.  When using\n\
 707 the `--members' option, vars are tags too.";
 708
 709 static char *plain_C_suffixes [] =
 710   { "pc",                       /* Pro*C file */
 711      NULL };
 712
 713 static char *PS_suffixes [] =
 714   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 715 static char PS_help [] =
 716 "In PostScript code, the tags are the functions.";
 717
 718 static char *Prolog_suffixes [] =
 719   { "prolog", NULL };
 720 static char Prolog_help [] =
 721 "In Prolog code, tags are predicates and rules at the beginning of\n\
 722 line.";
 723
 724 static char *Python_suffixes [] =
 725   { "py", NULL };
 726 static char Python_help [] =
 727 "In Python code, `def' or `class' at the beginning of a line\n\
 728 generate a tag.";
 729
 730 /* Can't do the `SCM' or `scm' prefix with a version number. */
 731 static char *Scheme_suffixes [] =
 732   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
 733 static char Scheme_help [] =
 734 "In Scheme code, tags include anything defined with `def' or with a\n\
 735 construct whose name starts with `def'.  They also include\n\
 736 variables set with `set!' at top level in the file.";
 737
 738 static char *TeX_suffixes [] =
 739   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
 740 static char TeX_help [] =
 741 "In LaTeX text, the argument of any of the commands `\\chapter',\n\
 742 `\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
 743 `\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
 744 `\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
 745 `\\newenvironment' or `\\renewenvironment' is a tag.\n\
 746 \n\
 747 Other commands can be specified by setting the environment variable\n\
 748 `TEXTAGS' to a colon-separated list like, for example,\n\
 749      TEXTAGS=\"mycommand:myothercommand\".";
 750
 751
 752 static char *Texinfo_suffixes [] =
 753   { "texi", "texinfo", "txi", NULL };
 754 static char Texinfo_help [] =
 755 "for texinfo files, lines starting with @node are tagged.";
 756
 757 static char *Yacc_suffixes [] =
 758   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
 759 static char Yacc_help [] =
 760 "In Bison or Yacc input files, each rule defines as a tag the\n\
 761 nonterminal it constructs.  The portions of the file that contain\n\
 762 C code are parsed as C code (use --help --lang=c --lang=yacc\n\
 763 for full help).";
 764
 765 static char auto_help [] =
 766 "`auto' is not a real language, it indicates to use\n\
 767 a default language for files base on file name suffix and file contents.";
 768
 769 static char none_help [] =
 770 "`none' is not a real language, it indicates to only do\n\
 771 regexp processing on files.";
 772
 773 static char no_lang_help [] =
 774 "No detailed help available for this language.";
 775
 776
/*
 * Table of languages.
 *
 * Each entry starts with the language name (as given to --language)
 * and its help text, followed by the tagging function.  The remaining
 * initializers are optional; judging from the names used below they
 * are, in order: the list of file name suffixes, the list of whole
 * file names, and the list of interpreter names.  The yacc entry sets
 * one further trailing flag to TRUE (NOTE(review): its meaning is
 * defined with the `language' type, not visible here).  Omitted
 * trailing members are zero-initialized.
 *
 * The table is terminated by an entry whose name is NULL; every loop
 * over it stops on that entry.
 *
 * It is ok for a given function to be listed under more than one
 * name: plain_C_entries serves both "objc" and "proc".
 */

static language lang_names [] =
{
  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
  { "c",         default_C_help, default_C_entries, default_C_suffixes },
  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
  { "forth",     Forth_help,     Forth_words,       Forth_suffixes     },
  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
  { "lua",       Lua_help,       Lua_functions,     Lua_suffixes       },
  /* Recognized by whole file name, not by suffix. */
  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
  /* The only entry that supplies a list of interpreter names. */
  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
  { "python",    Python_help,    Python_functions,  Python_suffixes    },
  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
  { "auto",      auto_help },                      /* default guessing scheme */
  { "none",      none_help,      just_read_file }, /* regexp matching only */
  { NULL }                /* end of list */
};
 816
 817 \f
 818 static void
 819 print_language_names ()
 820 {
 821   language *lang;
 822   char **name, **ext;
 823
 824   puts ("\nThese are the currently supported languages, along with the\n\
 825 default file names and dot suffixes:");
 826   for (lang = lang_names; lang->name != NULL; lang++)
 827     {
 828       printf ("  %-*s", 10, lang->name);
 829       if (lang->filenames != NULL)
 830         for (name = lang->filenames; *name != NULL; name++)
 831           printf (" %s", *name);
 832       if (lang->suffixes != NULL)
 833         for (ext = lang->suffixes; *ext != NULL; ext++)
 834           printf (" .%s", *ext);
 835       puts ("");
 836     }
 837   puts ("where `auto' means use default language for files based on file\n\
 838 name suffix, and `none' means only do regexp processing on files.\n\
 839 If no language is specified and no matching suffix is found,\n\
 840 the first line of the file is read for a sharp-bang (#!) sequence\n\
 841 followed by the name of an interpreter.  If no such sequence is found,\n\
 842 Fortran is tried first; if no tags are found, C is tried next.\n\
 843 When parsing any C file, a \"class\" or \"template\" keyword\n\
 844 switches to C++.");
 845   puts ("Compressed files are supported using gzip and bzip2.\n\
 846 \n\
 847 For detailed help on a given language use, for example,\n\
 848 etags --help --lang=ada.");
 849 }
 850
 851 #ifndef EMACS_NAME
 852 # define EMACS_NAME "standalone"
 853 #endif
 854 #ifndef VERSION
 855 # define VERSION "version"
 856 #endif
 857 static void
 858 print_version ()
 859 {
 860   printf ("%s (%s %s)\n", (CTAGS) ? "ctags" : "etags", EMACS_NAME, VERSION);
 861   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
 862   puts ("This program is distributed under the same terms as Emacs");
 863
 864   exit (EXIT_SUCCESS);
 865 }
 866
 867 static void
 868 print_help (argbuffer)
 869      argument *argbuffer;
 870 {
 871   bool help_for_lang = FALSE;
 872
 873   for (; argbuffer->arg_type != at_end; argbuffer++)
 874     if (argbuffer->arg_type == at_language)
 875       {
 876         if (help_for_lang)
 877           puts ("");
 878         puts (argbuffer->lang->help);
 879         help_for_lang = TRUE;
 880       }
 881
 882   if (help_for_lang)
 883     exit (EXIT_SUCCESS);
 884
 885   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 886 \n\
 887 These are the options accepted by %s.\n", progname, progname);
 888   if (LONG_OPTIONS)
 889     puts ("You may use unambiguous abbreviations for the long option names.");
 890   else
 891     puts ("Long option names do not work with this executable, as it is not\n\
 892 linked with GNU getopt.");
 893   puts ("  A - as file name means read names from stdin (one per line).\n\
 894 Absolute names are stored in the output file as they are.\n\
 895 Relative ones are stored relative to the output file's directory.\n");
 896
 897   puts ("-a, --append\n\
 898         Append tag entries to existing tags file.");
 899
 900   puts ("--packages-only\n\
 901         For Ada files, only generate tags for packages.");
 902
 903   if (CTAGS)
 904     puts ("-B, --backward-search\n\
 905         Write the search commands for the tag entries using '?', the\n\
 906         backward-search command instead of '/', the forward-search command.");
 907
 908   /* This option is mostly obsolete, because etags can now automatically
 909      detect C++.  Retained for backward compatibility and for debugging and
 910      experimentation.  In principle, we could want to tag as C++ even
 911      before any "class" or "template" keyword.
 912   puts ("-C, --c++\n\
 913         Treat files whose name suffix defaults to C language as C++ files.");
 914   */
 915
 916   puts ("--declarations\n\
 917         In C and derived languages, create tags for function declarations,");
 918   if (CTAGS)
 919     puts ("\tand create tags for extern variables if --globals is used.");
 920   else
 921     puts
 922       ("\tand create tags for extern variables unless --no-globals is used.");
 923
 924   if (CTAGS)
 925     puts ("-d, --defines\n\
 926         Create tag entries for C #define constants and enum constants, too.");
 927   else
 928     puts ("-D, --no-defines\n\
 929         Don't create tag entries for C #define constants and enum constants.\n\
 930         This makes the tags file smaller.");
 931
 932   if (!CTAGS)
 933     puts ("-i FILE, --include=FILE\n\
 934         Include a note in tag file indicating that, when searching for\n\
 935         a tag, one should also consult the tags file FILE after\n\
 936         checking the current file.");
 937
 938   puts ("-l LANG, --language=LANG\n\
 939         Force the following files to be considered as written in the\n\
 940         named language up to the next --language=LANG option.");
 941
 942   if (CTAGS)
 943     puts ("--globals\n\
 944         Create tag entries for global variables in some languages.");
 945   else
 946     puts ("--no-globals\n\
 947         Do not create tag entries for global variables in some\n\
 948         languages.  This makes the tags file smaller.");
 949   puts ("--members\n\
 950         Create tag entries for members of structures in some languages.");
 951
 952 #ifdef ETAGS_REGEXPS
 953   puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
 954         Make a tag for each line matching a regular expression pattern\n\
 955         in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
 956         files only.  REGEXFILE is a file containing one REGEXP per line.\n\
 957         REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
 958         optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
 959   puts ("       If TAGNAME/ is present, the tags created are named.\n\
 960         For example Tcl named tags can be created with:\n\
 961           --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
 962         MODS are optional one-letter modifiers: `i' means to ignore case,\n\
 963         `m' means to allow multi-line matches, `s' implies `m' and\n\
 964         causes dot to match any character, including newline.");
 965   puts ("-R, --no-regex\n\
 966         Don't create tags from regexps for the following files.");
 967 #endif /* ETAGS_REGEXPS */
 968   puts ("-I, --ignore-indentation\n\
 969         In C and C++ do not assume that a closing brace in the first\n\
 970         column is the final brace of a function or structure definition.");
 971   puts ("-o FILE, --output=FILE\n\
 972         Write the tags to FILE.");
 973   puts ("--parse-stdin=NAME\n\
 974         Read from standard input and record tags as belonging to file NAME.");
 975
 976   if (CTAGS)
 977     {
 978       puts ("-t, --typedefs\n\
 979         Generate tag entries for C and Ada typedefs.");
 980       puts ("-T, --typedefs-and-c++\n\
 981         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 982         and C++ member functions.");
 983     }
 984
 985   if (CTAGS)
 986     puts ("-u, --update\n\
 987         Update the tag entries for the given files, leaving tag\n\
 988         entries for other files in place.  Currently, this is\n\
 989         implemented by deleting the existing entries for the given\n\
 990         files and then rewriting the new entries at the end of the\n\
 991         tags file.  It is often faster to simply rebuild the entire\n\
 992         tag file than to use this.");
 993
 994   if (CTAGS)
 995     {
 996       puts ("-v, --vgrind\n\
 997         Generates an index of items intended for human consumption,\n\
 998         similar to the output of vgrind.  The index is sorted, and\n\
 999         gives the page number of each item.");
1000       puts ("-w, --no-warn\n\
1001         Suppress warning messages about entries defined in multiple\n\
1002         files.");
1003       puts ("-x, --cxref\n\
1004         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
1005         The output uses line numbers instead of page numbers, but\n\
1006         beyond that the differences are cosmetic; try both to see\n\
1007         which you like.");
1008     }
1009
1010   puts ("-V, --version\n\
1011         Print the version of the program.\n\
1012 -h, --help\n\
1013         Print this help message.\n\
1014         Followed by one or more `--language' options prints detailed\n\
1015         help about tag generation for the specified languages.");
1016
1017   print_language_names ();
1018
1019   puts ("");
1020   puts ("Report bugs to bug-gnu-emacs@gnu.org");
1021
1022   exit (EXIT_SUCCESS);
1023 }
1024
1025 \f
1026 #ifdef VMS                      /* VMS specific functions */
1027
/* String terminator. */
#define EOS     '\0'

/* This is a BUG!  ANY arbitrary limit is a BUG!
   Won't someone please fix this?  */
#define MAX_FILE_SPEC_LEN       255

/* Buffer that receives an expanded file name.  fn_exp hands it to
   lib$find_file through a descriptor and afterwards uses CURLEN as
   the index at which BODY is terminated, so CURLEN is the length of
   the name currently held in BODY.  BODY has one extra byte for the
   terminating EOS. */
typedef struct  {
  short   curlen;
  char    body[MAX_FILE_SPEC_LEN + 1];
} vspec;
1037
1038 /*
1039  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
1040  returning in each successive call the next file name matching the input
1041  spec. The function expects that each in_spec passed
1042  to it will be processed to completion; in particular, up to and
1043  including the call following that in which the last matching name
1044  is returned, the function ignores the value of in_spec, and will
1045  only start processing a new spec with the following call.
1046  If an error occurs, on return out_spec contains the value
1047  of in_spec when the error occurred.
1048
1049  With each successive file name returned in out_spec, the
1050  function's return value is one. When there are no more matching
1051  names the function returns zero. If on the first call no file
1052  matches in_spec, or there is any other error, -1 is returned.
1053 */
1054
1055 #include        <rmsdef.h>
1056 #include        <descrip.h>
1057 #define         OUTSIZE MAX_FILE_SPEC_LEN
1058 static short
1059 fn_exp (out, in)
1060      vspec *out;
1061      char *in;
1062 {
1063   static long context = 0;
1064   static struct dsc$descriptor_s o;
1065   static struct dsc$descriptor_s i;
1066   static bool pass1 = TRUE;
1067   long status;
1068   short retval;
1069
1070   if (pass1)
1071     {
1072       pass1 = FALSE;
1073       o.dsc$a_pointer = (char *) out;
1074       o.dsc$w_length = (short)OUTSIZE;
1075       i.dsc$a_pointer = in;
1076       i.dsc$w_length = (short)strlen(in);
1077       i.dsc$b_dtype = DSC$K_DTYPE_T;
1078       i.dsc$b_class = DSC$K_CLASS_S;
1079       o.dsc$b_dtype = DSC$K_DTYPE_VT;
1080       o.dsc$b_class = DSC$K_CLASS_VS;
1081     }
1082   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
1083     {
1084       out->body[out->curlen] = EOS;
1085       return 1;
1086     }
1087   else if (status == RMS$_NMF)
1088     retval = 0;
1089   else
1090     {
1091       strcpy(out->body, in);
1092       retval = -1;
1093     }
1094   lib$find_file_end(&context);
1095   pass1 = TRUE;
1096   return retval;
1097 }
1098
1099 /*
1100   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
1101   name of each file specified by the provided arg expanding wildcards.
1102 */
1103 static char *
1104 gfnames (arg, p_error)
1105      char *arg;
1106      bool *p_error;
1107 {
1108   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
1109
1110   switch (fn_exp (&filename, arg))
1111     {
1112     case 1:
1113       *p_error = FALSE;
1114       return filename.body;
1115     case 0:
1116       *p_error = FALSE;
1117       return NULL;
1118     default:
1119       *p_error = TRUE;
1120       return filename.body;
1121     }
1122 }
1123
#ifndef OLD  /* Newer versions of VMS do provide `system'.  */
/* Stand-in for system(): reports that shell commands cannot be run
   and returns -1.  main compares the result with EXIT_SUCCESS, so a
   definite failure value must be returned instead of falling off the
   end of the function. */
int
system (cmd)
     char *cmd;
{
  error ("%s", "system() function not implemented under VMS");
  return -1;
}
#endif
1131
1132 #define VERSION_DELIM   ';'
1133 char *massage_name (s)
1134      char *s;
1135 {
1136   char *start = s;
1137
1138   for ( ; *s; s++)
1139     if (*s == VERSION_DELIM)
1140       {
1141         *s = EOS;
1142         break;
1143       }
1144     else
1145       *s = lowcase (*s);
1146   return start;
1147 }
1148 #endif /* VMS */
1149
1150 \f
1151 int
1152 main (argc, argv)
1153      int argc;
1154      char *argv[];
1155 {
1156   int i;
1157   unsigned int nincluded_files;
1158   char **included_files;
1159   argument *argbuffer;
1160   int current_arg, file_count;
1161   linebuffer filename_lb;
1162   bool help_asked = FALSE;
1163 #ifdef VMS
1164   bool got_err;
1165 #endif
1166  char *optstring;
1167  int opt;
1168
1169
1170 #ifdef DOS_NT
1171   _fmode = O_BINARY;   /* all of files are treated as binary files */
1172 #endif /* DOS_NT */
1173
1174   progname = argv[0];
1175   nincluded_files = 0;
1176   included_files = xnew (argc, char *);
1177   current_arg = 0;
1178   file_count = 0;
1179
1180   /* Allocate enough no matter what happens.  Overkill, but each one
1181      is small. */
1182   argbuffer = xnew (argc, argument);
1183
1184   /*
1185    * If etags, always find typedefs and structure tags.  Why not?
1186    * Also default to find macro constants, enum constants and
1187    * global variables.
1188    */
1189   if (!CTAGS)
1190     {
1191       typedefs = typedefs_or_cplusplus = constantypedefs = TRUE;
1192       globals = TRUE;
1193     }
1194
1195   /* When the optstring begins with a '-' getopt_long does not rearrange the
1196      non-options arguments to be at the end, but leaves them alone. */
1197   optstring = "-";
1198 #ifdef ETAGS_REGEXPS
1199   optstring = "-r:Rc:";
1200 #endif /* ETAGS_REGEXPS */
1201   if (!LONG_OPTIONS)
1202     optstring += 1;             /* remove the initial '-' */
1203   optstring = concat (optstring,
1204                       "aCf:Il:o:SVhH",
1205                       (CTAGS) ? "BxdtTuvw" : "Di:");
1206
1207   while ((opt = getopt_long (argc, argv, optstring, longopts, NULL)) != EOF)
1208     switch (opt)
1209       {
1210       case 0:
1211         /* If getopt returns 0, then it has already processed a
1212            long-named option.  We should do nothing.  */
1213         break;
1214
1215       case 1:
1216         /* This means that a file name has been seen.  Record it. */
1217         argbuffer[current_arg].arg_type = at_filename;
1218         argbuffer[current_arg].what     = optarg;
1219         ++current_arg;
1220         ++file_count;
1221         break;
1222
1223       case STDIN:
1224         /* Parse standard input.  Idea by Vivek <vivek@etla.org>. */
1225         argbuffer[current_arg].arg_type = at_stdin;
1226         argbuffer[current_arg].what     = optarg;
1227         ++current_arg;
1228         ++file_count;
1229         if (parsing_stdin)
1230           fatal ("cannot parse standard input more than once", (char *)NULL);
1231         parsing_stdin = TRUE;
1232         break;
1233
1234         /* Common options. */
1235       case 'a': append_to_tagfile = TRUE;       break;
1236       case 'C': cplusplus = TRUE;               break;
1237       case 'f':         /* for compatibility with old makefiles */
1238       case 'o':
1239         if (tagfile)
1240           {
1241             error ("-o option may only be given once.", (char *)NULL);
1242             suggest_asking_for_help ();
1243             /* NOTREACHED */
1244           }
1245         tagfile = optarg;
1246         break;
1247       case 'I':
1248       case 'S':         /* for backward compatibility */
1249         ignoreindent = TRUE;
1250         break;
1251       case 'l':
1252         {
1253           language *lang = get_language_from_langname (optarg);
1254           if (lang != NULL)
1255             {
1256               argbuffer[current_arg].lang = lang;
1257               argbuffer[current_arg].arg_type = at_language;
1258               ++current_arg;
1259             }
1260         }
1261         break;
1262       case 'c':
1263         /* Backward compatibility: support obsolete --ignore-case-regexp. */
1264         optarg = concat (optarg, "i", ""); /* memory leak here */
1265         /* FALLTHRU */
1266       case 'r':
1267         argbuffer[current_arg].arg_type = at_regexp;
1268         argbuffer[current_arg].what = optarg;
1269         ++current_arg;
1270         break;
1271       case 'R':
1272         argbuffer[current_arg].arg_type = at_regexp;
1273         argbuffer[current_arg].what = NULL;
1274         ++current_arg;
1275         break;
1276       case 'V':
1277         print_version ();
1278         break;
1279       case 'h':
1280       case 'H':
1281         help_asked = TRUE;
1282         break;
1283
1284         /* Etags options */
1285       case 'D': constantypedefs = FALSE;                        break;
1286       case 'i': included_files[nincluded_files++] = optarg;     break;
1287
1288         /* Ctags options. */
1289       case 'B': searchar = '?';                                 break;
1290       case 'd': constantypedefs = TRUE;                         break;
1291       case 't': typedefs = TRUE;                                break;
1292       case 'T': typedefs = typedefs_or_cplusplus = TRUE;        break;
1293       case 'u': update = TRUE;                                  break;
1294       case 'v': vgrind_style = TRUE;                      /*FALLTHRU*/
1295       case 'x': cxref_style = TRUE;                             break;
1296       case 'w': no_warnings = TRUE;                             break;
1297       default:
1298         suggest_asking_for_help ();
1299         /* NOTREACHED */
1300       }
1301
1302   /* No more options.  Store the rest of arguments. */
1303   for (; optind < argc; optind++)
1304     {
1305       argbuffer[current_arg].arg_type = at_filename;
1306       argbuffer[current_arg].what = argv[optind];
1307       ++current_arg;
1308       ++file_count;
1309     }
1310
1311   argbuffer[current_arg].arg_type = at_end;
1312
1313   if (help_asked)
1314     print_help (argbuffer);
1315     /* NOTREACHED */
1316
1317   if (nincluded_files == 0 && file_count == 0)
1318     {
1319       error ("no input files specified.", (char *)NULL);
1320       suggest_asking_for_help ();
1321       /* NOTREACHED */
1322     }
1323
1324   if (tagfile == NULL)
1325     tagfile = CTAGS ? "tags" : "TAGS";
1326   cwd = etags_getcwd ();        /* the current working directory */
1327   if (cwd[strlen (cwd) - 1] != '/')
1328     {
1329       char *oldcwd = cwd;
1330       cwd = concat (oldcwd, "/", "");
1331       free (oldcwd);
1332     }
1333   /* Relative file names are made relative to the current directory. */
1334   if (streq (tagfile, "-")
1335       || strneq (tagfile, "/dev/", 5))
1336     tagfiledir = cwd;
1337   else
1338     tagfiledir = absolute_dirname (tagfile, cwd);
1339
1340   init ();                      /* set up boolean "functions" */
1341
1342   linebuffer_init (&lb);
1343   linebuffer_init (&filename_lb);
1344   linebuffer_init (&filebuf);
1345   linebuffer_init (&token_name);
1346
1347   if (!CTAGS)
1348     {
1349       if (streq (tagfile, "-"))
1350         {
1351           tagf = stdout;
1352 #ifdef DOS_NT
1353           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1354              doesn't take effect until after `stdout' is already open). */
1355           if (!isatty (fileno (stdout)))
1356             setmode (fileno (stdout), O_BINARY);
1357 #endif /* DOS_NT */
1358         }
1359       else
1360         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1361       if (tagf == NULL)
1362         pfatal (tagfile);
1363     }
1364
1365   /*
1366    * Loop through files finding functions.
1367    */
1368   for (i = 0; i < current_arg; i++)
1369     {
1370       static language *lang;    /* non-NULL if language is forced */
1371       char *this_file;
1372
1373       switch (argbuffer[i].arg_type)
1374         {
1375         case at_language:
1376           lang = argbuffer[i].lang;
1377           break;
1378 #ifdef ETAGS_REGEXPS
1379         case at_regexp:
1380           analyse_regex (argbuffer[i].what);
1381           break;
1382 #endif
1383         case at_filename:
1384 #ifdef VMS
1385           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1386             {
1387               if (got_err)
1388                 {
1389                   error ("can't find file %s\n", this_file);
1390                   argc--, argv++;
1391                 }
1392               else
1393                 {
1394                   this_file = massage_name (this_file);
1395                 }
1396 #else
1397               this_file = argbuffer[i].what;
1398 #endif
1399               /* Input file named "-" means read file names from stdin
1400                  (one per line) and use them. */
1401               if (streq (this_file, "-"))
1402                 {
1403                   if (parsing_stdin)
1404                     fatal ("cannot parse standard input AND read file names from it",
1405                            (char *)NULL);
1406                   while (readline_internal (&filename_lb, stdin) > 0)
1407                     process_file_name (filename_lb.buffer, lang);
1408                 }
1409               else
1410                 process_file_name (this_file, lang);
1411 #ifdef VMS
1412             }
1413 #endif
1414           break;
1415         case at_stdin:
1416           this_file = argbuffer[i].what;
1417           process_file (stdin, this_file, lang);
1418           break;
1419         }
1420     }
1421
1422 #ifdef ETAGS_REGEXPS
1423   free_regexps ();
1424 #endif /* ETAGS_REGEXPS */
1425   free (lb.buffer);
1426   free (filebuf.buffer);
1427   free (token_name.buffer);
1428
1429   if (!CTAGS || cxref_style)
1430     {
1431       put_entries (nodehead);   /* write the remaining tags (ETAGS) */
1432       free_tree (nodehead);
1433       nodehead = NULL;
1434       if (!CTAGS)
1435         {
1436           fdesc *fdp;
1437
1438           /* Output file entries that have no tags. */
1439           for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1440             if (!fdp->written)
1441               fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
1442
1443           while (nincluded_files-- > 0)
1444             fprintf (tagf, "\f\n%s,include\n", *included_files++);
1445         }
1446
1447       if (fclose (tagf) == EOF)
1448         pfatal (tagfile);
1449       exit (EXIT_SUCCESS);
1450     }
1451
1452   if (update)
1453     {
1454       char cmd[BUFSIZ];
1455       for (i = 0; i < current_arg; ++i)
1456         {
1457           switch (argbuffer[i].arg_type)
1458             {
1459             case at_filename:
1460             case at_stdin:
1461               break;
1462             default:
1463               continue;         /* the for loop */
1464             }
1465           sprintf (cmd,
1466                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1467                    tagfile, argbuffer[i].what, tagfile);
1468           if (system (cmd) != EXIT_SUCCESS)
1469             fatal ("failed to execute shell command", (char *)NULL);
1470         }
1471       append_to_tagfile = TRUE;
1472     }
1473
1474   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1475   if (tagf == NULL)
1476     pfatal (tagfile);
1477   put_entries (nodehead);       /* write all the tags (CTAGS) */
1478   free_tree (nodehead);
1479   nodehead = NULL;
1480   if (fclose (tagf) == EOF)
1481     pfatal (tagfile);
1482
1483   if (CTAGS)
1484     if (append_to_tagfile || update)
1485       {
1486         char cmd[2*BUFSIZ+10];
1487         sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
1488         exit (system (cmd));
1489       }
1490   return EXIT_SUCCESS;
1491 }
1492
1493
1494 /*
1495  * Return a compressor given the file name.  If EXTPTR is non-zero,
1496  * return a pointer into FILE where the compressor-specific
1497  * extension begins.  If no compressor is found, NULL is returned
1498  * and EXTPTR is not significant.
1499  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca> (1998)
1500  */
1501 static compressor *
1502 get_compressor_from_suffix (file, extptr)
1503      char *file;
1504      char **extptr;
1505 {
1506   compressor *compr;
1507   char *slash, *suffix;
1508
1509   /* This relies on FN to be after canonicalize_filename,
1510      so we don't need to consider backslashes on DOS_NT.  */
1511   slash = etags_strrchr (file, '/');
1512   suffix = etags_strrchr (file, '.');
1513   if (suffix == NULL || suffix < slash)
1514     return NULL;
1515   if (extptr != NULL)
1516     *extptr = suffix;
1517   suffix += 1;
1518   /* Let those poor souls who live with DOS 8+3 file name limits get
1519      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1520      Only the first do loop is run if not MSDOS */
1521   do
1522     {
1523       for (compr = compressors; compr->suffix != NULL; compr++)
1524         if (streq (compr->suffix, suffix))
1525           return compr;
1526       if (!MSDOS)
1527         break;                  /* do it only once: not really a loop */
1528       if (extptr != NULL)
1529         *extptr = ++suffix;
1530     } while (*suffix != '\0');
1531   return NULL;
1532 }
1533
1534
1535
1536 /*
1537  * Return a language given the name.
1538  */
1539 static language *
1540 get_language_from_langname (name)
1541      const char *name;
1542 {
1543   language *lang;
1544
1545   if (name == NULL)
1546     error ("empty language name", (char *)NULL);
1547   else
1548     {
1549       for (lang = lang_names; lang->name != NULL; lang++)
1550         if (streq (name, lang->name))
1551           return lang;
1552       error ("unknown language \"%s\"", name);
1553     }
1554
1555   return NULL;
1556 }
1557
1558
1559 /*
1560  * Return a language given the interpreter name.
1561  */
1562 static language *
1563 get_language_from_interpreter (interpreter)
1564      char *interpreter;
1565 {
1566   language *lang;
1567   char **iname;
1568
1569   if (interpreter == NULL)
1570     return NULL;
1571   for (lang = lang_names; lang->name != NULL; lang++)
1572     if (lang->interpreters != NULL)
1573       for (iname = lang->interpreters; *iname != NULL; iname++)
1574         if (streq (*iname, interpreter))
1575             return lang;
1576
1577   return NULL;
1578 }
1579
1580
1581
1582 /*
1583  * Return a language given the file name.
1584  */
1585 static language *
1586 get_language_from_filename (file, case_sensitive)
1587      char *file;
1588      bool case_sensitive;
1589 {
1590   language *lang;
1591   char **name, **ext, *suffix;
1592
1593   /* Try whole file name first. */
1594   for (lang = lang_names; lang->name != NULL; lang++)
1595     if (lang->filenames != NULL)
1596       for (name = lang->filenames; *name != NULL; name++)
1597         if ((case_sensitive)
1598             ? streq (*name, file)
1599             : strcaseeq (*name, file))
1600           return lang;
1601
1602   /* If not found, try suffix after last dot. */
1603   suffix = etags_strrchr (file, '.');
1604   if (suffix == NULL)
1605     return NULL;
1606   suffix += 1;
1607   for (lang = lang_names; lang->name != NULL; lang++)
1608     if (lang->suffixes != NULL)
1609       for (ext = lang->suffixes; *ext != NULL; ext++)
1610         if ((case_sensitive)
1611             ? streq (*ext, suffix)
1612             : strcaseeq (*ext, suffix))
1613           return lang;
1614   return NULL;
1615 }
1616
1617 \f
1618 /*
1619  * This routine is called on each file argument.
1620  */
1621 static void
1622 process_file_name (file, lang)
1623      char *file;
1624      language *lang;
1625 {
1626   struct stat stat_buf;
1627   FILE *inf;
1628   fdesc *fdp;
1629   compressor *compr;
1630   char *compressed_name, *uncompressed_name;
1631   char *ext, *real_name;
1632   int retval;
1633
1634   canonicalize_filename (file);
1635   if (streq (file, tagfile) && !streq (tagfile, "-"))
1636     {
1637       error ("skipping inclusion of %s in self.", file);
1638       return;
1639     }
1640   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1641     {
1642       compressed_name = NULL;
1643       real_name = uncompressed_name = savestr (file);
1644     }
1645   else
1646     {
1647       real_name = compressed_name = savestr (file);
1648       uncompressed_name = savenstr (file, ext - file);
1649     }
1650
1651   /* If the canonicalized uncompressed name
1652      has already been dealt with, skip it silently. */
1653   for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
1654     {
1655       assert (fdp->infname != NULL);
1656       if (streq (uncompressed_name, fdp->infname))
1657         goto cleanup;
1658     }
1659
1660   if (stat (real_name, &stat_buf) != 0)
1661     {
1662       /* Reset real_name and try with a different name. */
1663       real_name = NULL;
1664       if (compressed_name != NULL) /* try with the given suffix */
1665         {
1666           if (stat (uncompressed_name, &stat_buf) == 0)
1667             real_name = uncompressed_name;
1668         }
1669       else                      /* try all possible suffixes */
1670         {
1671           for (compr = compressors; compr->suffix != NULL; compr++)
1672             {
1673               compressed_name = concat (file, ".", compr->suffix);
1674               if (stat (compressed_name, &stat_buf) != 0)
1675                 {
1676                   if (MSDOS)
1677                     {
1678                       char *suf = compressed_name + strlen (file);
1679                       size_t suflen = strlen (compr->suffix) + 1;
1680                       for ( ; suf[1]; suf++, suflen--)
1681                         {
1682                           memmove (suf, suf + 1, suflen);
1683                           if (stat (compressed_name, &stat_buf) == 0)
1684                             {
1685                               real_name = compressed_name;
1686                               break;
1687                             }
1688                         }
1689                       if (real_name != NULL)
1690                         break;
1691                     } /* MSDOS */
1692                   free (compressed_name);
1693                   compressed_name = NULL;
1694                 }
1695               else
1696                 {
1697                   real_name = compressed_name;
1698                   break;
1699                 }
1700             }
1701         }
1702       if (real_name == NULL)
1703         {
1704           perror (file);
1705           goto cleanup;
1706         }
1707     } /* try with a different name */
1708
1709   if (!S_ISREG (stat_buf.st_mode))
1710     {
1711       error ("skipping %s: it is not a regular file.", real_name);
1712       goto cleanup;
1713     }
1714   if (real_name == compressed_name)
1715     {
1716       char *cmd = concat (compr->command, " ", real_name);
1717       inf = (FILE *) popen (cmd, "r");
1718       free (cmd);
1719     }
1720   else
1721     inf = fopen (real_name, "r");
1722   if (inf == NULL)
1723     {
1724       perror (real_name);
1725       goto cleanup;
1726     }
1727
1728   process_file (inf, uncompressed_name, lang);
1729
1730   if (real_name == compressed_name)
1731     retval = pclose (inf);
1732   else
1733     retval = fclose (inf);
1734   if (retval < 0)
1735     pfatal (file);
1736
1737  cleanup:
1738   if (compressed_name) free (compressed_name);
1739   if (uncompressed_name) free (uncompressed_name);
1740   last_node = NULL;
1741   curfdp = NULL;
1742   return;
1743 }
1744
1745 static void
1746 process_file (fh, fn, lang)
1747      FILE *fh;
1748      char *fn;
1749      language *lang;
1750 {
1751   static const fdesc emptyfdesc;
1752   fdesc *fdp;
1753
1754   /* Create a new input file description entry. */
1755   fdp = xnew (1, fdesc);
1756   *fdp = emptyfdesc;
1757   fdp->next = fdhead;
1758   fdp->infname = savestr (fn);
1759   fdp->lang = lang;
1760   fdp->infabsname = absolute_filename (fn, cwd);
1761   fdp->infabsdir = absolute_dirname (fn, cwd);
1762   if (filename_is_absolute (fn))
1763     {
1764       /* An absolute file name.  Canonicalize it. */
1765       fdp->taggedfname = absolute_filename (fn, NULL);
1766     }
1767   else
1768     {
1769       /* A file name relative to cwd.  Make it relative
1770          to the directory of the tags file. */
1771       fdp->taggedfname = relative_filename (fn, tagfiledir);
1772     }
1773   fdp->usecharno = TRUE;        /* use char position when making tags */
1774   fdp->prop = NULL;
1775   fdp->written = FALSE;         /* not written on tags file yet */
1776
1777   fdhead = fdp;
1778   curfdp = fdhead;              /* the current file description */
1779
1780   find_entries (fh);
1781
1782   /* If not Ctags, and if this is not metasource and if it contained no #line
1783      directives, we can write the tags and free all nodes pointing to
1784      curfdp. */
1785   if (!CTAGS
1786       && curfdp->usecharno      /* no #line directives in this file */
1787       && !curfdp->lang->metasource)
1788     {
1789       node *np, *prev;
1790
1791       /* Look for the head of the sublist relative to this file.  See add_node
1792          for the structure of the node tree. */
1793       prev = NULL;
1794       for (np = nodehead; np != NULL; prev = np, np = np->left)
1795         if (np->fdp == curfdp)
1796           break;
1797
1798       /* If we generated tags for this file, write and delete them. */
1799       if (np != NULL)
1800         {
1801           /* This is the head of the last sublist, if any.  The following
1802              instructions depend on this being true. */
1803           assert (np->left == NULL);
1804
1805           assert (fdhead == curfdp);
1806           assert (last_node->fdp == curfdp);
1807           put_entries (np);     /* write tags for file curfdp->taggedfname */
1808           free_tree (np);       /* remove the written nodes */
1809           if (prev == NULL)
1810             nodehead = NULL;    /* no nodes left */
1811           else
1812             prev->left = NULL;  /* delete the pointer to the sublist */
1813         }
1814     }
1815 }
1816
1817 /*
1818  * This routine sets up the boolean pseudo-functions which work
1819  * by setting boolean flags dependent upon the corresponding character.
1820  * Every char which is NOT in that string is not a white char.  Therefore,
1821  * all of the array "_wht" is set to FALSE, and then the elements
1822  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1823  * of a char is TRUE if it is the string "white", else FALSE.
1824  */
1825 static void
1826 init ()
1827 {
1828   register char *sp;
1829   register int i;
1830
1831   for (i = 0; i < CHARS; i++)
1832     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1833   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1834   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1835   notinname('\0') = notinname('\n');
1836   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1837   begtoken('\0') = begtoken('\n');
1838   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1839   intoken('\0') = intoken('\n');
1840   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1841   endtoken('\0') = endtoken('\n');
1842 }
1843
1844 /*
1845  * This routine opens the specified file and calls the function
1846  * which finds the function and type definitions.
1847  */
1848 static void
1849 find_entries (inf)
1850      FILE *inf;
1851 {
1852   char *cp;
1853   language *lang = curfdp->lang;
1854   Lang_function *parser = NULL;
1855
1856   /* If user specified a language, use it. */
1857   if (lang != NULL && lang->function != NULL)
1858     {
1859       parser = lang->function;
1860     }
1861
1862   /* Else try to guess the language given the file name. */
1863   if (parser == NULL)
1864     {
1865       lang = get_language_from_filename (curfdp->infname, TRUE);
1866       if (lang != NULL && lang->function != NULL)
1867         {
1868           curfdp->lang = lang;
1869           parser = lang->function;
1870         }
1871     }
1872
1873   /* Else look for sharp-bang as the first two characters. */
1874   if (parser == NULL
1875       && readline_internal (&lb, inf) > 0
1876       && lb.len >= 2
1877       && lb.buffer[0] == '#'
1878       && lb.buffer[1] == '!')
1879     {
1880       char *lp;
1881
1882       /* Set lp to point at the first char after the last slash in the
1883          line or, if no slashes, at the first nonblank.  Then set cp to
1884          the first successive blank and terminate the string. */
1885       lp = etags_strrchr (lb.buffer+2, '/');
1886       if (lp != NULL)
1887         lp += 1;
1888       else
1889         lp = skip_spaces (lb.buffer + 2);
1890       cp = skip_non_spaces (lp);
1891       *cp = '\0';
1892
1893       if (strlen (lp) > 0)
1894         {
1895           lang = get_language_from_interpreter (lp);
1896           if (lang != NULL && lang->function != NULL)
1897             {
1898               curfdp->lang = lang;
1899               parser = lang->function;
1900             }
1901         }
1902     }
1903
1904   /* We rewind here, even if inf may be a pipe.  We fail if the
1905      length of the first line is longer than the pipe block size,
1906      which is unlikely. */
1907   rewind (inf);
1908
1909   /* Else try to guess the language given the case insensitive file name. */
1910   if (parser == NULL)
1911     {
1912       lang = get_language_from_filename (curfdp->infname, FALSE);
1913       if (lang != NULL && lang->function != NULL)
1914         {
1915           curfdp->lang = lang;
1916           parser = lang->function;
1917         }
1918     }
1919
1920   /* Else try Fortran or C. */
1921   if (parser == NULL)
1922     {
1923       node *old_last_node = last_node;
1924
1925       curfdp->lang = get_language_from_langname ("fortran");
1926       find_entries (inf);
1927
1928       if (old_last_node == last_node)
1929         /* No Fortran entries found.  Try C. */
1930         {
1931           /* We do not tag if rewind fails.
1932              Only the file name will be recorded in the tags file. */
1933           rewind (inf);
1934           curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
1935           find_entries (inf);
1936         }
1937       return;
1938     }
1939
1940   if (!no_line_directive
1941       && curfdp->lang != NULL && curfdp->lang->metasource)
1942     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
1943        file, or anyway we parsed a file that is automatically generated from
1944        this one.  If this is the case, the bingo.c file contained #line
1945        directives that generated tags pointing to this file.  Let's delete
1946        them all before parsing this file, which is the real source. */
1947     {
1948       fdesc **fdpp = &fdhead;
1949       while (*fdpp != NULL)
1950         if (*fdpp != curfdp
1951             && streq ((*fdpp)->taggedfname, curfdp->taggedfname))
1952           /* We found one of those!  We must delete both the file description
1953              and all tags referring to it. */
1954           {
1955             fdesc *badfdp = *fdpp;
1956
1957             /* Delete the tags referring to badfdp->taggedfname
1958                that were obtained from badfdp->infname. */
1959             invalidate_nodes (badfdp, &nodehead);
1960
1961             *fdpp = badfdp->next; /* remove the bad description from the list */
1962             free_fdesc (badfdp);
1963           }
1964         else
1965           fdpp = &(*fdpp)->next; /* advance the list pointer */
1966     }
1967
1968   assert (parser != NULL);
1969
1970   /* Generic initialisations before reading from file. */
1971   linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
1972
1973   /* Generic initialisations before parsing file with readline. */
1974   lineno = 0;                  /* reset global line number */
1975   charno = 0;                  /* reset global char number */
1976   linecharno = 0;              /* reset global char number of line start */
1977
1978   parser (inf);
1979
1980 #ifdef ETAGS_REGEXPS
1981   regex_tag_multiline ();
1982 #endif /* ETAGS_REGEXPS */
1983 }
1984
1985 \f
1986 /*
1987  * Check whether an implicitly named tag should be created,
1988  * then call `pfnote'.
1989  * NAME is a string that is internally copied by this function.
1990  *
1991  * TAGS format specification
1992  * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
1993  * The following is explained in some more detail in etc/ETAGS.EBNF.
1994  *
1995  * make_tag creates tags with "implicit tag names" (unnamed tags)
1996  * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
1997  *  1. NAME does not contain any of the characters in NONAM;
1998  *  2. LINESTART contains name as either a rightmost, or rightmost but
1999  *     one character, substring;
2000  *  3. the character, if any, immediately before NAME in LINESTART must
2001  *     be a character in NONAM;
2002  *  4. the character, if any, immediately after NAME in LINESTART must
2003  *     also be a character in NONAM.
2004  *
2005  * The implementation uses the notinname() macro, which recognises the
2006  * characters stored in the string `nonam'.
2007  * etags.el needs to use the same characters that are in NONAM.
2008  */
2009 static void
2010 make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
2011      char *name;                /* tag name, or NULL if unnamed */
2012      int namelen;               /* tag length */
2013      bool is_func;              /* tag is a function */
2014      char *linestart;           /* start of the line where tag is */
2015      int linelen;               /* length of the line where tag is */
2016      int lno;                   /* line number */
2017      long cno;                  /* character number */
2018 {
2019   bool named = (name != NULL && namelen > 0);
2020
2021   if (!CTAGS && named)          /* maybe set named to false */
2022     /* Let's try to make an implicit tag name, that is, create an unnamed tag
2023        such that etags.el can guess a name from it. */
2024     {
2025       int i;
2026       register char *cp = name;
2027
2028       for (i = 0; i < namelen; i++)
2029         if (notinname (*cp++))
2030           break;
2031       if (i == namelen)                         /* rule #1 */
2032         {
2033           cp = linestart + linelen - namelen;
2034           if (notinname (linestart[linelen-1]))
2035             cp -= 1;                            /* rule #4 */
2036           if (cp >= linestart                   /* rule #2 */
2037               && (cp == linestart
2038                   || notinname (cp[-1]))        /* rule #3 */
2039               && strneq (name, cp, namelen))    /* rule #2 */
2040             named = FALSE;      /* use implicit tag name */
2041         }
2042     }
2043
2044   if (named)
2045     name = savenstr (name, namelen);
2046   else
2047     name = NULL;
2048   pfnote (name, is_func, linestart, linelen, lno, cno);
2049 }
2050
2051 /* Record a tag. */
2052 static void
2053 pfnote (name, is_func, linestart, linelen, lno, cno)
2054      char *name;                /* tag name, or NULL if unnamed */
2055      bool is_func;              /* tag is a function */
2056      char *linestart;           /* start of the line where tag is */
2057      int linelen;               /* length of the line where tag is */
2058      int lno;                   /* line number */
2059      long cno;                  /* character number */
2060 {
2061   register node *np;
2062
2063   assert (name == NULL || name[0] != '\0');
2064   if (CTAGS && name == NULL)
2065     return;
2066
2067   np = xnew (1, node);
2068
2069   /* If ctags mode, change name "main" to M<thisfilename>. */
2070   if (CTAGS && !cxref_style && streq (name, "main"))
2071     {
2072       register char *fp = etags_strrchr (curfdp->taggedfname, '/');
2073       np->name = concat ("M", fp == NULL ? curfdp->taggedfname : fp + 1, "");
2074       fp = etags_strrchr (np->name, '.');
2075       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
2076         fp[0] = '\0';
2077     }
2078   else
2079     np->name = name;
2080   np->valid = TRUE;
2081   np->been_warned = FALSE;
2082   np->fdp = curfdp;
2083   np->is_func = is_func;
2084   np->lno = lno;
2085   if (np->fdp->usecharno)
2086     /* Our char numbers are 0-base, because of C language tradition?
2087        ctags compatibility?  old versions compatibility?   I don't know.
2088        Anyway, since emacs's are 1-base we expect etags.el to take care
2089        of the difference.  If we wanted to have 1-based numbers, we would
2090        uncomment the +1 below. */
2091     np->cno = cno /* + 1 */ ;
2092   else
2093     np->cno = invalidcharno;
2094   np->left = np->right = NULL;
2095   if (CTAGS && !cxref_style)
2096     {
2097       if (strlen (linestart) < 50)
2098         np->regex = concat (linestart, "$", "");
2099       else
2100         np->regex = savenstr (linestart, 50);
2101     }
2102   else
2103     np->regex = savenstr (linestart, linelen);
2104
2105   add_node (np, &nodehead);
2106 }
2107
2108 /*
2109  * free_tree ()
2110  *      recurse on left children, iterate on right children.
2111  */
2112 static void
2113 free_tree (np)
2114      register node *np;
2115 {
2116   while (np)
2117     {
2118       register node *node_right = np->right;
2119       free_tree (np->left);
2120       if (np->name != NULL)
2121         free (np->name);
2122       free (np->regex);
2123       free (np);
2124       np = node_right;
2125     }
2126 }
2127
2128 /*
2129  * free_fdesc ()
2130  *      delete a file description
2131  */
2132 static void
2133 free_fdesc (fdp)
2134      register fdesc *fdp;
2135 {
2136   if (fdp->infname != NULL) free (fdp->infname);
2137   if (fdp->infabsname != NULL) free (fdp->infabsname);
2138   if (fdp->infabsdir != NULL) free (fdp->infabsdir);
2139   if (fdp->taggedfname != NULL) free (fdp->taggedfname);
2140   if (fdp->prop != NULL) free (fdp->prop);
2141   free (fdp);
2142 }
2143
2144 /*
2145  * add_node ()
2146  *      Adds a node to the tree of nodes.  In etags mode, sort by file
2147  *      name.  In ctags mode, sort by tag name.  Make no attempt at
2148  *      balancing.
2149  *
2150  *      add_node is the only function allowed to add nodes, so it can
2151  *      maintain state.
2152  */
2153 static void
2154 add_node (np, cur_node_p)
2155      node *np, **cur_node_p;
2156 {
2157   register int dif;
2158   register node *cur_node = *cur_node_p;
2159
2160   if (cur_node == NULL)
2161     {
2162       *cur_node_p = np;
2163       last_node = np;
2164       return;
2165     }
2166
2167   if (!CTAGS)
2168     /* Etags Mode */
2169     {
2170       /* For each file name, tags are in a linked sublist on the right
2171          pointer.  The first tags of different files are a linked list
2172          on the left pointer.  last_node points to the end of the last
2173          used sublist. */
2174       if (last_node != NULL && last_node->fdp == np->fdp)
2175         {
2176           /* Let's use the same sublist as the last added node. */
2177           assert (last_node->right == NULL);
2178           last_node->right = np;
2179           last_node = np;
2180         }
2181       else if (cur_node->fdp == np->fdp)
2182         {
2183           /* Scanning the list we found the head of a sublist which is
2184              good for us.  Let's scan this sublist. */
2185           add_node (np, &cur_node->right);
2186         }
2187       else
2188         /* The head of this sublist is not good for us.  Let's try the
2189            next one. */
2190         add_node (np, &cur_node->left);
2191     } /* if ETAGS mode */
2192
2193   else
2194     {
2195       /* Ctags Mode */
2196       dif = strcmp (np->name, cur_node->name);
2197
2198       /*
2199        * If this tag name matches an existing one, then
2200        * do not add the node, but maybe print a warning.
2201        */
2202       if (!dif)
2203         {
2204           if (np->fdp == cur_node->fdp)
2205             {
2206               if (!no_warnings)
2207                 {
2208                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
2209                            np->fdp->infname, lineno, np->name);
2210                   fprintf (stderr, "Second entry ignored\n");
2211                 }
2212             }
2213           else if (!cur_node->been_warned && !no_warnings)
2214             {
2215               fprintf
2216                 (stderr,
2217                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
2218                  np->fdp->infname, cur_node->fdp->infname, np->name);
2219               cur_node->been_warned = TRUE;
2220             }
2221           return;
2222         }
2223
2224       /* Actually add the node */
2225       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
2226     } /* if CTAGS mode */
2227 }
2228
2229 /*
2230  * invalidate_nodes ()
2231  *      Scan the node tree and invalidate all nodes pointing to the
2232  *      given file description (CTAGS case) or free them (ETAGS case).
2233  */
2234 static void
2235 invalidate_nodes (badfdp, npp)
2236      fdesc *badfdp;
2237      node **npp;
2238 {
2239   node *np = *npp;
2240
2241   if (np == NULL)
2242     return;
2243
2244   if (CTAGS)
2245     {
2246       if (np->left != NULL)
2247         invalidate_nodes (badfdp, &np->left);
2248       if (np->fdp == badfdp)
2249         np->valid = FALSE;
2250       if (np->right != NULL)
2251         invalidate_nodes (badfdp, &np->right);
2252     }
2253   else
2254     {
2255       assert (np->fdp != NULL);
2256       if (np->fdp == badfdp)
2257         {
2258           *npp = np->left;      /* detach the sublist from the list */
2259           np->left = NULL;      /* isolate it */
2260           free_tree (np);       /* free it */
2261           invalidate_nodes (badfdp, npp);
2262         }
2263       else
2264         invalidate_nodes (badfdp, &np->left);
2265     }
2266 }
2267
2268 \f
2269 static int total_size_of_entries __P((node *));
2270 static int number_len __P((long));
2271
/* Length of a non-negative number's decimal representation:
   one digit, plus one more for each division by ten that still
   leaves a positive quotient. */
static int
number_len (num)
     long num;
{
  int digits;

  for (digits = 1; (num /= 10) > 0; digits++)
    continue;
  return digits;
}
2282
2283 /*
2284  * Return total number of characters that put_entries will output for
2285  * the nodes in the linked list at the right of the specified node.
2286  * This count is irrelevant with etags.el since emacs 19.34 at least,
2287  * but is still supplied for backward compatibility.
2288  */
2289 static int
2290 total_size_of_entries (np)
2291      register node *np;
2292 {
2293   register int total = 0;
2294
2295   for (; np != NULL; np = np->right)
2296     if (np->valid)
2297       {
2298         total += strlen (np->regex) + 1;                /* pat\177 */
2299         if (np->name != NULL)
2300           total += strlen (np->name) + 1;               /* name\001 */
2301         total += number_len ((long) np->lno) + 1;       /* lno, */
2302         if (np->cno != invalidcharno)                   /* cno */
2303           total += number_len (np->cno);
2304         total += 1;                                     /* newline */
2305       }
2306
2307   return total;
2308 }
2309
2310 static void
2311 put_entries (np)
2312      register node *np;
2313 {
2314   register char *sp;
2315   static fdesc *fdp = NULL;
2316
2317   if (np == NULL)
2318     return;
2319
2320   /* Output subentries that precede this one */
2321   if (CTAGS)
2322     put_entries (np->left);
2323
2324   /* Output this entry */
2325   if (np->valid)
2326     {
2327       if (!CTAGS)
2328         {
2329           /* Etags mode */
2330           if (fdp != np->fdp)
2331             {
2332               fdp = np->fdp;
2333               fprintf (tagf, "\f\n%s,%d\n",
2334                        fdp->taggedfname, total_size_of_entries (np));
2335               fdp->written = TRUE;
2336             }
2337           fputs (np->regex, tagf);
2338           fputc ('\177', tagf);
2339           if (np->name != NULL)
2340             {
2341               fputs (np->name, tagf);
2342               fputc ('\001', tagf);
2343             }
2344           fprintf (tagf, "%d,", np->lno);
2345           if (np->cno != invalidcharno)
2346             fprintf (tagf, "%ld", np->cno);
2347           fputs ("\n", tagf);
2348         }
2349       else
2350         {
2351           /* Ctags mode */
2352           if (np->name == NULL)
2353             error ("internal error: NULL name in ctags mode.", (char *)NULL);
2354
2355           if (cxref_style)
2356             {
2357               if (vgrind_style)
2358                 fprintf (stdout, "%s %s %d\n",
2359                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
2360               else
2361                 fprintf (stdout, "%-16s %3d %-16s %s\n",
2362                          np->name, np->lno, np->fdp->taggedfname, np->regex);
2363             }
2364           else
2365             {
2366               fprintf (tagf, "%s\t%s\t", np->name, np->fdp->taggedfname);
2367
2368               if (np->is_func)
2369                 {               /* function or #define macro with args */
2370                   putc (searchar, tagf);
2371                   putc ('^', tagf);
2372
2373                   for (sp = np->regex; *sp; sp++)
2374                     {
2375                       if (*sp == '\\' || *sp == searchar)
2376                         putc ('\\', tagf);
2377                       putc (*sp, tagf);
2378                     }
2379                   putc (searchar, tagf);
2380                 }
2381               else
2382                 {               /* anything else; text pattern inadequate */
2383                   fprintf (tagf, "%d", np->lno);
2384                 }
2385               putc ('\n', tagf);
2386             }
2387         }
2388     } /* if this node contains a valid tag */
2389
2390   /* Output subentries that follow this one */
2391   put_entries (np->right);
2392   if (!CTAGS)
2393     put_entries (np->left);
2394 }
2395
2396 \f
/* C extensions. */
/* Flag values describing the C dialect being parsed.  The dialect
   proper lives in the bits selected by C_EXT; C_AUTO and YACC are
   separate bits outside that mask.  Note that C_STAR and C_JAVA both
   include the C_PLPL bit (0x1).
   NOTE(review): the keyword table in this file writes
   (C_JAVA & !C_PLPL); since !C_PLPL is 0, that expression evaluates
   to 0.  Presumably (C_JAVA & ~C_PLPL) was meant -- confirm. */
#define C_EXT   0x00fff         /* C extensions */
#define C_PLAIN 0x00000         /* C */
#define C_PLPL  0x00001         /* C++ */
#define C_STAR  0x00003         /* C* */
#define C_JAVA  0x00005         /* JAVA */
#define C_AUTO  0x01000         /* C, but switch to C++ if `class' is met */
#define YACC    0x10000         /* yacc file */
2405
/*
 * The C symbol tables.
 *
 * sym_type is the kind attached to each keyword in the gperf keyword
 * table that follows; the keywords quoted below are taken from that
 * table.
 */
enum sym_type
{
  /* presumably the kind of a word that is not in the table -- confirm */
  st_none,
  /* @interface and @protocol, @implementation, @end */
  st_C_objprot, st_C_objimpl, st_C_objend,
  /* DEFUN, SYSCALL, ENTRY, PSEUDO */
  st_C_gnumacro,
  /* if, for, while, switch, return, import, package, friend;
     __attribute__ */
  st_C_ignore, st_C_attribute,
  /* extends, implements */
  st_C_javastruct,
  /* operator */
  st_C_operator,
  /* class, template */
  st_C_class, st_C_template,
  /* struct, union, namespace, domain, interface; then extern, enum,
     define, typedef */
  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef
};
2420
2421 static unsigned int hash __P((const char *, unsigned int));
2422 static struct C_stab_entry * in_word_set __P((const char *, unsigned int));
2423 static enum sym_type C_symtype __P((char *, int, int));
2424
2425 /* Feed stuff between (but not including) %[ and %] lines to:
2426      gperf -m 5
2427 %[
2428 %compare-strncmp
2429 %enum
2430 %struct-type
2431 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
2432 %%
2433 if,             0,                      st_C_ignore
2434 for,            0,                      st_C_ignore
2435 while,          0,                      st_C_ignore
2436 switch,         0,                      st_C_ignore
2437 return,         0,                      st_C_ignore
2438 __attribute__,  0,                      st_C_attribute
2439 @interface,     0,                      st_C_objprot
2440 @protocol,      0,                      st_C_objprot
2441 @implementation,0,                      st_C_objimpl
2442 @end,           0,                      st_C_objend
2443 import,         (C_JAVA & !C_PLPL),     st_C_ignore
2444 package,        (C_JAVA & !C_PLPL),     st_C_ignore
2445 friend,         C_PLPL,                 st_C_ignore
2446 extends,        (C_JAVA & !C_PLPL),     st_C_javastruct
2447 implements,     (C_JAVA & !C_PLPL),     st_C_javastruct
2448 interface,      (C_JAVA & !C_PLPL),     st_C_struct
2449 class,          0,                      st_C_class
2450 namespace,      C_PLPL,                 st_C_struct
2451 domain,         C_STAR,                 st_C_struct
2452 union,          0,                      st_C_struct
2453 struct,         0,                      st_C_struct
2454 extern,         0,                      st_C_extern
2455 enum,           0,                      st_C_enum
2456 typedef,        0,                      st_C_typedef
2457 define,         0,                      st_C_define
2458 operator,       C_PLPL,                 st_C_operator
2459 template,       0,                      st_C_template
2460 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
2461 DEFUN,          0,                      st_C_gnumacro
2462 SYSCALL,        0,                      st_C_gnumacro
2463 ENTRY,          0,                      st_C_gnumacro
2464 PSEUDO,         0,                      st_C_gnumacro
2465 # These are defined inside C functions, so currently they are not met.
2466 # EXFUN used in glibc, DEFVAR_* in emacs.
2467 #EXFUN,         0,                      st_C_gnumacro
2468 #DEFVAR_,       0,                      st_C_gnumacro
2469 %]
2470 and replace lines between %< and %> with its output, then:
2471  - remove the #if characterset check
2472  - make in_word_set static and not inline. */
2473 /*%<*/
2474 /* C code produced by gperf version 3.0.1 */
2475 /* Command-line: gperf -m 5  */
2476 /* Computed positions: -k'1-2' */
2477
2478 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
2479 /* maximum key range = 31, duplicates = 0 */
2480
#ifdef __GNUC__
__inline
#else
#ifdef __cplusplus
inline
#endif
#endif
/* Hash function of the keyword table (generated by gperf, see above).
   The value is LEN plus the association values of the first two
   characters of STR; STR must therefore be at least two characters
   long. */
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* One association value per possible unsigned char value. */
  static unsigned char asso_values[] =
    {
      /*   0 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /*  10 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /*  20 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /*  30 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /*  40 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /*  50 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /*  60 */ 34, 34, 34, 34,  1, 34, 34, 34, 14, 14,
      /*  70 */ 34, 34, 34, 34, 34, 34, 34, 34, 13, 34,
      /*  80 */ 13, 34, 34, 12, 34, 34, 34, 34, 34, 11,
      /*  90 */ 34, 34, 34, 34, 34,  8, 34, 11, 34, 12,
      /* 100 */ 11,  0,  1, 34,  7,  0, 34, 34, 11,  9,
      /* 110 */  0,  4,  0, 34,  7,  4, 14, 21, 34, 15,
      /* 120 */  0,  2, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 130 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 140 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 150 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 160 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 170 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 180 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 190 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 200 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 210 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 220 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 230 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 240 */ 34, 34, 34, 34, 34, 34, 34, 34, 34, 34,
      /* 250 */ 34, 34, 34, 34, 34, 34
    };
  unsigned int second = asso_values[(unsigned char) str[1]];
  unsigned int first = asso_values[(unsigned char) str[0]];

  return len + second + first;
}
2524
2525 static struct C_stab_entry *
2526 in_word_set (str, len)
2527      register const char *str;
2528      register unsigned int len;
2529 {
2530   enum
2531     {
2532       TOTAL_KEYWORDS = 31,
2533       MIN_WORD_LENGTH = 2,
2534       MAX_WORD_LENGTH = 15,
2535       MIN_HASH_VALUE = 3,
2536       MAX_HASH_VALUE = 33
2537     };
2538
2539   static struct C_stab_entry wordlist[] =
2540     {
2541       {""}, {""}, {""},
2542       {"if",            0,                      st_C_ignore},
2543       {"enum",          0,                      st_C_enum},
2544       {"@end",          0,                      st_C_objend},
2545       {"extern",                0,                      st_C_extern},
2546       {"extends",       (C_JAVA & !C_PLPL),     st_C_javastruct},
2547       {"for",           0,                      st_C_ignore},
2548       {"interface",     (C_JAVA & !C_PLPL),     st_C_struct},
2549       {"@protocol",     0,                      st_C_objprot},
2550       {"@interface",    0,                      st_C_objprot},
2551       {"operator",      C_PLPL,                 st_C_operator},
2552       {"return",                0,                      st_C_ignore},
2553       {"friend",                C_PLPL,                 st_C_ignore},
2554       {"import",                (C_JAVA & !C_PLPL),     st_C_ignore},
2555       {"@implementation",0,                     st_C_objimpl},
2556       {"define",                0,                      st_C_define},
2557       {"package",       (C_JAVA & !C_PLPL),     st_C_ignore},
2558       {"implements",    (C_JAVA & !C_PLPL),     st_C_javastruct},
2559       {"namespace",     C_PLPL,                 st_C_struct},
2560       {"domain",                C_STAR,                 st_C_struct},
2561       {"template",      0,                      st_C_template},
2562       {"typedef",       0,                      st_C_typedef},
2563       {"struct",                0,                      st_C_struct},
2564       {"switch",                0,                      st_C_ignore},
2565       {"union",         0,                      st_C_struct},
2566       {"while",         0,                      st_C_ignore},
2567       {"class",         0,                      st_C_class},
2568       {"__attribute__", 0,                      st_C_attribute},
2569       {"SYSCALL",       0,                      st_C_gnumacro},
2570       {"PSEUDO",                0,                      st_C_gnumacro},
2571       {"ENTRY",         0,                      st_C_gnumacro},
2572       {"DEFUN",         0,                      st_C_gnumacro}
2573     };
2574
2575   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2576     {
2577       register int key = hash (str, len);
2578
2579       if (key <= MAX_HASH_VALUE && key >= 0)
2580         {
2581           register const char *s = wordlist[key].name;
2582
2583           if (*str == *s && !strncmp (str + 1, s + 1, len - 1) && s[len] == '\0')
2584             return &wordlist[key];
2585         }
2586     }
2587   return 0;
2588 }
2589 /*%>*/
2590
2591 static enum sym_type
2592 C_symtype (str, len, c_ext)
2593      char *str;
2594      int len;
2595      int c_ext;
2596 {
2597   register struct C_stab_entry *se = in_word_set (str, len);
2598
2599   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2600     return st_none;
2601   return se->type;
2602 }
2603
2604 \f
/*
 * Ignoring __attribute__ ((list))
 *
 * consider_token sets this flag when it meets the __attribute__
 * keyword; as long as it is set, C_entries does not consider tokens.
 */
static bool inattribute;        /* looking at an __attribute__ construct */

/*
 * C functions and variables are recognized using a simple
 * finite automaton.  fvdef is its state variable.
 */
static enum
{
  fvnone,                       /* nothing seen */
  fdefunkey,                    /* GNU macro keyword seen (DEFUN, ENTRY,
                                   PSEUDO or SYSCALL) */
  fdefunname,                   /* name following a GNU macro keyword seen */
  foperator,                    /* func: operator keyword seen (cplpl) */
  fvnameseen,                   /* function or variable name seen */
  fstartlist,                   /* func: just after open parenthesis */
  finlist,                      /* func: in parameter list */
  flistseen,                    /* func: after parameter list */
  fignore,                      /* func: before open brace */
  vignore                       /* var-like: ignore until ';' */
} fvdef;

static bool fvextern;           /* func or var: extern keyword seen */
2629
/*
 * typedefs are recognized using a simple finite automaton.
 * typdef is its state variable.
 */
static enum
{
  tnone,                        /* nothing seen */
  tkeyseen,                     /* typedef keyword seen (entered by
                                   consider_token only if `typedefs' is set) */
  ttypeseen,                    /* defined type seen */
  tinbody,                      /* inside typedef body */
  tend,                         /* just before typedef tag */
  tignore                       /* junk after typedef tag */
} typdef;
2643
/*
 * struct-like structures (enum, struct and union) are recognized
 * using another simple finite automaton.  `structdef' is its state
 * variable.
 */
static enum
{
  snone,                        /* nothing seen yet,
                                   or in struct body if bracelev > 0 */
  skeyseen,                     /* struct-like keyword seen */
  stagseen,                     /* struct-like tag seen, i.e. the token that
                                   follows the struct-like keyword */
  scolonseen                    /* colon seen after struct-like tag; also
                                   entered when `extends' or `implements'
                                   follows the tag */
} structdef;
2657
/*
 * When objdef is different from onone, objtag is the name of the class.
 * consider_token stores here a fresh copy of the class name made with
 * savenstr; that copy is deliberately never freed.
 */
static char *objtag = "<uninited>";
2662
/*
 * Yet another little state machine to deal with preprocessor lines.
 * The CNL macro resets it to dnone whenever a new line is read.
 */
static enum
{
  dnone,                        /* nothing seen */
  dsharpseen,                   /* '#' seen as first char on line */
  ddefineseen,                  /* '#' and 'define' seen */
  dignorerest                   /* ignore rest of line: either the macro name
                                   has been handled, or the directive is
                                   not a `define' */
} definedef;
2673
/*
 * State machine for Objective C protocols and implementations.
 * Idea by Tom R.Hageman <tom@basil.icce.rug.nl> (1995)
 */
static enum
{
  onone,                        /* nothing seen */
  oprotocol,                    /* @interface or @protocol seen */
  oimplementation,              /* @implementation seen */
  otagseen,                     /* class name seen */
  oparenseen,                   /* parenthesis before category seen */
  ocatseen,                     /* category name seen */
  oinbody,                      /* in @implementation body */
  omethodsign,                  /* in @implementation body, after +/- */
  omethodtag,                   /* after method name */
  omethodcolon,                 /* after method colon */
  omethodparm,                  /* after method parameter */
  oignore                       /* wait for @end */
} objdef;

2693
2694
/*
 * Use this structure to keep info about the token read, and how it
 * should be tagged.  Used by the make_C_tag function to build a tag.
 */
static struct tok
{
  char *line;                   /* string containing the token */
  int offset;                   /* where the token starts in LINE */
  int length;                   /* token length */
  /*
    The previous members can be used to pass strings around for generic
    purposes.  The following ones specifically refer to creating tags.  In this
    case the token contained here is the pattern that will be used to create a
    tag.
  */
  bool valid;                   /* TRUE if a tag may be created from this
                                   token; when FALSE, make_C_tag creates no
                                   tag (unless DEBUG).  The token should be
                                   invalidated whenever a state machine is
                                   reset prematurely; make_C_tag also
                                   invalidates it after use */
  bool named;                   /* create a named tag */
  int lineno;                   /* source line number of tag */
  long linepos;                 /* source char number of tag */
} token;                        /* latest token read */
2717
/*
 * Variables and functions for dealing with nested structures.
 * Idea by Mykola Dzyuba <mdzyuba@yahoo.com> (2001)
 */
static void pushclass_above __P((int, char *, int));
static void popclass_above __P((int));
static void write_classname __P((linebuffer *, char *qualifier));

/* CNAME and BRACELEV are parallel arrays: entry I holds the name of the
   I-th enclosing declaration (NULL if it has none) and the brace level
   recorded for it by pushclass_above.  */
static struct {
  char **cname;                 /* nested class names */
  int *bracelev;                /* nested class brace level */
  int nl;                       /* class nesting level (elements used) */
  int size;                     /* allocated length of both arrays */
} cstack;                       /* stack for nested declaration tags */
/* Current struct nesting depth (namespace, class, struct, union, enum). */
#define nestlev         (cstack.nl)
/* After struct keyword or in struct body, not inside a nested function.
   This macro refers to a variable named `bracelev', which must be in
   scope wherever it is used.  */
#define instruct        (structdef == snone && nestlev > 0                      \
                         && bracelev == cstack.bracelev[nestlev-1] + 1)
2737
2738 static void
2739 pushclass_above (bracelev, str, len)
2740      int bracelev;
2741      char *str;
2742      int len;
2743 {
2744   int nl;
2745
2746   popclass_above (bracelev);
2747   nl = cstack.nl;
2748   if (nl >= cstack.size)
2749     {
2750       int size = cstack.size *= 2;
2751       xrnew (cstack.cname, size, char *);
2752       xrnew (cstack.bracelev, size, int);
2753     }
2754   assert (nl == 0 || cstack.bracelev[nl-1] < bracelev);
2755   cstack.cname[nl] = (str == NULL) ? NULL : savenstr (str, len);
2756   cstack.bracelev[nl] = bracelev;
2757   cstack.nl = nl + 1;
2758 }
2759
2760 static void
2761 popclass_above (bracelev)
2762      int bracelev;
2763 {
2764   int nl;
2765
2766   for (nl = cstack.nl - 1;
2767        nl >= 0 && cstack.bracelev[nl] >= bracelev;
2768        nl--)
2769     {
2770       if (cstack.cname[nl] != NULL)
2771         free (cstack.cname[nl]);
2772       cstack.nl = nl;
2773     }
2774 }
2775
2776 static void
2777 write_classname (cn, qualifier)
2778      linebuffer *cn;
2779      char *qualifier;
2780 {
2781   int i, len;
2782   int qlen = strlen (qualifier);
2783
2784   if (cstack.nl == 0 || cstack.cname[0] == NULL)
2785     {
2786       len = 0;
2787       cn->len = 0;
2788       cn->buffer[0] = '\0';
2789     }
2790   else
2791     {
2792       len = strlen (cstack.cname[0]);
2793       linebuffer_setlen (cn, len);
2794       strcpy (cn->buffer, cstack.cname[0]);
2795     }
2796   for (i = 1; i < cstack.nl; i++)
2797     {
2798       char *s;
2799       int slen;
2800
2801       s = cstack.cname[i];
2802       if (s == NULL)
2803         continue;
2804       slen = strlen (s);
2805       len += slen + qlen;
2806       linebuffer_setlen (cn, len);
2807       strncat (cn->buffer, qualifier, qlen);
2808       strncat (cn->buffer, s, slen);
2809     }
2810 }
2811
2812 \f
static bool consider_token __P((char *, int, int, int *, int, int, bool *));
static void make_C_tag __P((bool));

/*
 * consider_token ()
 *      checks to see if the current token is at the start of a
 *      function or variable, or corresponds to a typedef, or
 *      is a struct/union/enum tag, or #define, or an enum constant.
 *
 *      Returns TRUE if the token is one of the above, FALSE if it is
 *      to be skipped.
 *
 *      *IS_FUNC_OR_VAR is set to TRUE when the token is taken to be a
 *      function or variable name, a #define macro with args, an
 *      operator, or an Objective C class or category name.  On most
 *      other paths it is left untouched.  C_EXTP points to which
 *      language we are looking at; the language is switched from
 *      auto-detection to C++ when a C++ construct is met.
 *
 *      The state machines are advanced in this order: definedef,
 *      typdef, structdef, objdef, fvdef.  An early return leaves the
 *      following ones untouched.
 *
 * Globals
 *      fvdef                   IN OUT
 *      structdef               IN OUT
 *      definedef               IN OUT
 *      typdef                  IN OUT
 *      objdef                  IN OUT
 *      fvextern                OUT
 *      inattribute             OUT
 *      objtag                  OUT
 *      token_name              OUT (Objective C method names)
 */

static bool
consider_token (str, len, c, c_extp, bracelev, parlev, is_func_or_var)
     register char *str;        /* IN: token pointer */
     register int len;          /* IN: token length */
     register int c;            /* IN: first char after the token */
     int *c_extp;               /* IN, OUT: C extensions mask */
     int bracelev;              /* IN: brace level */
     int parlev;                /* IN: parenthesis level */
     bool *is_func_or_var;      /* OUT: function or variable found */
{
  /* When structdef is stagseen, scolonseen, or snone with bracelev > 0,
     structtype is the type of the preceding struct-like keyword, and
     structbracelev is the brace level where it has been seen. */
  static enum sym_type structtype;
  static int structbracelev;
  static enum sym_type toktype;


  toktype = C_symtype (str, len, *c_extp);

  /*
   * Skip __attribute__
   */
  if (toktype == st_C_attribute)
    {
      inattribute = TRUE;
      return FALSE;
    }

  /*
   * Advance the definedef state machine.
   */
  switch (definedef)
    {
    case dnone:
      /* We're not on a preprocessor line. */
      if (toktype == st_C_gnumacro)
        {
          fvdef = fdefunkey;
          return FALSE;
        }
      break;
    case dsharpseen:
      /* First token after '#': only `define' is of interest. */
      if (toktype == st_C_define)
        {
          definedef = ddefineseen;
        }
      else
        {
          definedef = dignorerest;
        }
      return FALSE;
    case ddefineseen:
      /*
       * Make a tag for any macro, unless it is a constant
       * and constantypedefs is FALSE.
       */
      definedef = dignorerest;
      /* A macro takes arguments iff '(' immediately follows its name. */
      *is_func_or_var = (c == '(');
      if (!*is_func_or_var && !constantypedefs)
        return FALSE;
      else
        return TRUE;
    case dignorerest:
      return FALSE;
    default:
      error ("internal error: definedef value.", (char *)NULL);
    }

  /*
   * Now typedefs
   */
  switch (typdef)
    {
    case tnone:
      if (toktype == st_C_typedef)
        {
          /* The keyword resets the function/variable machine even when
             typedefs are not being tagged. */
          if (typedefs)
            typdef = tkeyseen;
          fvextern = FALSE;
          fvdef = fvnone;
          return FALSE;
        }
      break;
    case tkeyseen:
      /* Any other keyword leaves the state unchanged. */
      switch (toktype)
        {
        case st_none:
        case st_C_class:
        case st_C_struct:
        case st_C_enum:
          typdef = ttypeseen;
        }
      break;
    case ttypeseen:
      if (structdef == snone && fvdef == fvnone)
        {
          fvdef = fvnameseen;
          return TRUE;
        }
      break;
    case tend:
      /* Just before the typedef tag: anything but a struct-like
         keyword is taken as the tag. */
      switch (toktype)
        {
        case st_C_class:
        case st_C_struct:
        case st_C_enum:
          return FALSE;
        }
      return TRUE;
    }

  /*
   * This structdef business is NOT invoked when we are ctags and the
   * file is plain C.  This is because a struct tag may have the same
   * name as another tag, and this loses with ctags.
   */
  switch (toktype)
    {
    case st_C_javastruct:
      /* `extends' or `implements' after the tag. */
      if (structdef == stagseen)
        structdef = scolonseen;
      return FALSE;
    case st_C_template:
    case st_C_class:
      if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
          && bracelev == 0
          && definedef == dnone && structdef == snone
          && typdef == tnone && fvdef == fvnone)
        *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
      /* `template' only takes part in the language detection. */
      if (toktype == st_C_template)
        break;
      /* FALLTHRU */
    case st_C_struct:
    case st_C_enum:
      if (parlev == 0
          && fvdef != vignore
          && (typdef == tkeyseen
              || (typedefs_or_cplusplus && structdef == snone)))
        {
          structdef = skeyseen;
          structtype = toktype;
          structbracelev = bracelev;
          if (fvdef == fvnameseen)
            fvdef = fvnone;
        }
      return FALSE;
    }

  /* The token that follows a struct-like keyword is the tag. */
  if (structdef == skeyseen)
    {
      structdef = stagseen;
      return TRUE;
    }

  if (typdef != tnone)
    definedef = dnone;

  /* Detect Objective C constructs. */
  switch (objdef)
    {
    case onone:
      switch (toktype)
        {
        case st_C_objprot:
          objdef = oprotocol;
          return FALSE;
        case st_C_objimpl:
          objdef = oimplementation;
          return FALSE;
        }
      break;
    case oimplementation:
      /* Save the class tag for functions or variables defined inside. */
      objtag = savenstr (str, len);
      objdef = oinbody;
      return FALSE;
    case oprotocol:
      /* Save the class tag for categories. */
      objtag = savenstr (str, len);
      objdef = otagseen;
      *is_func_or_var = TRUE;
      return TRUE;
    case oparenseen:
      objdef = ocatseen;
      *is_func_or_var = TRUE;
      return TRUE;
    case oinbody:
      break;
    case omethodsign:
      if (parlev == 0)
        {
          /* First part of the method name: start token_name afresh. */
          fvdef = fvnone;
          objdef = omethodtag;
          linebuffer_setlen (&token_name, len);
          strncpy (token_name.buffer, str, len);
          token_name.buffer[len] = '\0';
          return TRUE;
        }
      return FALSE;
    case omethodcolon:
      if (parlev == 0)
        objdef = omethodparm;
      return FALSE;
    case omethodparm:
      if (parlev == 0)
        {
          /* Further part of the method name: append it to token_name. */
          fvdef = fvnone;
          objdef = omethodtag;
          linebuffer_setlen (&token_name, token_name.len + len);
          strncat (token_name.buffer, str, len);
          return TRUE;
        }
      return FALSE;
    case oignore:
      if (toktype == st_C_objend)
        {
          /* Memory leakage here: the string pointed by objtag is
             never released, because many tests would be needed to
             avoid breaking on incorrect input code.  The amount of
             memory leaked here is the sum of the lengths of the
             class tags.
          free (objtag); */
          objdef = onone;
        }
      return FALSE;
    }

  /* A function, variable or enum constant? */
  switch (toktype)
    {
    case st_C_extern:
      fvextern = TRUE;
      /* Do not disturb a function or an ignored construct already
         being parsed. */
      switch  (fvdef)
        {
        case finlist:
        case flistseen:
        case fignore:
        case vignore:
          break;
        default:
          fvdef = fvnone;
        }
      return FALSE;
    case st_C_ignore:
      fvextern = FALSE;
      fvdef = vignore;
      return FALSE;
    case st_C_operator:
      fvdef = foperator;
      *is_func_or_var = TRUE;
      return TRUE;
    case st_none:
      if (constantypedefs
          && structdef == snone
          && structtype == st_C_enum && bracelev > structbracelev)
        return TRUE;           /* enum constant */
      switch (fvdef)
        {
        case fdefunkey:
          if (bracelev > 0)
            break;
          fvdef = fdefunname;  /* GNU macro */
          *is_func_or_var = TRUE;
          return TRUE;
        case fvnone:
          switch (typdef)
            {
            case ttypeseen:
              return FALSE;
            case tnone:
              /* asm and __asm__ are handled like the ignored keywords. */
              if ((strneq (str, "asm", 3) && endtoken (str[3]))
                  || (strneq (str, "__asm__", 7) && endtoken (str[7])))
                {
                  fvdef = vignore;
                  return FALSE;
                }
              break;
            }
          /* FALLTHRU */
        case fvnameseen:
          /* A qualified name ending in "::operator". */
          if (len >= 10 && strneq (str+len-10, "::operator", 10))
            {
              if (*c_extp & C_AUTO) /* automatic detection of C++ */
                *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
              fvdef = foperator;
              *is_func_or_var = TRUE;
              return TRUE;
            }
          /* Inside braces, only names directly in a struct body count. */
          if (bracelev > 0 && !instruct)
            break;
          fvdef = fvnameseen;   /* function or variable */
          *is_func_or_var = TRUE;
          return TRUE;
        }
      break;
    }

  return FALSE;
}
3133
3134 \f
/*
 * C_entries often keeps pointers to tokens or lines which are older than
 * the line currently read.  By keeping two line buffers, and switching
 * them at end of line, it is possible to use those pointers.
 */
static struct
{
  long linepos;                 /* value of charno when the line was read */
  linebuffer lb;                /* the line itself */
} lbs[2];

/* The macros below refer to variables named `curndx', `newndx' and
   `c_ext', which must be in scope wherever they are used.  */
#define current_lb_is_new (newndx == curndx)
#define switch_line_buffers() (curndx = 1 - curndx)

/* Line buffers, and their positions, selected by curndx and newndx. */
#define curlb (lbs[curndx].lb)
#define newlb (lbs[newndx].lb)
#define curlinepos (lbs[curndx].linepos)
#define newlinepos (lbs[newndx].linepos)

/* Tests on the language mask c_ext. */
#define plainc ((c_ext & C_EXT) == C_PLAIN)
#define cplpl (c_ext & C_PLPL)
#define cjava ((c_ext & C_JAVA) == C_JAVA)
3157
/* Read the next line of `inf' into the current line buffer and make
   it the new one, leaving definedef alone: used where a newline does
   not end a preprocessor line.  Refers to variables named `charno',
   `inf', `lp', `quotednl', `curndx' and `newndx', which must be in
   scope wherever it is used.  */
#define CNL_SAVE_DEFINEDEF()                                            \
do {                                                                    \
  curlinepos = charno;                                                  \
  readline (&curlb, inf);                                               \
  lp = curlb.buffer;                                                    \
  quotednl = FALSE;                                                     \
  newndx = curndx;                                                      \
} while (0)

/* Like CNL_SAVE_DEFINEDEF, but also end any preprocessor line: the
   token put aside in `savetoken', if valid, becomes the current token
   again, and definedef is reset.  */
#define CNL()                                                           \
do {                                                                    \
  CNL_SAVE_DEFINEDEF();                                                 \
  if (savetoken.valid)                                                  \
    {                                                                   \
      token = savetoken;                                                \
      savetoken.valid = FALSE;                                          \
    }                                                                   \
  definedef = dnone;                                                    \
} while (0)
3177
3178
3179 static void
3180 make_C_tag (isfun)
3181      bool isfun;
3182 {
3183   /* This function should never be called when token.valid is FALSE, but
3184      we must protect against invalid input or internal errors. */
3185   if (!DEBUG && !token.valid)
3186     return;
3187
3188   if (token.valid)
3189     make_tag (token_name.buffer, token_name.len, isfun, token.line,
3190               token.offset+token.length+1, token.lineno, token.linepos);
3191   else                          /* this case is optimised away if !DEBUG */
3192     make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
3193               token_name.len + 17, isfun, token.line,
3194               token.offset+token.length+1, token.lineno, token.linepos);
3195
3196   token.valid = FALSE;
3197 }
3198
3199
3200 /*
3201  * C_entries ()
3202  *      This routine finds functions, variables, typedefs,
3203  *      #define's, enum constants and struct/union/enum definitions in
3204  *      C syntax and adds them to the list.
3205  */
3206 static void
3207 C_entries (c_ext, inf)
3208      int c_ext;                 /* extension of C */
3209      FILE *inf;                 /* input file */
3210 {
3211   register char c;              /* latest char read; '\0' for end of line */
3212   register char *lp;            /* pointer one beyond the character `c' */
3213   int curndx, newndx;           /* indices for current and new lb */
3214   register int tokoff;          /* offset in line of start of current token */
3215   register int toklen;          /* length of current token */
3216   char *qualifier;              /* string used to qualify names */
3217   int qlen;                     /* length of qualifier */
3218   int bracelev;                 /* current brace level */
3219   int bracketlev;               /* current bracket level */
3220   int parlev;                   /* current parenthesis level */
3221   int attrparlev;               /* __attribute__ parenthesis level */
3222   int templatelev;              /* current template level */
3223   int typdefbracelev;           /* bracelev where a typedef struct body begun */
3224   bool incomm, inquote, inchar, quotednl, midtoken;
3225   bool yacc_rules;              /* in the rules part of a yacc file */
3226   struct tok savetoken;         /* token saved during preprocessor handling */
3227
3228
3229   linebuffer_init (&lbs[0].lb);
3230   linebuffer_init (&lbs[1].lb);
3231   if (cstack.size == 0)
3232     {
3233       cstack.size = (DEBUG) ? 1 : 4;
3234       cstack.nl = 0;
3235       cstack.cname = xnew (cstack.size, char *);
3236       cstack.bracelev = xnew (cstack.size, int);
3237     }
3238
3239   tokoff = toklen = typdefbracelev = 0; /* keep compiler quiet */
3240   curndx = newndx = 0;
3241   lp = curlb.buffer;
3242   *lp = 0;
3243
3244   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
3245   structdef = snone; definedef = dnone; objdef = onone;
3246   yacc_rules = FALSE;
3247   midtoken = inquote = inchar = incomm = quotednl = FALSE;
3248   token.valid = savetoken.valid = FALSE;
3249   bracelev = bracketlev = parlev = attrparlev = templatelev = 0;
3250   if (cjava)
3251     { qualifier = "."; qlen = 1; }
3252   else
3253     { qualifier = "::"; qlen = 2; }
3254
3255
3256   while (!feof (inf))
3257     {
3258       c = *lp++;
3259       if (c == '\\')
3260         {
3261           /* If we are at the end of the line, the next character is a
3262              '\0'; do not skip it, because it is what tells us
3263              to read the next line.  */
3264           if (*lp == '\0')
3265             {
3266               quotednl = TRUE;
3267               continue;
3268             }
3269           lp++;
3270           c = ' ';
3271         }
3272       else if (incomm)
3273         {
3274           switch (c)
3275             {
3276             case '*':
3277               if (*lp == '/')
3278                 {
3279                   c = *lp++;
3280                   incomm = FALSE;
3281                 }
3282               break;
3283             case '\0':
3284               /* Newlines inside comments do not end macro definitions in
3285                  traditional cpp. */
3286               CNL_SAVE_DEFINEDEF ();
3287               break;
3288             }
3289           continue;
3290         }
3291       else if (inquote)
3292         {
3293           switch (c)
3294             {
3295             case '"':
3296               inquote = FALSE;
3297               break;
3298             case '\0':
3299               /* Newlines inside strings do not end macro definitions
3300                  in traditional cpp, even though compilers don't
3301                  usually accept them. */
3302               CNL_SAVE_DEFINEDEF ();
3303               break;
3304             }
3305           continue;
3306         }
3307       else if (inchar)
3308         {
3309           switch (c)
3310             {
3311             case '\0':
3312               /* Hmmm, something went wrong. */
3313               CNL ();
3314               /* FALLTHRU */
3315             case '\'':
3316               inchar = FALSE;
3317               break;
3318             }
3319           continue;
3320         }
3321       else if (bracketlev > 0)
3322         {
3323           switch (c)
3324             {
3325             case ']':
3326               if (--bracketlev > 0)
3327                 continue;
3328               break;
3329             case '\0':
3330               CNL_SAVE_DEFINEDEF ();
3331               break;
3332             }
3333           continue;
3334         }
3335       else switch (c)
3336         {
3337         case '"':
3338           inquote = TRUE;
3339           if (inattribute)
3340             break;
3341           switch (fvdef)
3342             {
3343             case fdefunkey:
3344             case fstartlist:
3345             case finlist:
3346             case fignore:
3347             case vignore:
3348               break;
3349             default:
3350               fvextern = FALSE;
3351               fvdef = fvnone;
3352             }
3353           continue;
3354         case '\'':
3355           inchar = TRUE;
3356           if (inattribute)
3357             break;
3358           if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
3359             {
3360               fvextern = FALSE;
3361               fvdef = fvnone;
3362             }
3363           continue;
3364         case '/':
3365           if (*lp == '*')
3366             {
3367               lp++;
3368               incomm = TRUE;
3369               continue;
3370             }
3371           else if (/* cplpl && */ *lp == '/')
3372             {
3373               c = '\0';
3374               break;
3375             }
3376           else
3377             break;
3378         case '%':
3379           if ((c_ext & YACC) && *lp == '%')
3380             {
3381               /* Entering or exiting rules section in yacc file. */
3382               lp++;
3383               definedef = dnone; fvdef = fvnone; fvextern = FALSE;
3384               typdef = tnone; structdef = snone;
3385               midtoken = inquote = inchar = incomm = quotednl = FALSE;
3386               bracelev = 0;
3387               yacc_rules = !yacc_rules;
3388               continue;
3389             }
3390           else
3391             break;
3392         case '#':
3393           if (definedef == dnone)
3394             {
3395               char *cp;
3396               bool cpptoken = TRUE;
3397
3398               /* Look back on this line.  If all blanks, or nonblanks
3399                  followed by an end of comment, this is a preprocessor
3400                  token. */
3401               for (cp = newlb.buffer; cp < lp-1; cp++)
3402                 if (!iswhite (*cp))
3403                   {
3404                     if (*cp == '*' && *(cp+1) == '/')
3405                       {
3406                         cp++;
3407                         cpptoken = TRUE;
3408                       }
3409                     else
3410                       cpptoken = FALSE;
3411                   }
3412               if (cpptoken)
3413                 definedef = dsharpseen;
3414             } /* if (definedef == dnone) */
3415           continue;
3416         case '[':
3417           bracketlev++;
3418             continue;
3419         } /* switch (c) */
3420
3421
3422       /* Consider token only if some involved conditions are satisfied. */
3423       if (typdef != tignore
3424           && definedef != dignorerest
3425           && fvdef != finlist
3426           && templatelev == 0
3427           && (definedef != dnone
3428               || structdef != scolonseen)
3429           && !inattribute)
3430         {
3431           if (midtoken)
3432             {
3433               if (endtoken (c))
3434                 {
3435                   if (c == ':' && *lp == ':' && begtoken (lp[1]))
3436                     /* This handles :: in the middle,
3437                        but not at the beginning of an identifier.
3438                        Also, space-separated :: is not recognised. */
3439                     {
3440                       if (c_ext & C_AUTO) /* automatic detection of C++ */
3441                         c_ext = (c_ext | C_PLPL) & ~C_AUTO;
3442                       lp += 2;
3443                       toklen += 2;
3444                       c = lp[-1];
3445                       goto still_in_token;
3446                     }
3447                   else
3448                     {
3449                       bool funorvar = FALSE;
3450
3451                       if (yacc_rules
3452                           || consider_token (newlb.buffer + tokoff, toklen, c,
3453                                              &c_ext, bracelev, parlev,
3454                                              &funorvar))
3455                         {
3456                           if (fvdef == foperator)
3457                             {
3458                               char *oldlp = lp;
3459                               lp = skip_spaces (lp-1);
3460                               if (*lp != '\0')
3461                                 lp += 1;
3462                               while (*lp != '\0'
3463                                      && !iswhite (*lp) && *lp != '(')
3464                                 lp += 1;
3465                               c = *lp++;
3466                               toklen += lp - oldlp;
3467                             }
3468                           token.named = FALSE;
3469                           if (!plainc
3470                               && nestlev > 0 && definedef == dnone)
3471                             /* in struct body */
3472                             {
3473                               write_classname (&token_name, qualifier);
3474                               linebuffer_setlen (&token_name,
3475                                                  token_name.len+qlen+toklen);
3476                               strcat (token_name.buffer, qualifier);
3477                               strncat (token_name.buffer,
3478                                        newlb.buffer + tokoff, toklen);
3479                               token.named = TRUE;
3480                             }
3481                           else if (objdef == ocatseen)
3482                             /* Objective C category */
3483                             {
3484                               int len = strlen (objtag) + 2 + toklen;
3485                               linebuffer_setlen (&token_name, len);
3486                               strcpy (token_name.buffer, objtag);
3487                               strcat (token_name.buffer, "(");
3488                               strncat (token_name.buffer,
3489                                        newlb.buffer + tokoff, toklen);
3490                               strcat (token_name.buffer, ")");
3491                               token.named = TRUE;
3492                             }
3493                           else if (objdef == omethodtag
3494                                    || objdef == omethodparm)
3495                             /* Objective C method */
3496                             {
3497                               token.named = TRUE;
3498                             }
3499                           else if (fvdef == fdefunname)
3500                             /* GNU DEFUN and similar macros */
3501                             {
3502                               bool defun = (newlb.buffer[tokoff] == 'F');
3503                               int off = tokoff;
3504                               int len = toklen;
3505
3506                               /* Rewrite the tag so that emacs lisp DEFUNs
3507                                  can be found by their elisp name */
3508                               if (defun)
3509                                 {
3510                                   off += 1;
3511                                   len -= 1;
3512                                 }
3513                               len = toklen;
3514                               linebuffer_setlen (&token_name, len);
3515                               strncpy (token_name.buffer,
3516                                        newlb.buffer + off, len);
3517                               token_name.buffer[len] = '\0';
3518                               if (defun)
3519                                 while (--len >= 0)
3520                                   if (token_name.buffer[len] == '_')
3521                                     token_name.buffer[len] = '-';
3522                               token.named = defun;
3523                             }
3524                           else
3525                             {
3526                               linebuffer_setlen (&token_name, toklen);
3527                               strncpy (token_name.buffer,
3528                                        newlb.buffer + tokoff, toklen);
3529                               token_name.buffer[toklen] = '\0';
3530                               /* Name macros and members. */
3531                               token.named = (structdef == stagseen
3532                                              || typdef == ttypeseen
3533                                              || typdef == tend
3534                                              || (funorvar
3535                                                  && definedef == dignorerest)
3536                                              || (funorvar
3537                                                  && definedef == dnone
3538                                                  && structdef == snone
3539                                                  && bracelev > 0));
3540                             }
3541                           token.lineno = lineno;
3542                           token.offset = tokoff;
3543                           token.length = toklen;
3544                           token.line = newlb.buffer;
3545                           token.linepos = newlinepos;
3546                           token.valid = TRUE;
3547
3548                           if (definedef == dnone
3549                               && (fvdef == fvnameseen
3550                                   || fvdef == foperator
3551                                   || structdef == stagseen
3552                                   || typdef == tend
3553                                   || typdef == ttypeseen
3554                                   || objdef != onone))
3555                             {
3556                               if (current_lb_is_new)
3557                                 switch_line_buffers ();
3558                             }
3559                           else if (definedef != dnone
3560                                    || fvdef == fdefunname
3561                                    || instruct)
3562                             make_C_tag (funorvar);
3563                         }
3564                       else /* not yacc and consider_token failed */
3565                         {
3566                           if (inattribute && fvdef == fignore)
3567                             {
3568                               /* We have just met __attribute__ after a
3569                                  function parameter list: do not tag the
3570                                  function again. */
3571                               fvdef = fvnone;
3572                             }
3573                         }
3574                       midtoken = FALSE;
3575                     }
3576                 } /* if (endtoken (c)) */
3577               else if (intoken (c))
3578                 still_in_token:
3579                 {
3580                   toklen++;
3581                   continue;
3582                 }
3583             } /* if (midtoken) */
3584           else if (begtoken (c))
3585             {
3586               switch (definedef)
3587                 {
3588                 case dnone:
3589                   switch (fvdef)
3590                     {
3591                     case fstartlist:
3592                       /* This prevents tagging fb in
3593                          void (__attribute__((noreturn)) *fb) (void);
3594                          Fixing this is not easy and not very important. */
3595                       fvdef = finlist;
3596                       continue;
3597                     case flistseen:
3598                       if (plainc || declarations)
3599                         {
3600                           make_C_tag (TRUE); /* a function */
3601                           fvdef = fignore;
3602                         }
3603                       break;
3604                     }
3605                   if (structdef == stagseen && !cjava)
3606                     {
3607                       popclass_above (bracelev);
3608                       structdef = snone;
3609                     }
3610                   break;
3611                 case dsharpseen:
3612                   savetoken = token;
3613                   break;
3614                 }
3615               if (!yacc_rules || lp == newlb.buffer + 1)
3616                 {
3617                   tokoff = lp - 1 - newlb.buffer;
3618                   toklen = 1;
3619                   midtoken = TRUE;
3620                 }
3621               continue;
3622             } /* if (begtoken) */
3623         } /* if must look at token */
3624
3625
3626       /* Detect end of line, colon, comma, semicolon and various braces
3627          after having handled a token.*/
3628       switch (c)
3629         {
3630         case ':':
3631           if (inattribute)
3632             break;
3633           if (yacc_rules && token.offset == 0 && token.valid)
3634             {
3635               make_C_tag (FALSE); /* a yacc function */
3636               break;
3637             }
3638           if (definedef != dnone)
3639             break;
3640           switch (objdef)
3641             {
3642             case  otagseen:
3643               objdef = oignore;
3644               make_C_tag (TRUE); /* an Objective C class */
3645               break;
3646             case omethodtag:
3647             case omethodparm:
3648               objdef = omethodcolon;
3649               linebuffer_setlen (&token_name, token_name.len + 1);
3650               strcat (token_name.buffer, ":");
3651               break;
3652             }
3653           if (structdef == stagseen)
3654             {
3655               structdef = scolonseen;
3656               break;
3657             }
3658           /* Should be useless, but may be work as a safety net. */
3659           if (cplpl && fvdef == flistseen)
3660             {
3661               make_C_tag (TRUE); /* a function */
3662               fvdef = fignore;
3663               break;
3664             }
3665           break;
3666         case ';':
3667           if (definedef != dnone || inattribute)
3668             break;
3669           switch (typdef)
3670             {
3671             case tend:
3672             case ttypeseen:
3673               make_C_tag (FALSE); /* a typedef */
3674               typdef = tnone;
3675               fvdef = fvnone;
3676               break;
3677             case tnone:
3678             case tinbody:
3679             case tignore:
3680               switch (fvdef)
3681                 {
3682                 case fignore:
3683                   if (typdef == tignore || cplpl)
3684                     fvdef = fvnone;
3685                   break;
3686                 case fvnameseen:
3687                   if ((globals && bracelev == 0 && (!fvextern || declarations))
3688                       || (members && instruct))
3689                     make_C_tag (FALSE); /* a variable */
3690                   fvextern = FALSE;
3691                   fvdef = fvnone;
3692                   token.valid = FALSE;
3693                   break;
3694                 case flistseen:
3695                   if ((declarations
3696                        && (cplpl || !instruct)
3697                        && (typdef == tnone || (typdef != tignore && instruct)))
3698                       || (members
3699                           && plainc && instruct))
3700                     make_C_tag (TRUE);  /* a function */
3701                   /* FALLTHRU */
3702                 default:
3703                   fvextern = FALSE;
3704                   fvdef = fvnone;
3705                   if (declarations
3706                        && cplpl && structdef == stagseen)
3707                     make_C_tag (FALSE); /* forward declaration */
3708                   else
3709                     token.valid = FALSE;
3710                 } /* switch (fvdef) */
3711               /* FALLTHRU */
3712             default:
3713               if (!instruct)
3714                 typdef = tnone;
3715             }
3716           if (structdef == stagseen)
3717             structdef = snone;
3718           break;
3719         case ',':
3720           if (definedef != dnone || inattribute)
3721             break;
3722           switch (objdef)
3723             {
3724             case omethodtag:
3725             case omethodparm:
3726               make_C_tag (TRUE); /* an Objective C method */
3727               objdef = oinbody;
3728               break;
3729             }
3730           switch (fvdef)
3731             {
3732             case fdefunkey:
3733             case foperator:
3734             case fstartlist:
3735             case finlist:
3736             case fignore:
3737             case vignore:
3738               break;
3739             case fdefunname:
3740               fvdef = fignore;
3741               break;
3742             case fvnameseen:
3743               if (parlev == 0
3744                   && ((globals
3745                        && bracelev == 0
3746                        && templatelev == 0
3747                        && (!fvextern || declarations))
3748                       || (members && instruct)))
3749                   make_C_tag (FALSE); /* a variable */
3750               break;
3751             case flistseen:
3752               if ((declarations && typdef == tnone && !instruct)
3753                   || (members && typdef != tignore && instruct))
3754                 {
3755                   make_C_tag (TRUE); /* a function */
3756                   fvdef = fvnameseen;
3757                 }
3758               else if (!declarations)
3759                 fvdef = fvnone;
3760               token.valid = FALSE;
3761               break;
3762             default:
3763               fvdef = fvnone;
3764             }
3765           if (structdef == stagseen)
3766             structdef = snone;
3767           break;
3768         case ']':
3769           if (definedef != dnone || inattribute)
3770             break;
3771           if (structdef == stagseen)
3772             structdef = snone;
3773           switch (typdef)
3774             {
3775             case ttypeseen:
3776             case tend:
3777               typdef = tignore;
3778               make_C_tag (FALSE);       /* a typedef */
3779               break;
3780             case tnone:
3781             case tinbody:
3782               switch (fvdef)
3783                 {
3784                 case foperator:
3785                 case finlist:
3786                 case fignore:
3787                 case vignore:
3788                   break;
3789                 case fvnameseen:
3790                   if ((members && bracelev == 1)
3791                       || (globals && bracelev == 0
3792                           && (!fvextern || declarations)))
3793                     make_C_tag (FALSE); /* a variable */
3794                   /* FALLTHRU */
3795                 default:
3796                   fvdef = fvnone;
3797                 }
3798               break;
3799             }
3800           break;
3801         case '(':
3802           if (inattribute)
3803             {
3804               attrparlev++;
3805               break;
3806             }
3807           if (definedef != dnone)
3808             break;
3809           if (objdef == otagseen && parlev == 0)
3810             objdef = oparenseen;
3811           switch (fvdef)
3812             {
3813             case fvnameseen:
3814               if (typdef == ttypeseen
3815                   && *lp != '*'
3816                   && !instruct)
3817                 {
3818                   /* This handles constructs like:
3819                      typedef void OperatorFun (int fun); */
3820                   make_C_tag (FALSE);
3821                   typdef = tignore;
3822                   fvdef = fignore;
3823                   break;
3824                 }
3825               /* FALLTHRU */
3826             case foperator:
3827               fvdef = fstartlist;
3828               break;
3829             case flistseen:
3830               fvdef = finlist;
3831               break;
3832             }
3833           parlev++;
3834           break;
3835         case ')':
3836           if (inattribute)
3837             {
3838               if (--attrparlev == 0)
3839                 inattribute = FALSE;
3840               break;
3841             }
3842           if (definedef != dnone)
3843             break;
3844           if (objdef == ocatseen && parlev == 1)
3845             {
3846               make_C_tag (TRUE); /* an Objective C category */
3847               objdef = oignore;
3848             }
3849           if (--parlev == 0)
3850             {
3851               switch (fvdef)
3852                 {
3853                 case fstartlist:
3854                 case finlist:
3855                   fvdef = flistseen;
3856                   break;
3857                 }
3858               if (!instruct
3859                   && (typdef == tend
3860                       || typdef == ttypeseen))
3861                 {
3862                   typdef = tignore;
3863                   make_C_tag (FALSE); /* a typedef */
3864                 }
3865             }
3866           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3867             parlev = 0;
3868           break;
3869         case '{':
3870           if (definedef != dnone)
3871             break;
3872           if (typdef == ttypeseen)
3873             {
3874               /* Whenever typdef is set to tinbody (currently only
3875                  here), typdefbracelev should be set to bracelev. */
3876               typdef = tinbody;
3877               typdefbracelev = bracelev;
3878             }
3879           switch (fvdef)
3880             {
3881             case flistseen:
3882               make_C_tag (TRUE);    /* a function */
3883               /* FALLTHRU */
3884             case fignore:
3885               fvdef = fvnone;
3886               break;
3887             case fvnone:
3888               switch (objdef)
3889                 {
3890                 case otagseen:
3891                   make_C_tag (TRUE); /* an Objective C class */
3892                   objdef = oignore;
3893                   break;
3894                 case omethodtag:
3895                 case omethodparm:
3896                   make_C_tag (TRUE); /* an Objective C method */
3897                   objdef = oinbody;
3898                   break;
3899                 default:
3900                   /* Neutralize `extern "C" {' grot. */
3901                   if (bracelev == 0 && structdef == snone && nestlev == 0
3902                       && typdef == tnone)
3903                     bracelev = -1;
3904                 }
3905               break;
3906             }
3907           switch (structdef)
3908             {
3909             case skeyseen:         /* unnamed struct */
3910               pushclass_above (bracelev, NULL, 0);
3911               structdef = snone;
3912               break;
3913             case stagseen:         /* named struct or enum */
3914             case scolonseen:       /* a class */
3915               pushclass_above (bracelev,token.line+token.offset, token.length);
3916               structdef = snone;
3917               make_C_tag (FALSE);  /* a struct or enum */
3918               break;
3919             }
3920           bracelev++;
3921           break;
3922         case '*':
3923           if (definedef != dnone)
3924             break;
3925           if (fvdef == fstartlist)
3926             {
3927               fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
3928               token.valid = FALSE;
3929             }
3930           break;
3931         case '}':
3932           if (definedef != dnone)
3933             break;
3934           if (!ignoreindent && lp == newlb.buffer + 1)
3935             {
3936               if (bracelev != 0)
3937                 token.valid = FALSE;
3938               bracelev = 0;     /* reset brace level if first column */
3939               parlev = 0;       /* also reset paren level, just in case... */
3940             }
3941           else if (bracelev > 0)
3942             bracelev--;
3943           else
3944             token.valid = FALSE; /* something gone amiss, token unreliable */
3945           popclass_above (bracelev);
3946           structdef = snone;
3947           /* Only if typdef == tinbody is typdefbracelev significant. */
3948           if (typdef == tinbody && bracelev <= typdefbracelev)
3949             {
3950               assert (bracelev == typdefbracelev);
3951               typdef = tend;
3952             }
3953           break;
3954         case '=':
3955           if (definedef != dnone)
3956             break;
3957           switch (fvdef)
3958             {
3959             case foperator:
3960             case finlist:
3961             case fignore:
3962             case vignore:
3963               break;
3964             case fvnameseen:
3965               if ((members && bracelev == 1)
3966                   || (globals && bracelev == 0 && (!fvextern || declarations)))
3967                 make_C_tag (FALSE); /* a variable */
3968               /* FALLTHRU */
3969             default:
3970               fvdef = vignore;
3971             }
3972           break;
3973         case '<':
3974           if (cplpl
3975               && (structdef == stagseen || fvdef == fvnameseen))
3976             {
3977               templatelev++;
3978               break;
3979             }
3980           goto resetfvdef;
3981         case '>':
3982           if (templatelev > 0)
3983             {
3984               templatelev--;
3985               break;
3986             }
3987           goto resetfvdef;
3988         case '+':
3989         case '-':
3990           if (objdef == oinbody && bracelev == 0)
3991             {
3992               objdef = omethodsign;
3993               break;
3994             }
3995           /* FALLTHRU */
3996         resetfvdef:
3997         case '#': case '~': case '&': case '%': case '/':
3998         case '|': case '^': case '!': case '.': case '?':
3999           if (definedef != dnone)
4000             break;
4001           /* These surely cannot follow a function tag in C. */
4002           switch (fvdef)
4003             {
4004             case foperator:
4005             case finlist:
4006             case fignore:
4007             case vignore:
4008               break;
4009             default:
4010               fvdef = fvnone;
4011             }
4012           break;
4013         case '\0':
4014           if (objdef == otagseen)
4015             {
4016               make_C_tag (TRUE); /* an Objective C class */
4017               objdef = oignore;
4018             }
4019           /* If a macro spans multiple lines don't reset its state. */
4020           if (quotednl)
4021             CNL_SAVE_DEFINEDEF ();
4022           else
4023             CNL ();
4024           break;
4025         } /* switch (c) */
4026
4027     } /* while not eof */
4028
4029   free (lbs[0].lb.buffer);
4030   free (lbs[1].lb.buffer);
4031 }
4032
4033 /* Tag INF as C++ if `cplusplus' is set; else let C_entries auto-detect C++. */
4034 static void
4035 default_C_entries (inf)
4036      FILE *inf;
4037 {
4038   if (cplusplus)
4039     C_entries (C_PLPL, inf);
4040   else
4041     C_entries (C_AUTO, inf);
4042 }
4043
4044 /* Always do plain C: call C_entries on INF with 0, i.e. no language flag. */
4045 static void
4046 plain_C_entries (inf)
4047      FILE *inf;
4048 {
4049   C_entries (0, inf);
4050 }
4051
4052 /* Always do C++: call C_entries on INF with C_PLPL as its first argument. */
4053 static void
4054 Cplusplus_entries (inf)
4055      FILE *inf;
4056 {
4057   C_entries (C_PLPL, inf);
4058 }
4059
4060 /* Always do Java: call C_entries on INF with C_JAVA as its first argument. */
4061 static void
4062 Cjava_entries (inf)
4063      FILE *inf;
4064 {
4065   C_entries (C_JAVA, inf);
4066 }
4067
4068 /* Always do C*: call C_entries on INF with C_STAR as its first argument. */
4069 static void
4070 Cstar_entries (inf)
4071      FILE *inf;
4072 {
4073   C_entries (C_STAR, inf);
4074 }
4075
4076 /* Always do Yacc: call C_entries on INF with YACC as its first argument. */
4077 static void
4078 Yacc_entries (inf)
4079      FILE *inf;
4080 {
4081   C_entries (YACC, inf);
4082 }
4083
4084 \f
4085 /* Useful macros. */
     /* LOOP_ON_INPUT_LINES expands to the header of a `for' loop, so the
        statement written after it is the loop body.  Before each run of
        the body, and only if feof (FILE_POINTER) is false, readline is
        called on LINE_BUFFER and FILE_POINTER, and CHAR_POINTER is set to
        LINE_BUFFER.buffer.  The loop ends as soon as feof is true.  */
4086 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
4087   for (;                        /* loop initialization */               \
4088        !feof (file_pointer)     /* loop test */                         \
4089        &&                       /* instructions at start of loop */     \
4090           (readline (&line_buffer, file_pointer),                       \
4091            char_pointer = line_buffer.buffer,                           \
4092            TRUE);                                                       \
4093       )
4094
     /* True if CP points at the keyword KW (strneq over the length of KW)
        and the character right after it satisfies notinname.  In that
        case CP is also moved past KW and then to wherever skip_spaces
        returns; otherwise CP is not modified.  CP is evaluated several
        times and assigned to, so it must be a side-effect-free lvalue.  */
4095 #define LOOKING_AT(cp, kw)  /* kw is the keyword, a literal string */   \
4096   ((assert("" kw), TRUE)   /* syntax error if not a literal string */   \
4097    && strneq ((cp), kw, sizeof(kw)-1)           /* cp points at kw */   \
4098    && notinname ((cp)[sizeof(kw)-1])            /* end of kw */         \
4099    && ((cp) = skip_spaces((cp)+sizeof(kw)-1)))  /* skip spaces */
4100
4101 /* Similar to LOOKING_AT, but compares with strncaseeq, does not use
        notinname to check what follows KW and does not skip spaces: on
        success CP is left pointing just past KW. */
4102 #define LOOKING_AT_NOCASE(cp, kw) /* the keyword is a literal string */ \
4103   ((assert("" kw), TRUE)     /* syntax error if not a literal string */ \
4104    && strncaseeq ((cp), kw, sizeof(kw)-1)       /* cp points at kw */   \
4105    && ((cp) += sizeof(kw)-1))                   /* skip past kw */
4106
4107 /*
4108  * Go through INF one line at a time without making any tag.  This is
4109  * for files with no language defined, on which regexp matching is done.
4110  */
4111 static void
4112 just_read_file (inf)
4113      FILE *inf;
4114 {
4115   register char *line_start;    /* set by the macro, not used here */
4116
4117   LOOP_ON_INPUT_LINES (inf, lb, line_start)
4118     { /* Reading the line is all that is wanted.  */ }
4119 }
4120
4121 \f
4122 /* Fortran parsing */
4123
4124 static void F_takeprec __P((void));
4125 static void F_getit __P((FILE *));
4126
4127 /* Skip an optional `*N' or `*(*)' after a Fortran type keyword.  Works
4128    on the global DBP: apart from skipping spaces it is left alone if no
4129    `*' follows, else it is moved past the digits or past the `(*)'.  */
4130 static void
4131 F_takeprec ()
4132 {
4133   dbp = skip_spaces (dbp);
4134   if (*dbp == '*')
4135     {
4136       dbp = skip_spaces (dbp + 1);
4137       if (strneq (dbp, "(*)", 3))
4138         dbp += 3;
4139       else if (ISDIGIT (*dbp))
4140         {
4141           while (ISDIGIT (*dbp))
4142             dbp++;
4143         }
4144       else
4145         /* Neither form: back up one character to force failure.  */
4146         --dbp;
4147     }
4148 }
4149
4150 /* Tag the name at DBP; if the line ends first, look on a `&' continuation.  */
4151 static void
4152 F_getit (inf)
4153      FILE *inf;
4154 {
4155   register char *cp;
4156
4157   dbp = skip_spaces (dbp);
4158   if (*dbp == '\0')
4159     {
4160       readline (&lb, inf);
4161       dbp = lb.buffer;
4162       /* On a line of less than 6 chars dbp[5] is past the NUL: no `&'.  */
4163       if (strlen (dbp) < 6 || dbp[5] != '&')
4164         return;
4165       dbp = skip_spaces (dbp + 6);
4166     }
4167   if (!ISALPHA (*dbp) && *dbp != '_' && *dbp != '$')
4168     return;
4169   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
4170     continue;
4171   make_tag (dbp, cp-dbp, TRUE,
4172             lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4173 }
4174
4175 /*
4176  * Make tags for the function, subroutine, entry and block data
4177  * statements found in the Fortran file INF.
4178  *
4179  * Each line is handled in two steps, after dropping a leading `%'
4180  * (the Ratfor escape to Fortran) and leading spaces:
4181  *
4182  *  1. A type that may precede the keyword is stepped over: integer,
4183  *     real, logical, complex or character, each with the optional
4184  *     size that F_takeprec skips, or double precision.
4185  *
4186  *  2. If function, subroutine or entry comes next, F_getit tags the
4187  *     name after it.  After blockdata (or block data) F_getit is used
4188  *     too, unless the line ends there: then the tag "blockdata" is
4189  *     made for the un-named block.
4190  */
4191 static void
4192 Fortran_functions (inf)
4193      FILE *inf;
4194 {
4195   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4196     {
4197       int first;                /* lowercased character found at DBP */
4198
4199       if (*dbp == '%')
4200         dbp++;                  /* Ratfor escape to fortran */
4201       dbp = skip_spaces (dbp);
4202       if (*dbp == '\0')
4203         continue;
4204
4205       /* Step 1: the optional type.  The test on FIRST comes before each
4206          nocase_tail call, so only the keywords beginning with that
4207          letter are ever tried.  */
4208       first = lowcase (*dbp);
4209       if ((first == 'i' && nocase_tail ("integer"))
4210           || (first == 'r' && nocase_tail ("real"))
4211           || (first == 'l' && nocase_tail ("logical"))
4212           || (first == 'c'
4213               && (nocase_tail ("complex") || nocase_tail ("character"))))
4214         F_takeprec ();
4215       else if (first == 'd' && nocase_tail ("double"))
4216         {
4217           /* `double' must be followed by `precision' on this line,
4218              else the whole line is given up.  */
4219           dbp = skip_spaces (dbp);
4220           if (*dbp == '\0' || !nocase_tail ("precision"))
4221             continue;
4222         }
4223
4224       dbp = skip_spaces (dbp);
4225       if (*dbp == '\0')
4226         continue;
4227
4228       /* Step 2: the keyword, then the name to tag.  */
4229       first = lowcase (*dbp);
4230       if ((first == 'f' && nocase_tail ("function"))
4231           || (first == 's' && nocase_tail ("subroutine"))
4232           || (first == 'e' && nocase_tail ("entry")))
4233         F_getit (inf);
4234       else if (first == 'b'
4235                && (nocase_tail ("blockdata")
4236                    || nocase_tail ("block data")))
4237         {
4238           dbp = skip_spaces (dbp);
4239           if (*dbp == '\0')     /* assume un-named */
4240             make_tag ("blockdata", 9, TRUE,
4241                       lb.buffer, dbp - lb.buffer, lineno, linecharno);
4242           else
4243             F_getit (inf);      /* look for name */
4244         }
4245     }
4246 }
4247
4248 \f
4249 /*
4250  * Ada parsing
4251  * Original code by
4252  * Philippe Waroquiers (1998)
4253  */
4254
4255 static void Ada_getit __P((FILE *, char *));
4256
4257 /* With DBP just after an "interesting" keyword, make a tag for the name
4258    that follows, with NAME_QUALIFIER appended to it.  A `body' found
4259    before the name changes the qualifier to "/b"; a `type' is skipped.
4260    The name is either quoted or made of letters, digits, `_' and `.';
4261    if there is none, no tag is made.  */
4262 static void
4263 Ada_getit (inf, name_qualifier)
4264      FILE *inf;
4265      char *name_qualifier;
4266 {
4267   register char *end;           /* first character after the name */
4268   char *tagname;                /* name followed by the qualifier */
4269   char saved;                   /* character temporarily zeroed at END */
4270   int first;                    /* lowercased character at DBP */
4271
4272   while (!feof (inf))
4273     {
4274       dbp = skip_spaces (dbp);
4275       if (*dbp == '\0' || (dbp[0] == '-' && dbp[1] == '-'))
4276         {
4277           readline (&lb, inf);
4278           dbp = lb.buffer;
4279         }
4280
4281       first = lowcase (*dbp);
4282       if (first == 'b' && nocase_tail ("body"))
4283         {
4284           /* Skipping body of   procedure body   or   package body or ....
4285              resetting qualifier to body instead of spec. */
4286           name_qualifier = "/b";
4287           continue;
4288         }
4289       /* Skipping type of   task type   or   protected type ... */
4290       if (first == 't' && nocase_tail ("type"))
4291         continue;
4292
4293       if (*dbp == '"')
4294         {
4295           end = ++dbp;          /* the name starts after the quote */
4296           while (*end != '\0' && *end != '"')
4297             end++;
4298         }
4299       else
4300         {
4301           dbp = skip_spaces (dbp);
4302           end = dbp;
4303           while (*end != '\0' && (ISALPHA (*end) || ISDIGIT (*end)
4304                                   || *end == '_' || *end == '.'))
4305             end++;
4306           if (end == dbp)
4307             return;             /* no name here */
4308         }
4309
4310       /* Cut the line at END just long enough to build the tag name.  */
4311       saved = *end;
4312       *end = '\0';
4313       tagname = concat (dbp, name_qualifier, "");
4314       *end = saved;
4315       make_tag (tagname, strlen (tagname), TRUE,
4316                 lb.buffer, end - lb.buffer + 1, lineno, linecharno);
4317       free (tagname);
4318       if (saved == '"')
4319         dbp = end + 1;
4320       return;
4321     }
4322 }
4323
/* Scan the Ada source INF and tag functions, procedures, packages,
   tasks, protected objects and (when `typedefs' is set) types.  When
   `packages_only' is set, only the "package" and "protected" keywords
   produce tags.  The actual name extraction is delegated to Ada_getit;
   this function only finds the keywords while skipping string
   literals, comments, character literals and "use" clauses.  */
static void
Ada_funcs (inf)
     FILE *inf;
{
  bool inquote = FALSE;         /* inside a string literal that did not
                                   end on the line where it started */
  bool skip_till_semicolumn = FALSE; /* ignoring text up to the next ';' */

  LOOP_ON_INPUT_LINES (inf, lb, dbp)
    {
      while (*dbp != '\0')
        {
          /* Skip a string i.e. "abcd". */
          if (inquote || (*dbp == '"'))
            {
              /* When already inside a string, search from dbp itself;
                 otherwise start after the opening quote. */
              dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
              if (dbp != NULL)
                {
                  inquote = FALSE;
                  dbp += 1;
                  continue;     /* advance char */
                }
              else
                {
                  /* No closing quote on this line: remember that and
                     keep looking on the next one. */
                  inquote = TRUE;
                  break;        /* advance line */
                }
            }

          /* Skip comments. */
          if (dbp[0] == '-' && dbp[1] == '-')
            break;              /* advance line */

          /* Skip character enclosed in single quote i.e. 'a'
             and skip single quote starting an attribute i.e. 'Image. */
          if (*dbp == '\'')
            {
              dbp++ ;
              if (*dbp != '\0')
                dbp++;
              continue;
            }

          if (skip_till_semicolumn)
            {
              if (*dbp == ';')
                skip_till_semicolumn = FALSE;
              dbp++;
              continue;         /* advance char */
            }

          /* Search for beginning of a token.  */
          if (!begtoken (*dbp))
            {
              dbp++;
              continue;         /* advance char */
            }

          /* We are at the beginning of a token. */
          switch (lowcase(*dbp))
            {
            case 'f':
              if (!packages_only && nocase_tail ("function"))
                Ada_getit (inf, "/f");
              else
                break;          /* from switch */
              continue;         /* advance char */
            case 'p':
              if (!packages_only && nocase_tail ("procedure"))
                Ada_getit (inf, "/p");
              else if (nocase_tail ("package"))
                Ada_getit (inf, "/s");
              else if (nocase_tail ("protected")) /* protected type */
                Ada_getit (inf, "/t");
              else
                break;          /* from switch */
              continue;         /* advance char */

            case 'u':
              if (typedefs && !packages_only && nocase_tail ("use"))
                {
                  /* when tagging types, avoid tagging  use type Pack.Typename;
                     for this, we will skip everything till a ; */
                  skip_till_semicolumn = TRUE;
                  continue;     /* advance char */
                }
              /* FALLTHRU: there is no break here, so a token starting
                 with 'u' that was not handled above is also run through
                 the 't' tests below. */

            case 't':
              if (!packages_only && nocase_tail ("task"))
                Ada_getit (inf, "/k");
              else if (typedefs && !packages_only && nocase_tail ("type"))
                {
                  Ada_getit (inf, "/t");
                  /* Ignore whatever follows the type name on this line. */
                  while (*dbp != '\0')
                    dbp += 1;
                }
              else
                break;          /* from switch */
              continue;         /* advance char */
            }

          /* Not a keyword we care about.  */
          /* Look for the end of the token. */
          while (!endtoken (*dbp))
            dbp++;

        } /* advance char */
    } /* advance line */
}
4431
4432 \f
4433 /*
4434  * Unix and microcontroller assembly tag handling
4435  * Labels:  /^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]/
4436  * Idea by Bob Weiner, Motorola Inc. (1994)
4437  */
4438 static void
4439 Asm_labels (inf)
4440      FILE *inf;
4441 {
4442   register char *cp;
4443
4444   LOOP_ON_INPUT_LINES (inf, lb, cp)
4445     {
4446       /* If first char is alphabetic or one of [_.$], test for colon
4447          following identifier. */
4448       if (ISALPHA (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4449         {
4450           /* Read past label. */
4451           cp++;
4452           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
4453             cp++;
4454           if (*cp == ':' || iswhite (*cp))
4455             /* Found end of label, so copy it and add it to the table. */
4456             make_tag (lb.buffer, cp - lb.buffer, TRUE,
4457                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4458         }
4459     }
4460 }
4461
4462 \f
4463 /*
4464  * Perl support
4465  * Perl sub names: /^sub[ \t\n]+[^ \t\n{]+/
4466  * Perl variable names: /^(my|local).../
4467  * Original code by Bart Robinson <lomew@cs.utah.edu> (1995)
4468  * Additions by Michael Ernst <mernst@alum.mit.edu> (1997)
4469  * Ideas by Kai Großjohann <Kai.Grossjohann@CS.Uni-Dortmund.DE> (2001)
4470  */
4471 static void
4472 Perl_functions (inf)
4473      FILE *inf;
4474 {
4475   char *package = savestr ("main"); /* current package name */
4476   register char *cp;
4477
4478   LOOP_ON_INPUT_LINES (inf, lb, cp)
4479     {
4480       skip_spaces(cp);
4481
4482       if (LOOKING_AT (cp, "package"))
4483         {
4484           free (package);
4485           get_tag (cp, &package);
4486         }
4487       else if (LOOKING_AT (cp, "sub"))
4488         {
4489           char *pos;
4490           char *sp = cp;
4491
4492           while (!notinname (*cp))
4493             cp++;
4494           if (cp == sp)
4495             continue;           /* nothing found */
4496           if ((pos = etags_strchr (sp, ':')) != NULL
4497               && pos < cp && pos[1] == ':')
4498             /* The name is already qualified. */
4499             make_tag (sp, cp - sp, TRUE,
4500                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4501           else
4502             /* Qualify it. */
4503             {
4504               char savechar, *name;
4505
4506               savechar = *cp;
4507               *cp = '\0';
4508               name = concat (package, "::", sp);
4509               *cp = savechar;
4510               make_tag (name, strlen(name), TRUE,
4511                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4512               free (name);
4513             }
4514         }
4515        else if (globals)        /* only if we are tagging global vars */
4516         {
4517           /* Skip a qualifier, if any. */
4518           bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
4519           /* After "my" or "local", but before any following paren or space. */
4520           char *varstart = cp;
4521
4522           if (qual              /* should this be removed?  If yes, how? */
4523               && (*cp == '$' || *cp == '@' || *cp == '%'))
4524             {
4525               varstart += 1;
4526               do
4527                 cp++;
4528               while (ISALNUM (*cp) || *cp == '_');
4529             }
4530           else if (qual)
4531             {
4532               /* Should be examining a variable list at this point;
4533                  could insist on seeing an open parenthesis. */
4534               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
4535                 cp++;
4536             }
4537           else
4538             continue;
4539
4540           make_tag (varstart, cp - varstart, FALSE,
4541                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4542         }
4543     }
4544 }
4545
4546
4547 /*
4548  * Python support
4549  * Look for /^[\t]*def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
4550  * Idea by Eric S. Raymond <esr@thyrsus.com> (1997)
4551  * More ideas by seb bacon <seb@jamkit.com> (2002)
4552  */
4553 static void
4554 Python_functions (inf)
4555      FILE *inf;
4556 {
4557   register char *cp;
4558
4559   LOOP_ON_INPUT_LINES (inf, lb, cp)
4560     {
4561       cp = skip_spaces (cp);
4562       if (LOOKING_AT (cp, "def") || LOOKING_AT (cp, "class"))
4563         {
4564           char *name = cp;
4565           while (!notinname (*cp) && *cp != ':')
4566             cp++;
4567           make_tag (name, cp - name, TRUE,
4568                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4569         }
4570     }
4571 }
4572
4573 \f
4574 /*
4575  * PHP support
4576  * Look for:
4577  *  - /^[ \t]*function[ \t\n]+[^ \t\n(]+/
4578  *  - /^[ \t]*class[ \t\n]+[^ \t\n]+/
4579  *  - /^[ \t]*define\(\"[^\"]+/
4580  * Only with --members:
4581  *  - /^[ \t]*var[ \t\n]+\$[^ \t\n=;]/
4582  * Idea by Diez B. Roggisch (2001)
4583  */
4584 static void
4585 PHP_functions (inf)
4586      FILE *inf;
4587 {
4588   register char *cp, *name;
4589   bool search_identifier = FALSE;
4590
4591   LOOP_ON_INPUT_LINES (inf, lb, cp)
4592     {
4593       cp = skip_spaces (cp);
4594       name = cp;
4595       if (search_identifier
4596           && *cp != '\0')
4597         {
4598           while (!notinname (*cp))
4599             cp++;
4600           make_tag (name, cp - name, TRUE,
4601                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4602           search_identifier = FALSE;
4603         }
4604       else if (LOOKING_AT (cp, "function"))
4605         {
4606           if(*cp == '&')
4607             cp = skip_spaces (cp+1);
4608           if(*cp != '\0')
4609             {
4610               name = cp;
4611               while (!notinname (*cp))
4612                 cp++;
4613               make_tag (name, cp - name, TRUE,
4614                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4615             }
4616           else
4617             search_identifier = TRUE;
4618         }
4619       else if (LOOKING_AT (cp, "class"))
4620         {
4621           if (*cp != '\0')
4622             {
4623               name = cp;
4624               while (*cp != '\0' && !iswhite (*cp))
4625                 cp++;
4626               make_tag (name, cp - name, FALSE,
4627                         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4628             }
4629           else
4630             search_identifier = TRUE;
4631         }
4632       else if (strneq (cp, "define", 6)
4633                && (cp = skip_spaces (cp+6))
4634                && *cp++ == '('
4635                && (*cp == '"' || *cp == '\''))
4636         {
4637           char quote = *cp++;
4638           name = cp;
4639           while (*cp != quote && *cp != '\0')
4640             cp++;
4641           make_tag (name, cp - name, FALSE,
4642                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4643         }
4644       else if (members
4645                && LOOKING_AT (cp, "var")
4646                && *cp == '$')
4647         {
4648           name = cp;
4649           while (!notinname(*cp))
4650             cp++;
4651           make_tag (name, cp - name, FALSE,
4652                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4653         }
4654     }
4655 }
4656
4657 \f
4658 /*
4659  * Cobol tag functions
4660  * We could look for anything that could be a paragraph name.
4661  * i.e. anything that starts in column 8 is one word and ends in a full stop.
4662  * Idea by Corny de Souza (1993)
4663  */
4664 static void
4665 Cobol_paragraphs (inf)
4666      FILE *inf;
4667 {
4668   register char *bp, *ep;
4669
4670   LOOP_ON_INPUT_LINES (inf, lb, bp)
4671     {
4672       if (lb.len < 9)
4673         continue;
4674       bp += 8;
4675
4676       /* If eoln, compiler option or comment ignore whole line. */
4677       if (bp[-1] != ' ' || !ISALNUM (bp[0]))
4678         continue;
4679
4680       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
4681         continue;
4682       if (*ep++ == '.')
4683         make_tag (bp, ep - bp, TRUE,
4684                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4685     }
4686 }
4687
4688 \f
4689 /*
4690  * Makefile support
4691  * Ideas by Assar Westerlund <assar@sics.se> (2001)
4692  */
4693 static void
4694 Makefile_targets (inf)
4695      FILE *inf;
4696 {
4697   register char *bp;
4698
4699   LOOP_ON_INPUT_LINES (inf, lb, bp)
4700     {
4701       if (*bp == '\t' || *bp == '#')
4702         continue;
4703       while (*bp != '\0' && *bp != '=' && *bp != ':')
4704         bp++;
4705       if (*bp == ':' || (globals && *bp == '='))
4706         make_tag (lb.buffer, bp - lb.buffer, TRUE,
4707                   lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
4708     }
4709 }
4710
4711 \f
4712 /*
4713  * Pascal parsing
4714  * Original code by Mosur K. Mohan (1989)
4715  *
4716  *  Locates tags for procedures & functions.  Doesn't do any type- or
4717  *  var-definitions.  It does look for the keyword "extern" or
4718  *  "forward" immediately following the procedure statement; if found,
4719  *  the tag is skipped.
4720  */
4721 static void
4722 Pascal_functions (inf)
4723      FILE *inf;
4724 {
4725   linebuffer tline;             /* mostly copied from C_entries */
4726   long save_lcno;
4727   int save_lineno, namelen, taglen;
4728   char c, *name;
4729
4730   bool                          /* each of these flags is TRUE iff: */
4731     incomment,                  /* point is inside a comment */
4732     inquote,                    /* point is inside '..' string */
4733     get_tagname,                /* point is after PROCEDURE/FUNCTION
4734                                    keyword, so next item = potential tag */
4735     found_tag,                  /* point is after a potential tag */
4736     inparms,                    /* point is within parameter-list */
4737     verify_tag;                 /* point has passed the parm-list, so the
4738                                    next token will determine whether this
4739                                    is a FORWARD/EXTERN to be ignored, or
4740                                    whether it is a real tag */
4741
4742   save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
4743   name = NULL;                  /* keep compiler quiet */
4744   dbp = lb.buffer;
4745   *dbp = '\0';
4746   linebuffer_init (&tline);
4747
4748   incomment = inquote = FALSE;
4749   found_tag = FALSE;            /* have a proc name; check if extern */
4750   get_tagname = FALSE;          /* found "procedure" keyword         */
4751   inparms = FALSE;              /* found '(' after "proc"            */
4752   verify_tag = FALSE;           /* check if "extern" is ahead        */
4753
4754
4755   while (!feof (inf))           /* long main loop to get next char */
4756     {
4757       c = *dbp++;
4758       if (c == '\0')            /* if end of line */
4759         {
4760           readline (&lb, inf);
4761           dbp = lb.buffer;
4762           if (*dbp == '\0')
4763             continue;
4764           if (!((found_tag && verify_tag)
4765                 || get_tagname))
4766             c = *dbp++;         /* only if don't need *dbp pointing
4767                                    to the beginning of the name of
4768                                    the procedure or function */
4769         }
4770       if (incomment)
4771         {
4772           if (c == '}')         /* within { } comments */
4773             incomment = FALSE;
4774           else if (c == '*' && *dbp == ')') /* within (* *) comments */
4775             {
4776               dbp++;
4777               incomment = FALSE;
4778             }
4779           continue;
4780         }
4781       else if (inquote)
4782         {
4783           if (c == '\'')
4784             inquote = FALSE;
4785           continue;
4786         }
4787       else
4788         switch (c)
4789           {
4790           case '\'':
4791             inquote = TRUE;     /* found first quote */
4792             continue;
4793           case '{':             /* found open { comment */
4794             incomment = TRUE;
4795             continue;
4796           case '(':
4797             if (*dbp == '*')    /* found open (* comment */
4798               {
4799                 incomment = TRUE;
4800                 dbp++;
4801               }
4802             else if (found_tag) /* found '(' after tag, i.e., parm-list */
4803               inparms = TRUE;
4804             continue;
4805           case ')':             /* end of parms list */
4806             if (inparms)
4807               inparms = FALSE;
4808             continue;
4809           case ';':
4810             if (found_tag && !inparms) /* end of proc or fn stmt */
4811               {
4812                 verify_tag = TRUE;
4813                 break;
4814               }
4815             continue;
4816           }
4817       if (found_tag && verify_tag && (*dbp != ' '))
4818         {
4819           /* Check if this is an "extern" declaration. */
4820           if (*dbp == '\0')
4821             continue;
4822           if (lowcase (*dbp == 'e'))
4823             {
4824               if (nocase_tail ("extern")) /* superfluous, really! */
4825                 {
4826                   found_tag = FALSE;
4827                   verify_tag = FALSE;
4828                 }
4829             }
4830           else if (lowcase (*dbp) == 'f')
4831             {
4832               if (nocase_tail ("forward")) /* check for forward reference */
4833                 {
4834                   found_tag = FALSE;
4835                   verify_tag = FALSE;
4836                 }
4837             }
4838           if (found_tag && verify_tag) /* not external proc, so make tag */
4839             {
4840               found_tag = FALSE;
4841               verify_tag = FALSE;
4842               make_tag (name, namelen, TRUE,
4843                         tline.buffer, taglen, save_lineno, save_lcno);
4844               continue;
4845             }
4846         }
4847       if (get_tagname)          /* grab name of proc or fn */
4848         {
4849           char *cp;
4850
4851           if (*dbp == '\0')
4852             continue;
4853
4854           /* Find block name. */
4855           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
4856             continue;
4857
4858           /* Save all values for later tagging. */
4859           linebuffer_setlen (&tline, lb.len);
4860           strcpy (tline.buffer, lb.buffer);
4861           save_lineno = lineno;
4862           save_lcno = linecharno;
4863           name = tline.buffer + (dbp - lb.buffer);
4864           namelen = cp - dbp;
4865           taglen = cp - lb.buffer + 1;
4866
4867           dbp = cp;             /* set dbp to e-o-token */
4868           get_tagname = FALSE;
4869           found_tag = TRUE;
4870           continue;
4871
4872           /* And proceed to check for "extern". */
4873         }
4874       else if (!incomment && !inquote && !found_tag)
4875         {
4876           /* Check for proc/fn keywords. */
4877           switch (lowcase (c))
4878             {
4879             case 'p':
4880               if (nocase_tail ("rocedure")) /* c = 'p', dbp has advanced */
4881                 get_tagname = TRUE;
4882               continue;
4883             case 'f':
4884               if (nocase_tail ("unction"))
4885                 get_tagname = TRUE;
4886               continue;
4887             }
4888         }
4889     } /* while not eof */
4890
4891   free (tline.buffer);
4892 }
4893
4894 \f
4895 /*
4896  * Lisp tag functions
4897  *  look for (def or (DEF, quote or QUOTE
4898  */
4899
4900 static void L_getit __P((void));
4901
4902 static void
4903 L_getit ()
4904 {
4905   if (*dbp == '\'')             /* Skip prefix quote */
4906     dbp++;
4907   else if (*dbp == '(')
4908   {
4909     dbp++;
4910     /* Try to skip "(quote " */
4911     if (!LOOKING_AT (dbp, "quote") && !LOOKING_AT (dbp, "QUOTE"))
4912       /* Ok, then skip "(" before name in (defstruct (foo)) */
4913       dbp = skip_spaces (dbp);
4914   }
4915   get_tag (dbp, NULL);
4916 }
4917
4918 static void
4919 Lisp_functions (inf)
4920      FILE *inf;
4921 {
4922   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4923     {
4924       if (dbp[0] != '(')
4925         continue;
4926
4927       if (strneq (dbp+1, "def", 3) || strneq (dbp+1, "DEF", 3))
4928         {
4929           dbp = skip_non_spaces (dbp);
4930           dbp = skip_spaces (dbp);
4931           L_getit ();
4932         }
4933       else
4934         {
4935           /* Check for (foo::defmumble name-defined ... */
4936           do
4937             dbp++;
4938           while (!notinname (*dbp) && *dbp != ':');
4939           if (*dbp == ':')
4940             {
4941               do
4942                 dbp++;
4943               while (*dbp == ':');
4944
4945               if (strneq (dbp, "def", 3) || strneq (dbp, "DEF", 3))
4946                 {
4947                   dbp = skip_non_spaces (dbp);
4948                   dbp = skip_spaces (dbp);
4949                   L_getit ();
4950                 }
4951             }
4952         }
4953     }
4954 }
4955
4956 \f
4957 /*
4958  * Lua script language parsing
4959  * Original code by David A. Capello <dacap@users.sourceforge.net> (2004)
4960  *
4961  *  "function" and "local function" are tags if they start at column 1.
4962  */
4963 static void
4964 Lua_functions (inf)
4965      FILE *inf;
4966 {
4967   register char *bp;
4968
4969   LOOP_ON_INPUT_LINES (inf, lb, bp)
4970     {
4971       if (bp[0] != 'f' && bp[0] != 'l')
4972         continue;
4973
4974       LOOKING_AT (bp, "local"); /* skip possible "local" */
4975
4976       if (LOOKING_AT (bp, "function"))
4977         get_tag (bp, NULL);
4978     }
4979 }
4980
4981 \f
4982 /*
4983  * Postscript tags
4984  * Just look for lines where the first character is '/'
4985  * Also look at "defineps" for PSWrap
4986  * Ideas by:
4987  *   Richard Mlynarik <mly@adoc.xerox.com> (1997)
4988  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
4989  */
4990 static void
4991 PS_functions (inf)
4992      FILE *inf;
4993 {
4994   register char *bp, *ep;
4995
4996   LOOP_ON_INPUT_LINES (inf, lb, bp)
4997     {
4998       if (bp[0] == '/')
4999         {
5000           for (ep = bp+1;
5001                *ep != '\0' && *ep != ' ' && *ep != '{';
5002                ep++)
5003             continue;
5004           make_tag (bp, ep - bp, TRUE,
5005                     lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
5006         }
5007       else if (LOOKING_AT (bp, "defineps"))
5008         get_tag (bp, NULL);
5009     }
5010 }
5011
5012 \f
5013 /*
5014  * Forth tags
5015  * Ignore anything after \ followed by space or in ( )
5016  * Look for words defined by :
5017  * Look for constant, code, create, defer, value, and variable
5018  * OBP extensions:  Look for buffer:, field,
5019  * Ideas by Eduardo Horvath <eeh@netbsd.org> (2004)
5020  */
5021 static void
5022 Forth_words (inf)
5023      FILE *inf;
5024 {
5025   register char *bp;
5026
5027   LOOP_ON_INPUT_LINES (inf, lb, bp)
5028     while ((bp = skip_spaces (bp))[0] != '\0')
5029       if (bp[0] == '\\' && iswhite(bp[1]))
5030         break;                  /* read next line */
5031       else if (bp[0] == '(' && iswhite(bp[1]))
5032         do                      /* skip to ) or eol */
5033           bp++;
5034         while (*bp != ')' && *bp != '\0');
5035       else if ((bp[0] == ':' && iswhite(bp[1]) && bp++)
5036                || LOOKING_AT_NOCASE (bp, "constant")
5037                || LOOKING_AT_NOCASE (bp, "code")
5038                || LOOKING_AT_NOCASE (bp, "create")
5039                || LOOKING_AT_NOCASE (bp, "defer")
5040                || LOOKING_AT_NOCASE (bp, "value")
5041                || LOOKING_AT_NOCASE (bp, "variable")
5042                || LOOKING_AT_NOCASE (bp, "buffer:")
5043                || LOOKING_AT_NOCASE (bp, "field"))
5044         get_tag (skip_spaces (bp), NULL); /* Yay!  A definition! */
5045       else
5046         bp = skip_non_spaces (bp);
5047 }
5048
5049 \f
5050 /*
5051  * Scheme tag functions
5052  * look for (def... xyzzy
5053  *          (def... (xyzzy
5054  *          (def ... ((...(xyzzy ....
5055  *          (set! xyzzy
5056  * Original code by Ken Haase (1985?)
5057  */
5058 static void
5059 Scheme_functions (inf)
5060      FILE *inf;
5061 {
5062   register char *bp;
5063
5064   LOOP_ON_INPUT_LINES (inf, lb, bp)
5065     {
5066       if (strneq (bp, "(def", 4) || strneq (bp, "(DEF", 4))
5067         {
5068           bp = skip_non_spaces (bp+4);
5069           /* Skip over open parens and white space */
5070           while (notinname (*bp))
5071             bp++;
5072           get_tag (bp, NULL);
5073         }
5074       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
5075         get_tag (bp, NULL);
5076     }
5077 }
5078
5079 \f
/* Find tags in TeX and LaTeX input files.  */

/* TEX_toktab is a table of TeX control sequences that define tags.
 * Each entry records one such control sequence.
 * The table ends with an entry whose buffer is NULL; it is built by
 * TEX_decode_env the first time TeX_commands runs.
 *
 * Original code from who knows whom.
 * Ideas by:
 *   Stefan Monnier (2002)
 */

static linebuffer *TEX_toktab = NULL; /* Table with tag tokens */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Control sequence names are separated by colons.  */
static char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index:def\
:newcommand:renewcommand:newenvironment:renewenvironment";

static void TEX_mode __P((FILE *));
static void TEX_decode_env __P((char *, char *));

/* Escape and grouping characters in use for the file being scanned.
   TEX_mode sets all three each time it is called.  */
static char TEX_esc = '\\';
static char TEX_opgrp = '{';
static char TEX_clgrp = '}';
5105
5106 /*
5107  * TeX/LaTeX scanning loop.
5108  */
5109 static void
5110 TeX_commands (inf)
5111      FILE *inf;
5112 {
5113   char *cp;
5114   linebuffer *key;
5115
5116   /* Select either \ or ! as escape character.  */
5117   TEX_mode (inf);
5118
5119   /* Initialize token table once from environment. */
5120   if (TEX_toktab == NULL)
5121     TEX_decode_env ("TEXTAGS", TEX_defenv);
5122
5123   LOOP_ON_INPUT_LINES (inf, lb, cp)
5124     {
5125       /* Look at each TEX keyword in line. */
5126       for (;;)
5127         {
5128           /* Look for a TEX escape. */
5129           while (*cp++ != TEX_esc)
5130             if (cp[-1] == '\0' || cp[-1] == '%')
5131               goto tex_next_line;
5132
5133           for (key = TEX_toktab; key->buffer != NULL; key++)
5134             if (strneq (cp, key->buffer, key->len))
5135               {
5136                 register char *p;
5137                 int namelen, linelen;
5138                 bool opgrp = FALSE;
5139
5140                 cp = skip_spaces (cp + key->len);
5141                 if (*cp == TEX_opgrp)
5142                   {
5143                     opgrp = TRUE;
5144                     cp++;
5145                   }
5146                 for (p = cp;
5147                      (!iswhite (*p) && *p != '#' &&
5148                       *p != TEX_opgrp && *p != TEX_clgrp);
5149                      p++)
5150                   continue;
5151                 namelen = p - cp;
5152                 linelen = lb.len;
5153                 if (!opgrp || *p == TEX_clgrp)
5154                   {
5155                     while (*p != '\0' && *p != TEX_opgrp && *p != TEX_clgrp)
5156                       *p++;
5157                     linelen = p - lb.buffer + 1;
5158                   }
5159                 make_tag (cp, namelen, TRUE,
5160                           lb.buffer, linelen, lineno, linecharno);
5161                 goto tex_next_line; /* We only tag a line once */
5162               }
5163         }
5164     tex_next_line:
5165       ;
5166     }
5167 }
5168
5169 #define TEX_LESC '\\'
5170 #define TEX_SESC '!'
5171
5172 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
5173    chars accordingly. */
5174 static void
5175 TEX_mode (inf)
5176      FILE *inf;
5177 {
5178   int c;
5179
5180   while ((c = getc (inf)) != EOF)
5181     {
5182       /* Skip to next line if we hit the TeX comment char. */
5183       if (c == '%')
5184         while (c != '\n')
5185           c = getc (inf);
5186       else if (c == TEX_LESC || c == TEX_SESC )
5187         break;
5188     }
5189
5190   if (c == TEX_LESC)
5191     {
5192       TEX_esc = TEX_LESC;
5193       TEX_opgrp = '{';
5194       TEX_clgrp = '}';
5195     }
5196   else
5197     {
5198       TEX_esc = TEX_SESC;
5199       TEX_opgrp = '<';
5200       TEX_clgrp = '>';
5201     }
5202   /* If the input file is compressed, inf is a pipe, and rewind may fail.
5203      No attempt is made to correct the situation. */
5204   rewind (inf);
5205 }
5206
5207 /* Read environment and prepend it to the default string.
5208    Build token table. */
5209 static void
5210 TEX_decode_env (evarname, defenv)
5211      char *evarname;
5212      char *defenv;
5213 {
5214   register char *env, *p;
5215   int i, len;
5216
5217   /* Append default string to environment. */
5218   env = getenv (evarname);
5219   if (!env)
5220     env = defenv;
5221   else
5222     {
5223       char *oldenv = env;
5224       env = concat (oldenv, defenv, "");
5225     }
5226
5227   /* Allocate a token table */
5228   for (len = 1, p = env; p;)
5229     if ((p = etags_strchr (p, ':')) && *++p != '\0')
5230       len++;
5231   TEX_toktab = xnew (len, linebuffer);
5232
5233   /* Unpack environment string into token table. Be careful about */
5234   /* zero-length strings (leading ':', "::" and trailing ':') */
5235   for (i = 0; *env != '\0';)
5236     {
5237       p = etags_strchr (env, ':');
5238       if (!p)                   /* End of environment string. */
5239         p = env + strlen (env);
5240       if (p - env > 0)
5241         {                       /* Only non-zero strings. */
5242           TEX_toktab[i].buffer = savenstr (env, p - env);
5243           TEX_toktab[i].len = p - env;
5244           i++;
5245         }
5246       if (*p)
5247         env = p + 1;
5248       else
5249         {
5250           TEX_toktab[i].buffer = NULL; /* Mark end of table. */
5251           TEX_toktab[i].len = 0;
5252           break;
5253         }
5254     }
5255 }
5256
5257 \f
5258 /* Texinfo support.  Dave Love, Mar. 2000.  */
5259 static void
5260 Texinfo_nodes (inf)
5261      FILE * inf;
5262 {
5263   char *cp, *start;
5264   LOOP_ON_INPUT_LINES (inf, lb, cp)
5265     if (LOOKING_AT (cp, "@node"))
5266       {
5267         start = cp;
5268         while (*cp != '\0' && *cp != ',')
5269           cp++;
5270         make_tag (start, cp - start, TRUE,
5271                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
5272       }
5273 }
5274
5275 \f
5276 /*
5277  * HTML support.
5278  * Contents of <title>, <h1>, <h2>, <h3> are tags.
5279  * Contents of <a name=xxx> are tags with name xxx.
5280  *
5281  * Francesco Potortì, 2002.
5282  */
5283 static void
5284 HTML_labels (inf)
5285      FILE * inf;
5286 {
5287   bool getnext = FALSE;         /* next text outside of HTML tags is a tag */
5288   bool skiptag = FALSE;         /* skip to the end of the current HTML tag */
5289   bool intag = FALSE;           /* inside an html tag, looking for ID= */
5290   bool inanchor = FALSE;        /* when INTAG, is an anchor, look for NAME= */
5291   char *end;
5292
5293
5294   linebuffer_setlen (&token_name, 0); /* no name in buffer */
5295
5296   LOOP_ON_INPUT_LINES (inf, lb, dbp)
5297     for (;;)                    /* loop on the same line */
5298       {
5299         if (skiptag)            /* skip HTML tag */
5300           {
5301             while (*dbp != '\0' && *dbp != '>')
5302               dbp++;
5303             if (*dbp == '>')
5304               {
5305                 dbp += 1;
5306                 skiptag = FALSE;
5307                 continue;       /* look on the same line */
5308               }
5309             break;              /* go to next line */
5310           }
5311
5312         else if (intag) /* look for "name=" or "id=" */
5313           {
5314             while (*dbp != '\0' && *dbp != '>'
5315                    && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
5316               dbp++;
5317             if (*dbp == '\0')
5318               break;            /* go to next line */
5319             if (*dbp == '>')
5320               {
5321                 dbp += 1;
5322                 intag = FALSE;
5323                 continue;       /* look on the same line */
5324               }
5325             if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
5326                 || LOOKING_AT_NOCASE (dbp, "id="))
5327               {
5328                 bool quoted = (dbp[0] == '"');
5329
5330                 if (quoted)
5331                   for (end = ++dbp; *end != '\0' && *end != '"'; end++)
5332                     continue;
5333                 else
5334                   for (end = dbp; *end != '\0' && intoken (*end); end++)
5335                     continue;
5336                 linebuffer_setlen (&token_name, end - dbp);
5337                 strncpy (token_name.buffer, dbp, end - dbp);
5338                 token_name.buffer[end - dbp] = '\0';
5339
5340                 dbp = end;
5341                 intag = FALSE;  /* we found what we looked for */
5342                 skiptag = TRUE; /* skip to the end of the tag */
5343                 getnext = TRUE; /* then grab the text */
5344                 continue;       /* look on the same line */
5345               }
5346             dbp += 1;
5347           }
5348
5349         else if (getnext)       /* grab next tokens and tag them */
5350           {
5351             dbp = skip_spaces (dbp);
5352             if (*dbp == '\0')
5353               break;            /* go to next line */
5354             if (*dbp == '<')
5355               {
5356                 intag = TRUE;
5357                 inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
5358                 continue;       /* look on the same line */
5359               }
5360
5361             for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
5362               continue;
5363             make_tag (token_name.buffer, token_name.len, TRUE,
5364                       dbp, end - dbp, lineno, linecharno);
5365             linebuffer_setlen (&token_name, 0); /* no name in buffer */
5366             getnext = FALSE;
5367             break;              /* go to next line */
5368           }
5369
5370         else                    /* look for an interesting HTML tag */
5371           {
5372             while (*dbp != '\0' && *dbp != '<')
5373               dbp++;
5374             if (*dbp == '\0')
5375               break;            /* go to next line */
5376             intag = TRUE;
5377             if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
5378               {
5379                 inanchor = TRUE;
5380                 continue;       /* look on the same line */
5381               }
5382             else if (LOOKING_AT_NOCASE (dbp, "<title>")
5383                      || LOOKING_AT_NOCASE (dbp, "<h1>")
5384                      || LOOKING_AT_NOCASE (dbp, "<h2>")
5385                      || LOOKING_AT_NOCASE (dbp, "<h3>"))
5386               {
5387                 intag = FALSE;
5388                 getnext = TRUE;
5389                 continue;       /* look on the same line */
5390               }
5391             dbp += 1;
5392           }
5393       }
5394 }
5395
5396 \f
5397 /*
5398  * Prolog support
5399  *
5400  * Assumes that the predicate or rule starts at column 0.
5401  * Only the first clause of a predicate or rule is added.
5402  * Original code by Sunichirou Sugou (1989)
5403  * Rewritten by Anders Lindgren (1996)
5404  */
5405 static int prolog_pr __P((char *, char *));
5406 static void prolog_skip_comment __P((linebuffer *, FILE *));
5407 static int prolog_atom __P((char *, int));
5408
5409 static void
5410 Prolog_functions (inf)
5411      FILE *inf;
5412 {
5413   char *cp, *last;
5414   int len;
5415   int allocated;
5416
5417   allocated = 0;
5418   len = 0;
5419   last = NULL;
5420
5421   LOOP_ON_INPUT_LINES (inf, lb, cp)
5422     {
5423       if (cp[0] == '\0')        /* Empty line */
5424         continue;
5425       else if (iswhite (cp[0])) /* Not a predicate */
5426         continue;
5427       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
5428         prolog_skip_comment (&lb, inf);
5429       else if ((len = prolog_pr (cp, last)) > 0)
5430         {
5431           /* Predicate or rule.  Store the function name so that we
5432              only generate a tag for the first clause.  */
5433           if (last == NULL)
5434             last = xnew(len + 1, char);
5435           else if (len + 1 > allocated)
5436             xrnew (last, len + 1, char);
5437           allocated = len + 1;
5438           strncpy (last, cp, len);
5439           last[len] = '\0';
5440         }
5441     }
5442 }
5443
5444
5445 static void
5446 prolog_skip_comment (plb, inf)
5447      linebuffer *plb;
5448      FILE *inf;
5449 {
5450   char *cp;
5451
5452   do
5453     {
5454       for (cp = plb->buffer; *cp != '\0'; cp++)
5455         if (cp[0] == '*' && cp[1] == '/')
5456           return;
5457       readline (plb, inf);
5458     }
5459   while (!feof(inf));
5460 }
5461
5462 /*
5463  * A predicate or rule definition is added if it matches:
5464  *     <beginning of line><Prolog Atom><whitespace>(
5465  * or  <beginning of line><Prolog Atom><whitespace>:-
5466  *
5467  * It is added to the tags database if it doesn't match the
5468  * name of the previous clause header.
5469  *
5470  * Return the size of the name of the predicate or rule, or 0 if no
5471  * header was found.
5472  */
5473 static int
5474 prolog_pr (s, last)
5475      char *s;
5476      char *last;                /* Name of last clause. */
5477 {
5478   int pos;
5479   int len;
5480
5481   pos = prolog_atom (s, 0);
5482   if (pos < 1)
5483     return 0;
5484
5485   len = pos;
5486   pos = skip_spaces (s + pos) - s;
5487
5488   if ((s[pos] == '.'
5489        || (s[pos] == '(' && (pos += 1))
5490        || (s[pos] == ':' && s[pos + 1] == '-' && (pos += 2)))
5491       && (last == NULL          /* save only the first clause */
5492           || len != (int)strlen (last)
5493           || !strneq (s, last, len)))
5494         {
5495           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5496           return len;
5497         }
5498   else
5499     return 0;
5500 }
5501
/*
 * Consume the Prolog atom found at S + POS.
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores allowed), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.
 */
static int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (ISLOWER (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (ISALNUM (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;                  /* not an atom at all */

  /* The atom is quoted: look for the closing quote. */
  for (p++;;)
    switch (*p)
      {
      case '\'':
        if (p[1] != '\'')
          return (int) (p + 1 - start); /* closing quote consumed */
        p += 2;                 /* A double quote */
        break;
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;
      case '\\':
        if (p[1] == '\0')
          return -1;
        p += 2;                 /* backslash and the quoted character */
        break;
      default:
        p++;
        break;
      }
}
5560
5561 \f
5562 /*
5563  * Support for Erlang
5564  *
5565  * Generates tags for functions, defines, and records.
5566  * Assumes that Erlang functions start at column 0.
5567  * Original code by Anders Lindgren (1996)
5568  */
5569 static int erlang_func __P((char *, char *));
5570 static void erlang_attribute __P((char *));
5571 static int erlang_atom __P((char *));
5572
5573 static void
5574 Erlang_functions (inf)
5575      FILE *inf;
5576 {
5577   char *cp, *last;
5578   int len;
5579   int allocated;
5580
5581   allocated = 0;
5582   len = 0;
5583   last = NULL;
5584
5585   LOOP_ON_INPUT_LINES (inf, lb, cp)
5586     {
5587       if (cp[0] == '\0')        /* Empty line */
5588         continue;
5589       else if (iswhite (cp[0])) /* Not function nor attribute */
5590         continue;
5591       else if (cp[0] == '%')    /* comment */
5592         continue;
5593       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
5594         continue;
5595       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
5596         {
5597           erlang_attribute (cp);
5598           last = NULL;
5599         }
5600       else if ((len = erlang_func (cp, last)) > 0)
5601         {
5602           /*
5603            * Function.  Store the function name so that we only
5604            * generates a tag for the first clause.
5605            */
5606           if (last == NULL)
5607             last = xnew (len + 1, char);
5608           else if (len + 1 > allocated)
5609             xrnew (last, len + 1, char);
5610           allocated = len + 1;
5611           strncpy (last, cp, len);
5612           last[len] = '\0';
5613         }
5614     }
5615 }
5616
5617
5618 /*
5619  * A function definition is added if it matches:
5620  *     <beginning of line><Erlang Atom><whitespace>(
5621  *
5622  * It is added to the tags database if it doesn't match the
5623  * name of the previous clause header.
5624  *
5625  * Return the size of the name of the function, or 0 if no function
5626  * was found.
5627  */
5628 static int
5629 erlang_func (s, last)
5630      char *s;
5631      char *last;                /* Name of last clause. */
5632 {
5633   int pos;
5634   int len;
5635
5636   pos = erlang_atom (s);
5637   if (pos < 1)
5638     return 0;
5639
5640   len = pos;
5641   pos = skip_spaces (s + pos) - s;
5642
5643   /* Save only the first clause. */
5644   if (s[pos++] == '('
5645       && (last == NULL
5646           || len != (int)strlen (last)
5647           || !strneq (s, last, len)))
5648         {
5649           make_tag (s, len, TRUE, s, pos, lineno, linecharno);
5650           return len;
5651         }
5652
5653   return 0;
5654 }
5655
5656
5657 /*
5658  * Handle attributes.  Currently, tags are generated for defines
5659  * and records.
5660  *
5661  * They are on the form:
5662  * -define(foo, bar).
5663  * -define(Foo(M, N), M+N).
5664  * -record(graph, {vtab = notable, cyclic = true}).
5665  */
5666 static void
5667 erlang_attribute (s)
5668      char *s;
5669 {
5670   char *cp = s;
5671
5672   if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
5673       && *cp++ == '(')
5674     {
5675       int len = erlang_atom (skip_spaces (cp));
5676       if (len > 0)
5677         make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
5678     }
5679   return;
5680 }
5681
5682
/*
 * Consume an Erlang atom (or variable) at the start of S.
 * Return the number of bytes consumed.  The result is 0 when S does not
 * start with an atom, or when a quoted atom is not closed on this line.
 */
static int
erlang_atom (s)
     char *s;
{
  char *p = s;

  if (ISALPHA (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      p++;
      while (ISALNUM (*p) || *p == '_')
        p++;
    }
  else if (*p == '\'')
    {
      /* The atom is quoted: look for the closing quote. */
      p++;
      while (*p != '\'')
        {
          if (*p == '\0')       /* multiline quoted atoms are ignored */
            return 0;
          if (*p == '\\')
            {
              p++;              /* a backslash quotes the next character */
              if (*p == '\0')
                return 0;
            }
          p++;
        }
      p++;                      /* consume the closing quote */
    }

  return (int) (p - s);
}
5711
5712 \f
5713 #ifdef ETAGS_REGEXPS
5714
5715 static char *scan_separators __P((char *));
5716 static void add_regex __P((char *, language *));
5717 static char *substitute __P((char *, char *, struct re_registers *));
5718
/*
 * Take a string like "/blah/" and turn it into "blah".  The first
 * character of NAME is the separator; scanning stops at the first
 * unquoted occurrence of it.  A separator quoted with a backslash is
 * copied without the backslash; \a, \b, \d, \e, \f, \n, \r, \t and \v
 * are turned into the corresponding characters; any other quoted
 * character is copied together with its backslash.
 * Works in place, writing the result at the start of NAME and null
 * terminating it.
 * Returns a pointer to the terminating separator, or NULL for
 * unterminated regexps.
 */
static char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *src;                    /* read cursor */
  char *dst = name;             /* write cursor, always behind SRC */
  char *terminator;

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character.  A backslash ending the
             string is dropped and the regexp is unterminated.  */
          if (*++src == '\0')
            break;
          switch (*src)
            {
            case 'a': *dst++ = '\007'; break; /* BEL (bell)           */
            case 'b': *dst++ = '\b'; break;   /* BS (back space)      */
            case 'd': *dst++ = 0177; break;   /* DEL (delete)         */
            case 'e': *dst++ = 033; break;    /* ESC (escape)         */
            case 'f': *dst++ = '\f'; break;   /* FF (form feed)       */
            case 'n': *dst++ = '\n'; break;   /* NL (new line)        */
            case 'r': *dst++ = '\r'; break;   /* CR (carriage return) */
            case 't': *dst++ = '\t'; break;   /* TAB (horizontal tab) */
            case 'v': *dst++ = '\v'; break;   /* VT (vertical tab)    */
            default:
              /* A quoted separator loses its quote; anything else
                 keeps it.  */
              if (*src != sep)
                *dst++ = '\\';
              *dst++ = *src;
              break;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator: done */
      else
        *dst++ = *src;
    }

  /* Signal an unterminated regexp with NULL. */
  terminator = (*src == sep) ? src : NULL;

  /* Terminate copied string. */
  *dst = '\0';
  return terminator;
}
5778
5779 /* Look at the argument of --regex or --no-regex and do the right
5780    thing.  Same for each line of a regexp file. */
5781 static void
5782 analyse_regex (regex_arg)
5783      char *regex_arg;
5784 {
5785   if (regex_arg == NULL)
5786     {
5787       free_regexps ();          /* --no-regex: remove existing regexps */
5788       return;
5789     }
5790
5791   /* A real --regexp option or a line in a regexp file. */
5792   switch (regex_arg[0])
5793     {
5794       /* Comments in regexp file or null arg to --regex. */
5795     case '\0':
5796     case ' ':
5797     case '\t':
5798       break;
5799
5800       /* Read a regex file.  This is recursive and may result in a
5801          loop, which will stop when the file descriptors are exhausted. */
5802     case '@':
5803       {
5804         FILE *regexfp;
5805         linebuffer regexbuf;
5806         char *regexfile = regex_arg + 1;
5807
5808         /* regexfile is a file containing regexps, one per line. */
5809         regexfp = fopen (regexfile, "r");
5810         if (regexfp == NULL)
5811           {
5812             pfatal (regexfile);
5813             return;
5814           }
5815         linebuffer_init (&regexbuf);
5816         while (readline_internal (&regexbuf, regexfp) > 0)
5817           analyse_regex (regexbuf.buffer);
5818         free (regexbuf.buffer);
5819         fclose (regexfp);
5820       }
5821       break;
5822
5823       /* Regexp to be used for a specific language only. */
5824     case '{':
5825       {
5826         language *lang;
5827         char *lang_name = regex_arg + 1;
5828         char *cp;
5829
5830         for (cp = lang_name; *cp != '}'; cp++)
5831           if (*cp == '\0')
5832             {
5833               error ("unterminated language name in regex: %s", regex_arg);
5834               return;
5835             }
5836         *cp++ = '\0';
5837         lang = get_language_from_langname (lang_name);
5838         if (lang == NULL)
5839           return;
5840         add_regex (cp, lang);
5841       }
5842       break;
5843
5844       /* Regexp to be used for any language. */
5845     default:
5846       add_regex (regex_arg, NULL);
5847       break;
5848     }
5849 }
5850
5851 /* Separate the regexp pattern, compile it,
5852    and care for optional name and modifiers. */
5853 static void
5854 add_regex (regexp_pattern, lang)
5855      char *regexp_pattern;
5856      language *lang;
5857 {
5858   static struct re_pattern_buffer zeropattern;
5859   char sep, *pat, *name, *modifiers;
5860   const char *err;
5861   struct re_pattern_buffer *patbuf;
5862   regexp *rp;
5863   bool
5864     force_explicit_name = TRUE, /* do not use implicit tag names */
5865     ignore_case = FALSE,        /* case is significant */
5866     multi_line = FALSE,         /* matches are done one line at a time */
5867     single_line = FALSE;        /* dot does not match newline */
5868
5869
5870   if (strlen(regexp_pattern) < 3)
5871     {
5872       error ("null regexp", (char *)NULL);
5873       return;
5874     }
5875   sep = regexp_pattern[0];
5876   name = scan_separators (regexp_pattern);
5877   if (name == NULL)
5878     {
5879       error ("%s: unterminated regexp", regexp_pattern);
5880       return;
5881     }
5882   if (name[1] == sep)
5883     {
5884       error ("null name for regexp \"%s\"", regexp_pattern);
5885       return;
5886     }
5887   modifiers = scan_separators (name);
5888   if (modifiers == NULL)        /* no terminating separator --> no name */
5889     {
5890       modifiers = name;
5891       name = "";
5892     }
5893   else
5894     modifiers += 1;             /* skip separator */
5895
5896   /* Parse regex modifiers. */
5897   for (; modifiers[0] != '\0'; modifiers++)
5898     switch (modifiers[0])
5899       {
5900       case 'N':
5901         if (modifiers == name)
5902           error ("forcing explicit tag name but no name, ignoring", NULL);
5903         force_explicit_name = TRUE;
5904         break;
5905       case 'i':
5906         ignore_case = TRUE;
5907         break;
5908       case 's':
5909         single_line = TRUE;
5910         /* FALLTHRU */
5911       case 'm':
5912         multi_line = TRUE;
5913         need_filebuf = TRUE;
5914         break;
5915       default:
5916         {
5917           char wrongmod [2];
5918           wrongmod[0] = modifiers[0];
5919           wrongmod[1] = '\0';
5920           error ("invalid regexp modifier `%s', ignoring", wrongmod);
5921         }
5922         break;
5923       }
5924
5925   patbuf = xnew (1, struct re_pattern_buffer);
5926   *patbuf = zeropattern;
5927   if (ignore_case)
5928     {
5929       static char lc_trans[CHARS];
5930       int i;
5931       for (i = 0; i < CHARS; i++)
5932         lc_trans[i] = lowcase (i);
5933       patbuf->translate = lc_trans;     /* translation table to fold case  */
5934     }
5935
5936   if (multi_line)
5937     pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
5938   else
5939     pat = regexp_pattern;
5940
5941   if (single_line)
5942     re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
5943   else
5944     re_set_syntax (RE_SYNTAX_EMACS);
5945
5946   err = re_compile_pattern (pat, strlen (regexp_pattern), patbuf);
5947   if (multi_line)
5948     free (pat);
5949   if (err != NULL)
5950     {
5951       error ("%s while compiling pattern", err);
5952       return;
5953     }
5954
5955   rp = p_head;
5956   p_head = xnew (1, regexp);
5957   p_head->pattern = savestr (regexp_pattern);
5958   p_head->p_next = rp;
5959   p_head->lang = lang;
5960   p_head->pat = patbuf;
5961   p_head->name = savestr (name);
5962   p_head->error_signaled = FALSE;
5963   p_head->force_explicit_name = force_explicit_name;
5964   p_head->ignore_case = ignore_case;
5965   p_head->multi_line = multi_line;
5966 }
5967
5968 /*
5969  * Do the substitutions indicated by the regular expression and
5970  * arguments.
5971  */
5972 static char *
5973 substitute (in, out, regs)
5974      char *in, *out;
5975      struct re_registers *regs;
5976 {
5977   char *result, *t;
5978   int size, dig, diglen;
5979
5980   result = NULL;
5981   size = strlen (out);
5982
5983   /* Pass 1: figure out how much to allocate by finding all \N strings. */
5984   if (out[size - 1] == '\\')
5985     fatal ("pattern error in \"%s\"", out);
5986   for (t = etags_strchr (out, '\\');
5987        t != NULL;
5988        t = etags_strchr (t + 2, '\\'))
5989     if (ISDIGIT (t[1]))
5990       {
5991         dig = t[1] - '0';
5992         diglen = regs->end[dig] - regs->start[dig];
5993         size += diglen - 2;
5994       }
5995     else
5996       size -= 1;
5997
5998   /* Allocate space and do the substitutions. */
5999   assert (size >= 0);
6000   result = xnew (size + 1, char);
6001
6002   for (t = result; *out != '\0'; out++)
6003     if (*out == '\\' && ISDIGIT (*++out))
6004       {
6005         dig = *out - '0';
6006         diglen = regs->end[dig] - regs->start[dig];
6007         strncpy (t, in + regs->start[dig], diglen);
6008         t += diglen;
6009       }
6010     else
6011       *t++ = *out;
6012   *t = '\0';
6013
6014   assert (t <= result + size);
6015   assert (t - result == (int)strlen (result));
6016
6017   return result;
6018 }
6019
6020 /* Deallocate all regexps. */
6021 static void
6022 free_regexps ()
6023 {
6024   regexp *rp;
6025   while (p_head != NULL)
6026     {
6027       rp = p_head->p_next;
6028       free (p_head->pattern);
6029       free (p_head->name);
6030       free (p_head);
6031       p_head = rp;
6032     }
6033   return;
6034 }
6035
6036 /*
6037  * Reads the whole file as a single string from `filebuf' and looks for
6038  * multi-line regular expressions, creating tags on matches.
6039  * readline already dealt with normal regexps.
6040  *
6041  * Idea by Ben Wing <ben@666.com> (2002).
6042  */
6043 static void
6044 regex_tag_multiline ()
6045 {
6046   char *buffer = filebuf.buffer;
6047   regexp *rp;
6048   char *name;
6049
6050   for (rp = p_head; rp != NULL; rp = rp->p_next)
6051     {
6052       int match = 0;
6053
6054       if (!rp->multi_line)
6055         continue;               /* skip normal regexps */
6056
6057       /* Generic initialisations before parsing file from memory. */
6058       lineno = 1;               /* reset global line number */
6059       charno = 0;               /* reset global char number */
6060       linecharno = 0;           /* reset global char number of line start */
6061
6062       /* Only use generic regexps or those for the current language. */
6063       if (rp->lang != NULL && rp->lang != curfdp->lang)
6064         continue;
6065
6066       while (match >= 0 && match < filebuf.len)
6067         {
6068           match = re_search (rp->pat, buffer, filebuf.len, charno,
6069                              filebuf.len - match, &rp->regs);
6070           switch (match)
6071             {
6072             case -2:
6073               /* Some error. */
6074               if (!rp->error_signaled)
6075                 {
6076                   error ("regexp stack overflow while matching \"%s\"",
6077                          rp->pattern);
6078                   rp->error_signaled = TRUE;
6079                 }
6080               break;
6081             case -1:
6082               /* No match. */
6083               break;
6084             default:
6085               if (match == rp->regs.end[0])
6086                 {
6087                   if (!rp->error_signaled)
6088                     {
6089                       error ("regexp matches the empty string: \"%s\"",
6090                              rp->pattern);
6091                       rp->error_signaled = TRUE;
6092                     }
6093                   match = -3;   /* exit from while loop */
6094                   break;
6095                 }
6096
6097               /* Match occurred.  Construct a tag. */
6098               while (charno < rp->regs.end[0])
6099                 if (buffer[charno++] == '\n')
6100                   lineno++, linecharno = charno;
6101               name = rp->name;
6102               if (name[0] == '\0')
6103                 name = NULL;
6104               else /* make a named tag */
6105                 name = substitute (buffer, rp->name, &rp->regs);
6106               if (rp->force_explicit_name)
6107                 /* Force explicit tag name, if a name is there. */
6108                 pfnote (name, TRUE, buffer + linecharno,
6109                         charno - linecharno + 1, lineno, linecharno);
6110               else
6111                 make_tag (name, strlen (name), TRUE, buffer + linecharno,
6112                           charno - linecharno + 1, lineno, linecharno);
6113               break;
6114             }
6115         }
6116     }
6117 }
6118
6119 #endif /* ETAGS_REGEXPS */
6120
6121 \f
6122 static bool
6123 nocase_tail (cp)
6124      char *cp;
6125 {
6126   register int len = 0;
6127
6128   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
6129     cp++, len++;
6130   if (*cp == '\0' && !intoken (dbp[len]))
6131     {
6132       dbp += len;
6133       return TRUE;
6134     }
6135   return FALSE;
6136 }
6137
6138 static void
6139 get_tag (bp, namepp)
6140      register char *bp;
6141      char **namepp;
6142 {
6143   register char *cp = bp;
6144
6145   if (*bp != '\0')
6146     {
6147       /* Go till you get to white space or a syntactic break */
6148       for (cp = bp + 1; !notinname (*cp); cp++)
6149         continue;
6150       make_tag (bp, cp - bp, TRUE,
6151                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
6152     }
6153
6154   if (namepp != NULL)
6155     *namepp = savenstr (bp, cp - bp);
6156 }
6157
6158 /*
6159  * Read a line of text from `stream' into `lbp', excluding the
6160  * newline or CR-NL, if any.  Return the number of characters read from
6161  * `stream', which is the length of the line including the newline.
6162  *
6163  * On DOS or Windows we do not count the CR character, if any before the
6164  * NL, in the returned length; this mirrors the behavior of Emacs on those
6165  * platforms (for text files, it translates CR-NL to NL as it reads in the
6166  * file).
6167  *
6168  * If multi-line regular expressions are requested, each line read is
6169  * appended to `filebuf'.
6170  */
6171 static long
6172 readline_internal (lbp, stream)
6173      linebuffer *lbp;
6174      register FILE *stream;
6175 {
6176   char *buffer = lbp->buffer;
6177   register char *p = lbp->buffer;
6178   register char *pend;
6179   int chars_deleted;
6180
6181   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
6182
6183   for (;;)
6184     {
6185       register int c = getc (stream);
6186       if (p == pend)
6187         {
6188           /* We're at the end of linebuffer: expand it. */
6189           lbp->size *= 2;
6190           xrnew (buffer, lbp->size, char);
6191           p += buffer - lbp->buffer;
6192           pend = buffer + lbp->size;
6193           lbp->buffer = buffer;
6194         }
6195       if (c == EOF)
6196         {
6197           *p = '\0';
6198           chars_deleted = 0;
6199           break;
6200         }
6201       if (c == '\n')
6202         {
6203           if (p > buffer && p[-1] == '\r')
6204             {
6205               p -= 1;
6206 #ifdef DOS_NT
6207              /* Assume CRLF->LF translation will be performed by Emacs
6208                 when loading this file, so CRs won't appear in the buffer.
6209                 It would be cleaner to compensate within Emacs;
6210                 however, Emacs does not know how many CRs were deleted
6211                 before any given point in the file.  */
6212               chars_deleted = 1;
6213 #else
6214               chars_deleted = 2;
6215 #endif
6216             }
6217           else
6218             {
6219               chars_deleted = 1;
6220             }
6221           *p = '\0';
6222           break;
6223         }
6224       *p++ = c;
6225     }
6226   lbp->len = p - buffer;
6227
6228   if (need_filebuf              /* we need filebuf for multi-line regexps */
6229       && chars_deleted > 0)     /* not at EOF */
6230     {
6231       while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
6232         {
6233           /* Expand filebuf. */
6234           filebuf.size *= 2;
6235           xrnew (filebuf.buffer, filebuf.size, char);
6236         }
6237       strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
6238       filebuf.len += lbp->len;
6239       filebuf.buffer[filebuf.len++] = '\n';
6240       filebuf.buffer[filebuf.len] = '\0';
6241     }
6242
6243   return lbp->len + chars_deleted;
6244 }
6245
6246 /*
6247  * Like readline_internal, above, but in addition try to match the
6248  * input line against relevant regular expressions and manage #line
6249  * directives.
6250  */
6251 static void
6252 readline (lbp, stream)
6253      linebuffer *lbp;
6254      FILE *stream;
6255 {
6256   long result;
6257
6258   linecharno = charno;          /* update global char number of line start */
6259   result = readline_internal (lbp, stream); /* read line */
6260   lineno += 1;                  /* increment global line number */
6261   charno += result;             /* increment global char number */
6262
6263   /* Honour #line directives. */
6264   if (!no_line_directive)
6265     {
6266       static bool discard_until_line_directive;
6267
6268       /* Check whether this is a #line directive. */
6269       if (result > 12 && strneq (lbp->buffer, "#line ", 6))
6270         {
6271           int start, lno;
6272
6273           if (DEBUG) start = 0; /* shut up the compiler */
6274           if (sscanf (lbp->buffer, "#line %d \"%n", &lno, &start) == 1)
6275             {
6276               char *endp = lbp->buffer + start;
6277
6278               assert (start > 0);
6279               while ((endp = etags_strchr (endp, '"')) != NULL
6280                      && endp[-1] == '\\')
6281                 endp++;
6282               if (endp != NULL)
6283                 /* Ok, this is a real #line directive.  Let's deal with it. */
6284                 {
6285                   char *taggedabsname;  /* absolute name of original file */
6286                   char *taggedfname;    /* name of original file as given */
6287                   char *name;           /* temp var */
6288
6289                   discard_until_line_directive = FALSE; /* found it */
6290                   name = lbp->buffer + start;
6291                   *endp = '\0';
6292                   canonicalize_filename (name); /* for DOS */
6293                   taggedabsname = absolute_filename (name, curfdp->infabsdir);
6294                   if (filename_is_absolute (name)
6295                       || filename_is_absolute (curfdp->infname))
6296                     taggedfname = savestr (taggedabsname);
6297                   else
6298                     taggedfname = relative_filename (taggedabsname,tagfiledir);
6299
6300                   if (streq (curfdp->taggedfname, taggedfname))
6301                     /* The #line directive is only a line number change.  We
6302                        deal with this afterwards. */
6303                     free (taggedfname);
6304                   else
6305                     /* The tags following this #line directive should be
6306                        attributed to taggedfname.  In order to do this, set
6307                        curfdp accordingly. */
6308                     {
6309                       fdesc *fdp; /* file description pointer */
6310
6311                       /* Go look for a file description already set up for the
6312                          file indicated in the #line directive.  If there is
6313                          one, use it from now until the next #line
6314                          directive. */
6315                       for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6316                         if (streq (fdp->infname, curfdp->infname)
6317                             && streq (fdp->taggedfname, taggedfname))
6318                           /* If we remove the second test above (after the &&)
6319                              then all entries pertaining to the same file are
6320                              coalesced in the tags file.  If we use it, then
6321                              entries pertaining to the same file but generated
6322                              from different files (via #line directives) will
6323                              go into separate sections in the tags file.  These
6324                              alternatives look equivalent.  The first one
6325                              destroys some apparently useless information. */
6326                           {
6327                             curfdp = fdp;
6328                             free (taggedfname);
6329                             break;
6330                           }
6331                       /* Else, if we already tagged the real file, skip all
6332                          input lines until the next #line directive. */
6333                       if (fdp == NULL) /* not found */
6334                         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
6335                           if (streq (fdp->infabsname, taggedabsname))
6336                             {
6337                               discard_until_line_directive = TRUE;
6338                               free (taggedfname);
6339                               break;
6340                             }
6341                       /* Else create a new file description and use that from
6342                          now on, until the next #line directive. */
6343                       if (fdp == NULL) /* not found */
6344                         {
6345                           fdp = fdhead;
6346                           fdhead = xnew (1, fdesc);
6347                           *fdhead = *curfdp; /* copy curr. file description */
6348                           fdhead->next = fdp;
6349                           fdhead->infname = savestr (curfdp->infname);
6350                           fdhead->infabsname = savestr (curfdp->infabsname);
6351                           fdhead->infabsdir = savestr (curfdp->infabsdir);
6352                           fdhead->taggedfname = taggedfname;
6353                           fdhead->usecharno = FALSE;
6354                           fdhead->prop = NULL;
6355                           fdhead->written = FALSE;
6356                           curfdp = fdhead;
6357                         }
6358                     }
6359                   free (taggedabsname);
6360                   lineno = lno - 1;
6361                   readline (lbp, stream);
6362                   return;
6363                 } /* if a real #line directive */
6364             } /* if #line is followed by a a number */
6365         } /* if line begins with "#line " */
6366
6367       /* If we are here, no #line directive was found. */
6368       if (discard_until_line_directive)
6369         {
6370           if (result > 0)
6371             {
6372               /* Do a tail recursion on ourselves, thus discarding the contents
6373                  of the line buffer. */
6374               readline (lbp, stream);
6375               return;
6376             }
6377           /* End of file. */
6378           discard_until_line_directive = FALSE;
6379           return;
6380         }
6381     } /* if #line directives should be considered */
6382
6383 #ifdef ETAGS_REGEXPS
6384   {
6385     int match;
6386     regexp *rp;
6387     char *name;
6388
6389     /* Match against relevant regexps. */
6390     if (lbp->len > 0)
6391       for (rp = p_head; rp != NULL; rp = rp->p_next)
6392         {
6393           /* Only use generic regexps or those for the current language.
6394              Also do not use multiline regexps, which is the job of
6395              regex_tag_multiline. */
6396           if ((rp->lang != NULL && rp->lang != fdhead->lang)
6397               || rp->multi_line)
6398             continue;
6399
6400           match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
6401           switch (match)
6402             {
6403             case -2:
6404               /* Some error. */
6405               if (!rp->error_signaled)
6406                 {
6407                   error ("regexp stack overflow while matching \"%s\"",
6408                          rp->pattern);
6409                   rp->error_signaled = TRUE;
6410                 }
6411               break;
6412             case -1:
6413               /* No match. */
6414               break;
6415             case 0:
6416               /* Empty string matched. */
6417               if (!rp->error_signaled)
6418                 {
6419                   error ("regexp matches the empty string: \"%s\"", rp->pattern);
6420                   rp->error_signaled = TRUE;
6421                 }
6422               break;
6423             default:
6424               /* Match occurred.  Construct a tag. */
6425               name = rp->name;
6426               if (name[0] == '\0')
6427                 name = NULL;
6428               else /* make a named tag */
6429                 name = substitute (lbp->buffer, rp->name, &rp->regs);
6430               if (rp->force_explicit_name)
6431                 /* Force explicit tag name, if a name is there. */
6432                 pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
6433               else
6434                 make_tag (name, strlen (name), TRUE,
6435                           lbp->buffer, match, lineno, linecharno);
6436               break;
6437             }
6438         }
6439   }
6440 #endif /* ETAGS_REGEXPS */
6441 }
6442
6443 \f
/*
 * Duplicate the NUL-terminated string CP.  The copy is obtained from
 * savenstr, asking for strlen (CP) characters, and is returned to the
 * caller.
 */
static char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
6454
6455 /*
6456  * Return a pointer to a space of size LEN+1 allocated with xnew where
6457  * the string CP has been copied for at most the first LEN characters.
6458  */
6459 static char *
6460 savenstr (cp, len)
6461      char *cp;
6462      int len;
6463 {
6464   register char *dp;
6465
6466   dp = xnew (len + 1, char);
6467   strncpy (dp, cp, len);
6468   dp[len] = '\0';
6469   return dp;
6470 }
6471
/*
 * Return the ptr in sp at which the character c last
 * appears; NULL if not found.  The terminating NUL is part of the
 * string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strrchr, included for portability.  As in POSIX,
 * c is converted to char before comparing, so that values above 127
 * are found even where plain char is signed.
 */
static char *
etags_strrchr (sp, c)
     register const char *sp;
     register int c;
{
  register const char *last = NULL;
  const char target = (char) c;

  for (;; sp++)
    {
      if (*sp == target)
        last = sp;
      if (*sp == '\0')          /* the terminator has been examined too */
        break;
    }
  return (char *) last;
}
6493
/*
 * Return the ptr in sp at which the character c first
 * appears; NULL if not found.  The terminating NUL is part of the
 * string, so searching for '\0' returns a pointer to it.
 *
 * Identical to POSIX strchr, included for portability.  As in POSIX,
 * c is converted to char before comparing, so that values above 127
 * are found even where plain char is signed.
 */
static char *
etags_strchr (sp, c)
     register const char *sp;
     register int c;
{
  const char target = (char) c;

  for (;; sp++)
    {
      if (*sp == target)
        return (char *) sp;
      if (*sp == '\0')          /* reached the end without a match */
        return NULL;
    }
}
6512
/*
 * Compare the strings S1 and S2.  Two characters that both satisfy
 * ISALPHA are compared after lowcase conversion, any other pair is
 * compared as is.  Return zero when the strings are equal, otherwise
 * the difference between the first pair of characters that differ.
 *
 * Same as BSD's strcasecmp, included for portability.
 */
static int
etags_strcasecmp (s1, s2)
     register const char *s1;
     register const char *s2;
{
  for (;; s1++, s2++)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference or at the end of S1. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }
}
6533
/*
 * Compare at most the first N characters of the strings S1 and S2.
 * Two characters that both satisfy ISALPHA are compared after lowcase
 * conversion, any other pair is compared as is.  Return zero when the
 * compared parts are equal (in particular when N is zero or negative),
 * otherwise the difference between the first pair of characters that
 * differ.
 *
 * Same as BSD's strncasecmp, included for portability.
 */
static int
etags_strncasecmp (s1, s2, n)
     register const char *s1;
     register const char *s2;
     register int n;
{
  /* N is tested before anything else, so that a string S1 ending exactly
     after N matching characters compares equal to a longer S2 instead of
     having its terminating NUL compared with S2's next character. */
  for (; n > 0; s1++, s2++, n--)
    {
      int diff = (ISALPHA (*s1) && ISALPHA (*s2)
                  ? lowcase (*s1) - lowcase (*s2)
                  : *s1 - *s2);

      /* Stop at the first difference or at the end of S1. */
      if (diff != 0 || *s1 == '\0')
        return diff;
    }

  return 0;                     /* N characters compared, all equal */
}
6559
/* Advance CP past every leading character for which iswhite is true and
   return the new pointer.  As the original comment notes, the end of
   string does not count as a space, so the scan stops there. */
static char *
skip_spaces (cp)
     char *cp;
{
  for (; iswhite (*cp); cp++)
    continue;

  return cp;
}
6569
/* Advance CP up to the first character for which iswhite is true, or to
   the terminating NUL if there is none, and return the new pointer. */
static char *
skip_non_spaces (cp)
     char *cp;
{
  while (!(*cp == '\0' || iswhite (*cp)))
    cp++;

  return cp;
}
6579
/* Report an error through error (), `s1' being the printf control string
   and `s2' its argument, then terminate the program with EXIT_FAILURE.  */
void
fatal (s1, s2)
     char *s1;
     char *s2;
{
  error (s1, s2);

  exit (EXIT_FAILURE);
}
6588
/* Print `s1' followed by the system error message, as perror does, then
   terminate the program with EXIT_FAILURE.  */
static void
pfatal (s1)
     char *s1;
{
  perror (s1);

  exit (EXIT_FAILURE);
}
6596
6597 static void
6598 suggest_asking_for_help ()
6599 {
6600   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
6601            progname, LONG_OPTIONS ? "--help" : "-h");
6602   exit (EXIT_FAILURE);
6603 }
6604
6605 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
6606 static void
6607 error (s1, s2)
6608      const char *s1, *s2;
6609 {
6610   fprintf (stderr, "%s: ", progname);
6611   fprintf (stderr, s1, s2);
6612   fprintf (stderr, "\n");
6613 }
6614
6615 /* Return a newly-allocated string whose contents
6616    concatenate those of s1, s2, s3.  */
6617 static char *
6618 concat (s1, s2, s3)
6619      char *s1, *s2, *s3;
6620 {
6621   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
6622   char *result = xnew (len1 + len2 + len3 + 1, char);
6623
6624   strcpy (result, s1);
6625   strcpy (result + len1, s2);
6626   strcpy (result + len1 + len2, s3);
6627   result[len1 + len2 + len3] = '\0';
6628
6629   return result;
6630 }
6631
6632 \f
6633 /* Does the same work as the system V getcwd, but does not need to
6634    guess the buffer size in advance. */
6635 static char *
6636 etags_getcwd ()
6637 {
6638 #ifdef HAVE_GETCWD
6639   int bufsize = 200;
6640   char *path = xnew (bufsize, char);
6641
6642   while (getcwd (path, bufsize) == NULL)
6643     {
6644       if (errno != ERANGE)
6645         pfatal ("getcwd");
6646       bufsize *= 2;
6647       free (path);
6648       path = xnew (bufsize, char);
6649     }
6650
6651   canonicalize_filename (path);
6652   return path;
6653
6654 #else /* not HAVE_GETCWD */
6655 #if MSDOS
6656
6657   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
6658
6659   getwd (path);
6660
6661   for (p = path; *p != '\0'; p++)
6662     if (*p == '\\')
6663       *p = '/';
6664     else
6665       *p = lowcase (*p);
6666
6667   return strdup (path);
6668 #else /* not MSDOS */
6669   linebuffer path;
6670   FILE *pipe;
6671
6672   linebuffer_init (&path);
6673   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
6674   if (pipe == NULL || readline_internal (&path, pipe) == 0)
6675     pfatal ("pwd");
6676   pclose (pipe);
6677
6678   return path.buffer;
6679 #endif /* not MSDOS */
6680 #endif /* not HAVE_GETCWD */
6681 }
6682
6683 /* Return a newly allocated string containing the file name of FILE
6684    relative to the absolute directory DIR (which should end with a slash). */
6685 static char *
6686 relative_filename (file, dir)
6687      char *file, *dir;
6688 {
6689   char *fp, *dp, *afn, *res;
6690   int i;
6691
6692   /* Find the common root of file and dir (with a trailing slash). */
6693   afn = absolute_filename (file, cwd);
6694   fp = afn;
6695   dp = dir;
6696   while (*fp++ == *dp++)
6697     continue;
6698   fp--, dp--;                   /* back to the first differing char */
6699 #ifdef DOS_NT
6700   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
6701     return afn;
6702 #endif
6703   do                            /* look at the equal chars until '/' */
6704     fp--, dp--;
6705   while (*fp != '/');
6706
6707   /* Build a sequence of "../" strings for the resulting relative file name. */
6708   i = 0;
6709   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
6710     i += 1;
6711   res = xnew (3*i + strlen (fp + 1) + 1, char);
6712   res[0] = '\0';
6713   while (i-- > 0)
6714     strcat (res, "../");
6715
6716   /* Add the file name relative to the common root of file and dir. */
6717   strcat (res, fp + 1);
6718   free (afn);
6719
6720   return res;
6721 }
6722
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).  If FILE is not
   already absolute, DIR is prepended to it.  The "/dirname/.." and "/."
   substrings are then removed from the result; if nothing is left, "/"
   is returned. */
static char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;
  char *src, *dst;              /* for the in-place moves below */

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Look backwards for the slash that begins "dirname". */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Move the tail of the name down over "/dirname/..".  Source
                 and destination overlap, which strcpy does not allow, so
                 copy one character at a time from low to high addresses
                 (safe because the destination is below the source). */
              dst = cp;
              src = slashp + 3;
              while ((*dst++ = *src++) != '\0')
                continue;
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              /* Drop "/." the same way, again with overlapping areas. */
              dst = slashp;
              src = slashp + 2;
              while ((*dst++ = *src++) != '\0')
                continue;
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')           /* nothing left: the name is just "/" */
    {
      free (res);               /* do not leak the emptied buffer */
      return savestr ("/");
    }

  return res;
}
6783
/* Return a newly allocated string containing the absolute
   file name of dir where FILE resides given DIR (which should
   end with a slash).  FILE is canonicalized in place.  When FILE has no
   slash, the result is a copy of DIR. */
static char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *dirname;
  char saved_char;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash == NULL)
    dirname = savestr (dir);
  else
    {
      /* Temporarily cut FILE right after its last slash, so that only
         its directory part is made absolute. */
      saved_char = last_slash[1];
      last_slash[1] = '\0';
      dirname = absolute_filename (file, dir);
      last_slash[1] = saved_char;
    }

  return dirname;
}
6805
/* Whether the argument string is an absolute file name, that is one
   beginning with a slash or, under DOS_NT, with a drive letter followed
   by a colon and a slash.  The argument string must have been
   canonicalized with canonicalize_filename. */
static bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (ISALPHA(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif

  return absolute;
}
6818
/* Canonicalize the file name FN in place.  Under DOS_NT, a lower case
   drive letter is converted with upcase and backslashes are translated
   into slashes.  Without DOS_NT this function does nothing. */
static void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  register char *p;

  /* Canonicalize drive letter case.  */
  if (fn[0] != '\0' && fn[1] == ':' && ISLOWER (fn[0]))
    fn[0] = upcase (fn[0]);
  /* Convert backslashes to slashes.  */
  for (p = fn; *p != '\0'; p++)
    if (*p == '\\')
      *p = '/';
#else
  /* No action. */
  (void) fn;                    /* shut up the compiler */
#endif
}
6837
6838 \f
6839 /* Initialize a linebuffer for use */
6840 static void
6841 linebuffer_init (lbp)
6842      linebuffer *lbp;
6843 {
6844   lbp->size = (DEBUG) ? 3 : 200;
6845   lbp->buffer = xnew (lbp->size, char);
6846   lbp->buffer[0] = '\0';
6847   lbp->len = 0;
6848 }
6849
6850 /* Set the minimum size of a string contained in a linebuffer. */
6851 static void
6852 linebuffer_setlen (lbp, toksize)
6853      linebuffer *lbp;
6854      int toksize;
6855 {
6856   while (lbp->size <= toksize)
6857     {
6858       lbp->size *= 2;
6859       xrnew (lbp->buffer, lbp->size, char);
6860     }
6861   lbp->len = toksize;
6862 }
6863
6864 /* Like malloc but get fatal error if memory is exhausted. */
6865 static PTR
6866 xmalloc (size)
6867      unsigned int size;
6868 {
6869   PTR result = (PTR) malloc (size);
6870   if (result == NULL)
6871     fatal ("virtual memory exhausted", (char *)NULL);
6872   return result;
6873 }
6874
6875 static PTR
6876 xrealloc (ptr, size)
6877      char *ptr;
6878      unsigned int size;
6879 {
6880   PTR result = (PTR) realloc (ptr, size);
6881   if (result == NULL)
6882     fatal ("virtual memory exhausted", (char *)NULL);
6883   return result;
6884 }
6885
6886 /*
6887  * Local Variables:
6888  * c-indentation-style: gnu
6889  * indent-tabs-mode: t
6890  * tab-width: 8
6891  * fill-column: 79
6892  * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
6893  * End:
6894  */
6895
6896 /* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
6897    (do not change this comment) */
6898
6899 /* etags.c ends here */