lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs
   2    Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5 This file is not considered part of GNU Emacs.
   6
   7 This program is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software Foundation,
  19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  *      Sam Kendall added C++.
  28  *      Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
  29  *      Regexp tags by Tom Tromey.
  30  *
  31  *      Francesco Potorti` (F.Potorti@cnuce.cnr.it) is the current maintainer.
  32  */
  33
  34 char pot_etags_version[] = "@(#) pot revision number is 13.31";
  35
  36 #define TRUE    1
  37 #define FALSE   0
  38
  39 #define _GNU_SOURCE             /* enables some compiler checks on GNU */
  40 #ifndef DEBUG
  41 # define DEBUG FALSE
  42 #endif
  43
  44 #ifdef HAVE_CONFIG_H
  45 # include <config.h>
  46   /* On some systems, Emacs defines static as nothing for the sake
  47      of unexec.  We don't want that here since we don't use unexec. */
  48 # undef static
  49 # define ETAGS_REGEXPS          /* use the regexp features */
  50 # define LONG_OPTIONS           /* accept long options */
  51 #endif /* HAVE_CONFIG_H */
  52
  53 #ifdef MSDOS
  54 # include <fcntl.h>
  55 # include <sys/param.h>
  56 # include <io.h>
  57 # ifndef HAVE_CONFIG_H
  58 #   define DOS_NT
  59 #   include <sys/config.h>
  60 # endif
  61 #endif /* MSDOS */
  62
  63 #ifdef WINDOWSNT
  64 # include <stdlib.h>
  65 # include <fcntl.h>
  66 # include <string.h>
  67 # include <io.h>
  68 # define MAXPATHLEN _MAX_PATH
  69 # ifdef HAVE_CONFIG_H
  70 #   undef HAVE_NTGUI
  71 # else
  72 #   define DOS_NT
  73 # endif /* not HAVE_CONFIG_H */
  74 # ifndef HAVE_GETCWD
  75 #   define HAVE_GETCWD
  76 # endif /* undef HAVE_GETCWD */
  77 #endif /* WINDOWSNT */
  78
  79 #if !defined (WINDOWSNT) && defined (STDC_HEADERS)
  80 #include <stdlib.h>
  81 #include <string.h>
  82 #endif
  83
  84 #ifdef HAVE_UNISTD_H
  85 # include <unistd.h>
  86 #else
  87 # ifdef HAVE_GETCWD
  88     extern char *getcwd ();
  89 # endif
  90 #endif /* HAVE_UNISTD_H */
  91
  92 #include <stdio.h>
  93 #include <ctype.h>
  94 #include <errno.h>
  95 #ifndef errno
  96   extern int errno;
  97 #endif
  98 #include <sys/types.h>
  99 #include <sys/stat.h>
 100
 101 #if !defined (S_ISREG) && defined (S_IFREG)
 102 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 103 #endif
 104
 105 #ifdef LONG_OPTIONS
 106 # include <getopt.h>
 107 #else
 108 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 109   extern char *optarg;
 110   extern int optind, opterr;
 111 #endif /* LONG_OPTIONS */
 112
 113 #ifdef ETAGS_REGEXPS
 114 # include <regex.h>
 115 #endif /* ETAGS_REGEXPS */
 116
 117 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 118  Leave it undefined to make the program "etags", which makes emacs-style
 119  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 120 #ifdef CTAGS
 121 # undef  CTAGS
 122 # define CTAGS TRUE
 123 #else
 124 # define CTAGS FALSE
 125 #endif
 126
 127 /* Exit codes for success and failure.  */
 128 #ifdef VMS
 129 # define        GOOD    1
 130 # define        BAD     0
 131 #else
 132 # define        GOOD    0
 133 # define        BAD     1
 134 #endif
 135
 136 /* C extensions. */
 /* Flag values describing C-like language variants.  C_STAR (0x3) and
    C_JAVA (0x5) each include the C_PLPL bit (0x1); YACC (0x10000)
    shares no bits with the other three.  */
 137 #define C_PLPL  0x00001         /* C++ */
 138 #define C_STAR  0x00003         /* C* */
 139 #define C_JAVA  0x00005         /* JAVA */
 140 #define YACC    0x10000         /* yacc file */
 141
 /* String equality tests.  streq is true when strcmp finds S and T
    equal; strneq is true when strncmp finds their first N characters
    equal.  When DEBUG is nonzero and both S and T are NULL, abort is
    called instead of comparing.  S and T each appear more than once in
    the expansion, so an argument with side effects may be evaluated
    more than once.  */
 142 #define streq(s,t)      ((DEBUG && (s) == NULL && (t) == NULL   \
 143                           && (abort (), 1)) || !strcmp (s, t))
 144 #define strneq(s,t,n)   ((DEBUG && (s) == NULL && (t) == NULL   \
 145                           && (abort (), 1)) || !strncmp (s, t, n))
 146
 /* Character helpers.

    lowcase (C) returns the lower-case form of C.  The argument is
    converted to unsigned char first: tolower is only defined for
    values representable as unsigned char (or EOF), and a plain char
    holding a byte with the high bit set may be negative.

    CHAR (X) reduces X to a table index in 0 .. CHARS-1.  X is
    parenthesized so that an expression argument such as `a | b' is
    converted and masked as a whole.

    iswhite, notinname, begtoken, intoken and endtoken look the
    character up in the corresponding _wht, _nin, _btk, _itk and _etk
    table.  */
     #define lowcase(c)      tolower ((unsigned char) (c))

     #define CHARS 256               /* number of 8-bit character codes (2^8) */
     #define CHAR(x)         ((unsigned int) (x) & (CHARS - 1))
     #define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
     #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
     #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
     #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
     #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
 156
 157
 158 /*
 159  *      xnew, xrnew -- allocate, reallocate storage
 160  *
 161  * SYNOPSIS:    Type *xnew (int n, Type);
 162  *              Type *xrnew (OldPointer, int n, Type);
 163  */
 /* Two implementations of xnew and xrnew.  With chkmalloc defined they
    call trace_malloc and trace_realloc, passing the __FILE__ and
    __LINE__ of the call site; otherwise they call xmalloc and
    xrealloc.  In both cases the size requested is (n) * sizeof (Type)
    and the result is cast to Type *.  */
 164 #ifdef chkmalloc
 165 # include "chkmalloc.h"
 166 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 167                                                   (n) * sizeof (Type)))
 168 # define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \
 169                                                    (op), (n) * sizeof (Type)))
 170 #else
 171 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 172 # define xrnew(op,n,Type) ((Type *) xrealloc ((op), (n) * sizeof (Type)))
 173 #endif
 174
 /* Truth values are plain ints; variables of this type are given the
    values of the TRUE and FALSE macros.  */
 175 typedef int bool;
 176
     /* Function type of the `function' member of `language'.  The empty
        parentheses leave the parameter list unspecified.  */
 177 typedef void Lang_function ();
 178
 /* Element type of the `compressors' table: a file name suffix paired
    with a command.  */
 179 typedef struct
 180 {
 181   char *suffix;                 /* e.g. "gz"; written without a leading dot */
 182   char *command;                /* Takes one arg and decompresses to stdout */
 183 } compressor;
 184
 /* Element type of the `lang_names' table.  */
 185 typedef struct
 186 {
 187   char *name;                   /* language name, e.g. "ada" */
 188   Lang_function *function;      /* per-language function, e.g. Ada_funcs;
                                        NULL in the "auto" entry */
 189   char **suffixes;              /* NULL-terminated list of file name suffixes
                                        (no leading dot), or NULL */
 190   char **interpreters;          /* NULL-terminated list of interpreter names,
                                        or NULL */
 191 } language;
 192
 193 extern char *getenv ();
 194
 195 /* Many compilers barf on this:
 196         Lang_function Ada_funcs;
 197    so let's write it this way */
 198 void Ada_funcs ();
 199 void Asm_labels ();
 200 void C_entries ();
 201 void default_C_entries ();
 202 void plain_C_entries ();
 203 void Cjava_entries ();
 204 void Cobol_paragraphs ();
 205 void Cplusplus_entries ();
 206 void Cstar_entries ();
 207 void Erlang_functions ();
 208 void Fortran_functions ();
 209 void Yacc_entries ();
 210 void Lisp_functions ();
 211 void Pascal_functions ();
 212 void Perl_functions ();
 213 void Postscript_functions ();
 214 void Prolog_functions ();
 215 void Python_functions ();
 216 void Scheme_functions ();
 217 void TeX_functions ();
 218 void just_read_file ();
 219
 220 compressor *get_compressor_from_suffix ();
 221 language *get_language_from_name ();
 222 language *get_language_from_interpreter ();
 223 language *get_language_from_suffix ();
 224 int total_size_of_entries ();
 225 long readline (), readline_internal ();
 226 void get_tag ();
 227
 228 #ifdef ETAGS_REGEXPS
 229 void analyse_regex ();
 230 void add_regex ();
 231 void free_patterns ();
 232 #endif /* ETAGS_REGEXPS */
 233 void error ();
 234 void suggest_asking_for_help ();
 235 void fatal (), pfatal ();
 236 void add_node ();
 237
 238 void init ();
 239 void initbuffer ();
 240 void find_entries ();
 241 void free_tree ();
 242 void pfnote (), new_pfnote ();
 243 void process_file ();
 244 void put_entries ();
 245 void takeprec ();
 246
 247 char *concat ();
 248 char *skip_spaces (), *skip_non_spaces ();
 249 char *savenstr (), *savestr ();
 250 char *etags_strchr (), *etags_strrchr ();
 251 char *etags_getcwd ();
 252 char *relative_filename (), *absolute_filename (), *absolute_dirname ();
 253 bool filename_is_absolute ();
 254 void canonicalize_filename ();
 255 void grow_linebuffer ();
 256 long *xmalloc (), *xrealloc ();
 257
 258 \f
 259 char searchar = '/';            /* use /.../ searches */
 260
 261 char *tagfile;                  /* output file */
 262 char *progname;                 /* name this program was invoked with */
 263 char *cwd;                      /* current working directory */
 264 char *tagfiledir;               /* directory of tagfile */
 265 FILE *tagf;                     /* ioptr for tags file */
 266
 267 char *curfile;                  /* current input file name */
 268 language *curlang;              /* current language */
 269
 270 int lineno;                     /* line number of current line */
 271 long charno;                    /* current character number */
 272 long linecharno;                /* charno of start of current line */
 273 char *dbp;                      /* pointer to start of current tag */
 274
 /* One tag.  Nodes are linked through `left' and `right' into the
    binary tree whose root is `head'.  */
 275 typedef struct node_st
 276 {                               /* sorting structure            */
 277   char *name;                   /* function or type name        */
 278   char *file;                   /* file name                    */
 279   bool is_func;                 /* use pattern or line no       */
 280   bool been_warned;             /* set if noticed dup           */
 281   int lno;                      /* line number tag is on        */
 282   long cno;                     /* character number line starts on */
 283   char *pat;                    /* search pattern               */
 284   struct node_st *left, *right; /* left and right sons          */
 285 } node;
 286
 287 node *head;                     /* the head of the binary tree of tags */
 288
 289 /*
 290  * A `linebuffer' is a structure which holds a line of text.
 291  * `readline_internal' reads a line from a stream into a linebuffer
 292  * and works regardless of the length of the line.
 293  * SIZE is the size of BUFFER, LEN is the length of the string in
 294  * BUFFER after readline reads it.
 295  */
 /* Holds one line of text; used for `lb', `token_name' and the `lb'
    member of each `lbs' element.  */
 296 typedef struct
 297 {
 298   long size;                    /* size of BUFFER */
 299   int len;                      /* length of the string in BUFFER */
 300   char *buffer;                 /* the text */
 301 } linebuffer;
 302
 303 linebuffer lb;                  /* the current line */
 304 linebuffer token_name;          /* used by C_entries as a temporary area */
 305 struct
 306 {
 307   long linepos;
 308   linebuffer lb;                /* used by C_entries instead of lb */
 309 } lbs[2];
 310
 311 /* boolean "functions" (see init)       */
 /* Lookup tables with one entry per character code.  They are read by
    the iswhite (_wht), notinname (_nin), intoken (_itk), begtoken
    (_btk) and endtoken (_etk) macros.  The strings that follow list
    the characters belonging to each class.
    NOTE(review): the tables are presumably filled from these strings
    by init, which is not shown here -- confirm.  */
 312 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 313 char
 314   /* white chars */
 315   *white = " \f\t\n\r",
 316   /* not in a name */
 317   *nonam = " \f\t\n\r(=,[;",
 318   /* token ending chars */
 319   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 320   /* token starting chars */
 321   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 322   /* valid in-token chars */
 323   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 324
 325 bool append_to_tagfile;         /* -a: append to tags */
 326 /* Of the flags below, typedefs, typedefs_and_cplusplus, constantypedefs
        and globals default to TRUE for etags, but to FALSE for ctags.  */
 327 bool typedefs;                  /* -t: create tags for C and Ada typedefs */
 328 bool typedefs_and_cplusplus;    /* -T: create tags for C typedefs, level */
 329                                 /* 0 struct/enum/union decls, and C++ */
 330                                 /* member functions. */
 331 bool constantypedefs;           /* -d: create tags for C #define, enum */
 332                                 /* constants and variables. */
 333                                 /* -D: opposite of -d.  Default under ctags. */
 334 bool declarations;              /* --declarations: tag them and extern in C&Co*/
 335 bool globals;                   /* create tags for global variables */
 336 bool members;                   /* create tags for C member variables */
 337 bool update;                    /* -u: update tags */
 338 bool vgrind_style;              /* -v: create vgrind style index output */
 339 bool no_warnings;               /* -w: suppress warnings */
 340 bool cxref_style;               /* -x: create cxref style output */
 341 bool cplusplus;                 /* .[hc] means C++, not C */
 342 bool noindentypedefs;           /* -I: ignore indentation in C */
 343 bool packages_only;             /* --packages-only: in Ada, only tag packages*/
 344
 345 #ifdef LONG_OPTIONS
 /* Long option table handed to getopt_long by main.  Entries whose
    third field is the address of a flag variable store the fourth
    field in that variable; main treats a return value of 0 from
    getopt_long as "already processed".  Entries whose third field is
    NULL map the long name to the short option character in the fourth
    field.  The table ends with a { NULL } entry.
    NOTE(review): "help" appears twice, once mapped to 'h' and once to
    'H' -- confirm whether the second entry can ever be selected.  */
 346 struct option longopts[] =
 347 {
 348   { "packages-only",      no_argument,       &packages_only, TRUE  },
 349   { "append",             no_argument,       NULL,           'a'   },
 350   { "backward-search",    no_argument,       NULL,           'B'   },
 351   { "c++",                no_argument,       NULL,           'C'   },
 352   { "cxref",              no_argument,       NULL,           'x'   },
 353   { "defines",            no_argument,       NULL,           'd'   },
 354   { "declarations",       no_argument,       &declarations,  TRUE  },
 355   { "no-defines",         no_argument,       NULL,           'D'   },
 356   { "globals",            no_argument,       &globals,       TRUE  },
 357   { "no-globals",         no_argument,       &globals,       FALSE },
 358   { "help",               no_argument,       NULL,           'h'   },
 359   { "help",               no_argument,       NULL,           'H'   },
 360   { "ignore-indentation", no_argument,       NULL,           'I'   },
 361   { "include",            required_argument, NULL,           'i'   },
 362   { "language",           required_argument, NULL,           'l'   },
 363   { "members",            no_argument,       &members,       TRUE  },
 364   { "no-members",         no_argument,       &members,       FALSE },
 365   { "no-warn",            no_argument,       NULL,           'w'   },
 366   { "output",             required_argument, NULL,           'o'   },
       /* The regexp options exist only when regexp support is compiled in.  */
 367 #ifdef ETAGS_REGEXPS
 368   { "regex",              required_argument, NULL,           'r'   },
 369   { "no-regex",           no_argument,       NULL,           'R'   },
 370   { "ignore-case-regex",  required_argument, NULL,           'c'   },
 371 #endif /* ETAGS_REGEXPS */
 372   { "typedefs",           no_argument,       NULL,           't'   },
 373   { "typedefs-and-c++",   no_argument,       NULL,           'T'   },
 374   { "update",             no_argument,       NULL,           'u'   },
 375   { "version",            no_argument,       NULL,           'V'   },
 376   { "vgrind",             no_argument,       NULL,           'v'   },
 377   { NULL }
 378 };
 379 #endif /* LONG_OPTIONS */
 380
 381 #ifdef ETAGS_REGEXPS
 382 /* Structure defining a regular expression.  Elements are
 383    the compiled pattern, and the name string. */
 /* One user-supplied regular expression.  Patterns are chained through
    p_next into the list whose head is p_head.  */
 384 typedef struct pattern
 385 {
 386   struct pattern *p_next;       /* next pattern in the list */
 387   language *language;           /* NOTE(review): presumably the language the
                                        regexp is restricted to -- confirm */
 388   char *regex;                  /* NOTE(review): presumably the regexp source
                                        text -- confirm */
 389   struct re_pattern_buffer *pattern; /* the compiled pattern */
 390   struct re_registers regs;     /* NOTE(review): presumably match registers
                                        filled by the regex library -- confirm */
 391   char *name_pattern;           /* the name string */
 392   bool error_signaled;          /* NOTE(review): presumably set once an error
                                        for this pattern was reported -- confirm */
 393 } pattern;
 394
 395 /* List of all regexps. */
 396 pattern *p_head = NULL;
 397
 398 /* How many characters in the character set.  (From regex.c.)  */
 399 #define CHAR_SET_SIZE 256
 400 /* Translation table for case-insensitive matching. */
 401 char lc_trans[CHAR_SET_SIZE];
 402 #endif /* ETAGS_REGEXPS */
 403
 /* Table of known compressed-file suffixes and the command paired with
    each.  The four gzip-style suffixes share one command; "bz2" uses
    bzip2.  The table ends with an entry whose suffix is NULL.  */
 404 compressor compressors[] =
 405 {
 406   { "z", "gzip -d -c"},
 407   { "Z", "gzip -d -c"},
 408   { "gz", "gzip -d -c"},
 409   { "GZ", "gzip -d -c"},
 410   { "bz2", "bzip2 -d -c" },
 411   { NULL }
 412 };
 413
 414 /*
 415  * Language stuff.
 416  */
 417
 418 /* Non-NULL if language fixed. */
 419 language *forced_lang = NULL;
 420
 /* Per-language file name suffix lists, referenced from lang_names.
    Each list is NULL-terminated and its entries are written without
    the leading dot.  Perl additionally has a NULL-terminated list of
    interpreter names.  */
 421 /* Ada code */
 422 char *Ada_suffixes [] =
 423   { "ads", "adb", "ada", NULL };
 424
 425 /* Assembly code */
 426 char *Asm_suffixes [] = { "a",  /* Unix assembler */
 427                           "asm", /* Microcontroller assembly */
 428                           "def", /* BSO/Tasking definition includes  */
 429                           "inc", /* Microcontroller include files */
 430                           "ins", /* Microcontroller include files */
 431                           "s", "sa", /* Unix assembler */
 432                           "S",   /* cpp-processed Unix assembler */
 433                           "src", /* BSO/Tasking C compiler output */
 434                           NULL
 435                         };
 436
 437 /* Note that .c and .h can be considered C++, if the --c++ flag was
 438    given.  That is why default_C_entries is called here. */
 439 char *default_C_suffixes [] =
 440   { "c", "h", NULL };
 441
 442 char *Cplusplus_suffixes [] =
 443   { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx",
 444     "M",                        /* Objective C++ */
 445     "pdb",                      /* Postscript with C syntax */
 446     NULL };
 447
 448 char *Cjava_suffixes [] =
 449   { "java", NULL };
 450
 451 char *Cobol_suffixes [] =
 452   { "COB", "cob", NULL };
 453
 454 char *Cstar_suffixes [] =
 455   { "cs", "hs", NULL };
 456
 457 char *Erlang_suffixes [] =
 458   { "erl", "hrl", NULL };
 459
 460 char *Fortran_suffixes [] =
 461   { "F", "f", "f90", "for", NULL };
 462
 463 char *Lisp_suffixes [] =
 464   { "cl", "clisp", "el", "l", "lisp", "lsp", "ml", NULL };
 465
 466 char *Pascal_suffixes [] =
 467   { "p", "pas", NULL };
 468
 469 char *Perl_suffixes [] =
 470   { "pl", "pm", NULL };
 471 char *Perl_interpreters [] =
 472   { "perl", "@PERL@", NULL };
 473
 474 char *plain_C_suffixes [] =
 475   { "pc",                       /* Pro*C file */
 476     "m",                        /* Objective C file */
 477     "lm",                       /* Objective lex file */
 478      NULL };
 479
 480 char *Postscript_suffixes [] =
 481   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 482
 483 char *Prolog_suffixes [] =
 484   { "prolog", NULL };
 485
 486 char *Python_suffixes [] =
 487   { "py", NULL };
 488
 489 /* Can't do the `SCM' or `scm' prefix with a version number. */
 490 char *Scheme_suffixes [] =
 491   { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL };
 492
 493 char *TeX_suffixes [] =
 494   { "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL };
 495
 496 char *Yacc_suffixes [] =
 497   { "y", "ym", "yy", "yxx", "y++", NULL }; /* .ym is Objective yacc file */
 498
 499 /*
 500  * Table of languages.
 501  *
 502  * It is ok for a given function to be listed under more than one
 503  * name.  I just didn't.
 504  */
 505
 /* Each row gives a language name, its function, its suffix list and
    its interpreter list (only Perl has one).  The "auto" and "none"
    rows have no suffix list, and "auto" has no function.  The table
    ends with a row whose name is NULL, which is where
    print_language_names stops.  */
 506 language lang_names [] =
 507 {
 508   { "ada",     Ada_funcs,           Ada_suffixes,         NULL              },
 509   { "asm",     Asm_labels,          Asm_suffixes,         NULL              },
 510   { "c",       default_C_entries,   default_C_suffixes,   NULL              },
 511   { "c++",     Cplusplus_entries,   Cplusplus_suffixes,   NULL              },
 512   { "c*",      Cstar_entries,       Cstar_suffixes,       NULL              },
 513   { "cobol",   Cobol_paragraphs,    Cobol_suffixes,       NULL              },
 514   { "erlang",  Erlang_functions,    Erlang_suffixes,      NULL              },
 515   { "fortran", Fortran_functions,   Fortran_suffixes,     NULL              },
 516   { "java",    Cjava_entries,       Cjava_suffixes,       NULL              },
 517   { "lisp",    Lisp_functions,      Lisp_suffixes,        NULL              },
 518   { "pascal",  Pascal_functions,    Pascal_suffixes,      NULL              },
 519   { "perl",    Perl_functions,      Perl_suffixes,        Perl_interpreters },
 520   { "postscript", Postscript_functions, Postscript_suffixes, NULL           },
 521   { "proc",    plain_C_entries,     plain_C_suffixes,     NULL              },
 522   { "prolog",  Prolog_functions,    Prolog_suffixes,      NULL              },
 523   { "python",  Python_functions,    Python_suffixes,      NULL              },
 524   { "scheme",  Scheme_functions,    Scheme_suffixes,      NULL              },
 525   { "tex",     TeX_functions,       TeX_suffixes,         NULL              },
 526   { "yacc",    Yacc_entries,        Yacc_suffixes,        NULL              },
 527   { "auto", NULL },             /* default guessing scheme */
 528   { "none", just_read_file },   /* regexp matching only */
 529   { NULL, NULL }                /* end of list */
 530 };
 531 \f
 /* Print on stdout one line per entry of lang_names: the language name
    followed by its default file name suffixes, each shown with a
    leading dot.  Entries without a suffix list print the name only.
    A closing paragraph explains `auto', `none' and how the language
    is guessed when none is specified.  */
 532 void
 533 print_language_names ()
 534 {
 535   language *lang;
 536   char **ext;
 537
 538   puts ("\nThese are the currently supported languages, along with the\n\
 539 default file name suffixes:");
       /* The table ends at the entry whose name is NULL.  */
 540   for (lang = lang_names; lang->name != NULL; lang++)
 541     {
 542       printf ("\t%s\t", lang->name);
 543       if (lang->suffixes != NULL)
 544         for (ext = lang->suffixes; *ext != NULL; ext++)
 545           printf (" .%s", *ext);
           /* End the line for this language.  */
 546       puts ("");
 547     }
 548   puts ("Where `auto' means use default language for files based on file\n\
 549 name suffix, and `none' means only do regexp processing on files.\n\
 550 If no language is specified and no matching suffix is found,\n\
 551 the first line of the file is read for a sharp-bang (#!) sequence\n\
 552 followed by the name of an interpreter.  If no such sequence is found,\n\
 553 Fortran is tried first; if no tags are found, C is tried next.\n\
 554 Compressed files are supported using gzip and bzip2.");
 555 }
 556
 557 #ifndef VERSION
 558 # define VERSION "20"
 559 #endif
 /* Print the program name ("ctags" when CTAGS is true, otherwise
    "etags"), VERSION and the copyright lines on stdout, then exit with
    status GOOD.  Does not return.  */
 560 void
 561 print_version ()
 562 {
 563   printf ("%s (GNU Emacs %s)\n", (CTAGS) ? "ctags" : "etags", VERSION);
 564   puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
 565   puts ("This program is distributed under the same terms as Emacs");
 566
 567   exit (GOOD);
 568 }
 569
 /* Print the usage message on stdout, followed by the list of
    languages and the bug-report address, then exit with status GOOD.
    Does not return.

    The text depends on how the program was built: CTAGS selects
    between the ctags and the etags option descriptions, LONG_OPTIONS
    selects the remark about long option names, and the regexp options
    are described only when ETAGS_REGEXPS is defined.  */
 570 void
 571 print_help ()
 572 {
 573   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 574 \n\
 575 These are the options accepted by %s.\n", progname, progname);
 576 #ifdef LONG_OPTIONS
 577   puts ("You may use unambiguous abbreviations for the long option names.");
 578 #else
 579   puts ("Long option names do not work with this executable, as it is not\n\
 580 linked with GNU getopt.");
 581 #endif /* LONG_OPTIONS */
 582   puts ("A - as file name means read names from stdin (one per line).");
 583   if (!CTAGS)
 584     printf ("  Absolute names are stored in the output file as they are.\n\
 585 Relative ones are stored relative to the output file's directory.");
 586   puts ("\n");
 587
 588   puts ("-a, --append\n\
 589         Append tag entries to existing tags file.");
 590
 591   puts ("--packages-only\n\
 592         For Ada files, only generate tags for packages .");
 593
       /* -B is described only in the ctags help.  */
 594   if (CTAGS)
 595     puts ("-B, --backward-search\n\
 596         Write the search commands for the tag entries using '?', the\n\
 597         backward-search command instead of '/', the forward-search command.");
 598
 599   puts ("-C, --c++\n\
 600         Treat files whose name suffix defaults to C language as C++ files.");
 601
 602   puts ("--declarations\n\
 603         In C and derived languages, create tags for function declarations,");
 604   if (CTAGS)
 605     puts ("\tand create tags for extern variables if --globals is used.");
 606   else
 607     puts
 608       ("\tand create tags for extern variables unless --no-globals is used.");
 609
       /* The ctags help describes -d; the etags help describes -D.  */
 610   if (CTAGS)
 611     puts ("-d, --defines\n\
 612         Create tag entries for C #define constants and enum constants, too.");
 613   else
 614     puts ("-D, --no-defines\n\
 615         Don't create tag entries for C #define constants and enum constants.\n\
 616         This makes the tags file smaller.");
 617
       /* -i and -l are described only in the etags help.  */
 618   if (!CTAGS)
 619     {
 620       puts ("-i FILE, --include=FILE\n\
 621         Include a note in tag file indicating that, when searching for\n\
 622         a tag, one should also consult the tags file FILE after\n\
 623         checking the current file.");
 624       puts ("-l LANG, --language=LANG\n\
 625         Force the following files to be considered as written in the\n\
 626         named language up to the next --language=LANG option.");
 627     }
 628
       /* The ctags help describes --globals; the etags help describes
          --no-globals.  */
 629   if (CTAGS)
 630     puts ("--globals\n\
 631         Create tag entries for global variables in some languages.");
 632   else
 633     puts ("--no-globals\n\
 634         Do not create tag entries for global variables in some\n\
 635         languages.  This makes the tags file smaller.");
 636   puts ("--members\n\
 637         Create tag entries for member variables in C and derived languages.");
 638
 639 #ifdef ETAGS_REGEXPS
 640   puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
 641         Make a tag for each line matching pattern REGEXP in the following\n\
 642         files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
 643         regexfile is a file containing one REGEXP per line.\n\
 644         REGEXP is anchored (as if preceded by ^).\n\
 645         The form /REGEXP/NAME/ creates a named tag.\n\
 646         For example Tcl named tags can be created with:\n\
 647         --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
 648   puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
 649         Like -r, --regex but ignore case when matching expressions.");
 650   puts ("-R, --no-regex\n\
 651         Don't create tags from regexps for the following files.");
 652 #endif /* ETAGS_REGEXPS */
 653   puts ("-o FILE, --output=FILE\n\
 654         Write the tags to FILE.");
 655   puts ("-I, --ignore-indentation\n\
 656         Don't rely on indentation quite as much as normal.  Currently,\n\
 657         this means not to assume that a closing brace in the first\n\
 658         column is the final brace of a function or structure\n\
 659         definition in C and C++.");
 660
       /* -t, -T, -u, -v, -w and -x are described only in the ctags help.  */
 661   if (CTAGS)
 662     {
 663       puts ("-t, --typedefs\n\
 664         Generate tag entries for C and Ada typedefs.");
 665       puts ("-T, --typedefs-and-c++\n\
 666         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 667         and C++ member functions.");
 668       puts ("-u, --update\n\
 669         Update the tag entries for the given files, leaving tag\n\
 670         entries for other files in place.  Currently, this is\n\
 671         implemented by deleting the existing entries for the given\n\
 672         files and then rewriting the new entries at the end of the\n\
 673         tags file.  It is often faster to simply rebuild the entire\n\
 674         tag file than to use this.");
 675       puts ("-v, --vgrind\n\
 676         Generates an index of items intended for human consumption,\n\
 677         similar to the output of vgrind.  The index is sorted, and\n\
 678         gives the page number of each item.");
 679       puts ("-w, --no-warn\n\
 680         Suppress warning messages about entries defined in multiple\n\
 681         files.");
 682       puts ("-x, --cxref\n\
 683         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 684         The output uses line numbers instead of page numbers, but\n\
 685         beyond that the differences are cosmetic; try both to see\n\
 686         which you like.");
 687     }
 688
 689   puts ("-V, --version\n\
 690         Print the version of the program.\n\
 691 -h, --help\n\
 692         Print this help message.");
 693
 694   print_language_names ();
 695
 696   puts ("");
 697   puts ("Report bugs to bug-gnu-emacs@prep.ai.mit.edu");
 698
 699   exit (GOOD);
 700 }
 701
 702 \f
 /* Kind of a recorded command-line argument; stored in the arg_type
    member of `argument'.  main records file names as at_filename.
    NOTE(review): at_language, at_regexp and at_icregexp presumably
    correspond to the --language, --regex and --ignore-case-regex
    options; the code recording them is not shown here -- confirm.  */
 703 enum argument_type
 704 {
 705   at_language,
 706   at_regexp,
 707   at_filename,
 708   at_icregexp
 709 };
 710
 711 /* This structure helps us allow mixing of --lang and file names. */
 /* Element type of the argbuffer array that main allocates with one
    slot per command-line argument.  */
 712 typedef struct
 713 {
 714   enum argument_type arg_type;  /* which kind of argument this is */
 715   char *what;                   /* NOTE(review): presumably the argument
                                        text -- confirm against main */
 716   language *lang;               /* language of the regexp */
 717 } argument;
 718
 719 #ifdef VMS                      /* VMS specific functions */
 720
 721 #define EOS     '\0'
 722
 723 /* This is a BUG!  ANY arbitrary limit is a BUG!
 724    Won't someone please fix this?  */
 725 #define MAX_FILE_SPEC_LEN       255
 /* A length word followed by the characters of a file specification.
    fn_exp passes the whole structure as the buffer of a descriptor
    of class DSC$K_CLASS_VS and, after a successful lookup, uses curlen
    as the index at which to terminate body.  */
 726 typedef struct  {
 727   short   curlen;               /* length of the text in body */
 728   char    body[MAX_FILE_SPEC_LEN + 1]; /* text, with room for a terminator */
 729 } vspec;
 730
 731 /*
 732  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
 733  returning in each successive call the next file name matching the input
 734  spec. The function expects that each in_spec passed
 735  to it will be processed to completion; in particular, up to and
 736  including the call following that in which the last matching name
 737  is returned, the function ignores the value of in_spec, and will
 738  only start processing a new spec with the following call.
 739  If an error occurs, on return out_spec contains the value
 740  of in_spec when the error occurred.
 741
 742  With each successive file name returned in out_spec, the
 743  function's return value is one. When there are no more matching
 744  names the function returns zero. If on the first call no file
 745  matches in_spec, or there is any other error, -1 is returned.
 746 */
 747
 748 #include        <rmsdef.h>
 749 #include        <descrip.h>
 750 #define         OUTSIZE MAX_FILE_SPEC_LEN
 /* Expand the file specification IN, returning one matching name per
    call in OUT->body.

    Returns 1 when a name was stored (OUT->body is then terminated at
    index OUT->curlen), 0 when lib$find_file reports RMS$_NMF because
    there are no more matching names, and -1 for any other status.  In
    the -1 case IN is copied into OUT->body, truncated to
    MAX_FILE_SPEC_LEN characters if it is longer.  After a 0 or -1
    return the search context is released and the next call starts a
    new search.

    The descriptors for IN and OUT are built only on the first call of
    a search, so the arguments of the following calls in the same
    search are not used to build them.  */
 751 short
 752 fn_exp (out, in)
 753      vspec *out;
 754      char *in;
 755 {
 756   static long context = 0;
 757   static struct dsc$descriptor_s o;
 758   static struct dsc$descriptor_s i;
 759   static bool pass1 = TRUE;
 760   long status;
 761   short retval;
 762
 763   if (pass1)
 764     {
 765       pass1 = FALSE;
 766       o.dsc$a_pointer = (char *) out;
 767       o.dsc$w_length = (short)OUTSIZE;
 768       i.dsc$a_pointer = in;
 769       i.dsc$w_length = (short)strlen(in);
 770       i.dsc$b_dtype = DSC$K_DTYPE_T;
 771       i.dsc$b_class = DSC$K_CLASS_S;
 772       o.dsc$b_dtype = DSC$K_DTYPE_VT;
 773       o.dsc$b_class = DSC$K_CLASS_VS;
 774     }
 775   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
 776     {
 777       out->body[out->curlen] = EOS;
 778       return 1;
 779     }
 780   else if (status == RMS$_NMF)
 781     retval = 0;
 782   else
 783     {
           /* IN is supplied by the caller and may be longer than the
              body array, so copy at most MAX_FILE_SPEC_LEN characters
              and terminate explicitly instead of using strcpy.  */
 784       strncpy (out->body, in, MAX_FILE_SPEC_LEN);
           out->body[MAX_FILE_SPEC_LEN] = EOS;
 785       retval = -1;
 786     }
       /* The search is over: release the context and arrange for the
          descriptors to be rebuilt on the next call.  */
 787   lib$find_file_end(&context);
 788   pass1 = TRUE;
 789   return retval;
 790 }
 791
 792 /*
 793   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
 794   name of each file specified by the provided arg expanding wildcards.
 795 */
 /* Return the next file name matching ARG, using fn_exp.

    On a match, sets *P_ERROR to FALSE and returns the name.  When
    fn_exp returns 0, sets *P_ERROR to FALSE and returns NULL.  For any
    other result, sets *P_ERROR to TRUE and returns the buffer, into
    which fn_exp has copied ARG.

    The returned string lives in a static buffer that is reused by
    every call.  */
 796 char *
 797 gfnames (arg, p_error)
 798      char *arg;
 799      bool *p_error;
 800 {
 801   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 802
 803   switch (fn_exp (&filename, arg))
 804     {
 805     case 1:
 806       *p_error = FALSE;
 807       return filename.body;
 808     case 0:
 809       *p_error = FALSE;
 810       return NULL;
 811     default:
 812       *p_error = TRUE;
 813       return filename.body;
 814     }
 815 }
 816
 817 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
 /* Stand-in for system: it does not run CMD, it only reports through
    error that the call is not implemented under VMS.
    NOTE(review): the return type is implicit int and control reaches
    the closing brace without a return statement, so a caller that
    uses the result gets an indeterminate value.  Check what the
    callers expect before choosing a value to return.  */
 818 system (cmd)
 819      char *cmd;
 820 {
 821   error ("%s", "system() function not implemented under VMS");
 822 }
 823 #endif
 824
 825 #define VERSION_DELIM   ';'
 /* Convert the file name S in place: every character before the first
    VERSION_DELIM is passed through lowcase, and the delimiter itself
    is overwritten with EOS, which cuts the string there.  Characters
    after the delimiter are left untouched.  If S has no delimiter the
    whole string is lower-cased.  Returns S.  */
 826 char *massage_name (s)
 827      char *s;
 828 {
 829   char *start = s;
 830
 831   for ( ; *s; s++)
 832     if (*s == VERSION_DELIM)
 833       {
 834         *s = EOS;
 835         break;
 836       }
 837     else
 838       *s = lowcase (*s);
 839   return start;
 840 }
 841 #endif /* VMS */
 842
 843 \f
 844 int
 845 main (argc, argv)
 846      int argc;
 847      char *argv[];
 848 {
       /* Overview: options and file names are first recorded in ARGBUFFER
          in the order in which they appear, then replayed in that order,
          so that a language option takes effect for the file names that
          come after it (regexp options are replayed the same way).  In
          etags mode (!CTAGS) the tags file is written while the files are
          processed; in ctags mode the entries are kept in the tree at
          `head' and written out at the end. */
 849   int i;
 850   unsigned int nincluded_files;
 851   char **included_files;
 852   char *this_file;
 853   argument *argbuffer;
 854   int current_arg, file_count;
 855   linebuffer filename_lb;
 856 #ifdef VMS
 857   bool got_err;
 858 #endif
 859
 860 #ifdef DOS_NT
 861   _fmode = O_BINARY;   /* all of files are treated as binary files */
 862 #endif /* DOS_NT */
 863
 864   progname = argv[0];
 865   nincluded_files = 0;
 866   included_files = xnew (argc, char *);
 867   current_arg = 0;
 868   file_count = 0;
 869
 870   /* Allocate enough no matter what happens.  Overkill, but each one
 871      is small. */
 872   argbuffer = xnew (argc, argument);
 873
 874 #ifdef ETAGS_REGEXPS
 875   /* Set syntax for regular expression routines. */
 876   re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
 877   /* Translation table for case-insensitive search. */
 878   for (i = 0; i < CHAR_SET_SIZE; i++)
 879     lc_trans[i] = lowcase (i);
 880 #endif /* ETAGS_REGEXPS */
 881
 882   /*
 883    * If etags, always find typedefs and structure tags.  Why not?
 884    * Also default is to find macro constants, enum constants and
 885    * global variables.
 886    */
 887   if (!CTAGS)
 888     {
 889       typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
 890       globals = TRUE;
 891       members = FALSE;
 892     }
 893
       /* Option parsing loop; it ends when getopt_long returns EOF. */
 894   while (1)
 895     {
 896       int opt;
 897       char *optstring;
 898
           /* A leading '-' in OPTSTRING asks GNU getopt to hand back
              non-option arguments in place, as option code 1 (handled by
              `case 1' below).  That first character is skipped when
              LONG_OPTIONS is not defined. */
 899 #ifdef ETAGS_REGEXPS
 900       optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
 901 #else
 902       optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
 903 #endif /* ETAGS_REGEXPS */
 904
 905 #ifndef LONG_OPTIONS
 906       optstring = optstring + 1;
 907 #endif /* LONG_OPTIONS */
 908
 909       opt = getopt_long (argc, argv, optstring, longopts, 0);
 910       if (opt == EOF)
 911         break;
 912
 913       switch (opt)
 914         {
 915         case 0:
 916           /* If getopt returns 0, then it has already processed a
 917              long-named option.  We should do nothing.  */
 918           break;
 919
 920         case 1:
 921           /* This means that a file name has been seen.  Record it. */
 922           argbuffer[current_arg].arg_type = at_filename;
 923           argbuffer[current_arg].what = optarg;
 924           ++current_arg;
 925           ++file_count;
 926           break;
 927
 928           /* Common options. */
 929         case 'a': append_to_tagfile = TRUE;     break;
 930         case 'C': cplusplus = TRUE;             break;
 931         case 'd': constantypedefs = TRUE;       break;
 932         case 'D': constantypedefs = FALSE;      break;
 933         case 'f':               /* for compatibility with old makefiles */
 934         case 'o':
               /* Complain if a tags file name was already given; the new
                  name is stored below in either case. */
 935           if (tagfile)
 936             {
 937               error ("-%c option may only be given once.", opt);
 938               suggest_asking_for_help ();
 939             }
 940           tagfile = optarg;
 941           break;
 942         case 'I':
 943         case 'S':               /* for backward compatibility */
 944           noindentypedefs = TRUE;
 945           break;
 946         case 'l':
               /* Record the language only if its name is known;
                  get_language_from_name reports unknown names itself. */
 947           {
 948             language *lang = get_language_from_name (optarg);
 949             if (lang != NULL)
 950               {
 951                 argbuffer[current_arg].lang = lang;
 952                 argbuffer[current_arg].arg_type = at_language;
 953                 ++current_arg;
 954               }
 955           }
 956           break;
 957 #ifdef ETAGS_REGEXPS
 958         case 'r':
 959           argbuffer[current_arg].arg_type = at_regexp;
 960           argbuffer[current_arg].what = optarg;
 961           ++current_arg;
 962           break;
 963         case 'R':
               /* Recorded as a regexp argument with a NULL pattern; what
                  that means is up to analyse_regex (not shown here). */
 964           argbuffer[current_arg].arg_type = at_regexp;
 965           argbuffer[current_arg].what = NULL;
 966           ++current_arg;
 967           break;
 968         case 'c':
               /* Same as 'r', but the regexp ignores case. */
 969           argbuffer[current_arg].arg_type = at_icregexp;
 970           argbuffer[current_arg].what = optarg;
 971           ++current_arg;
 972           break;
 973 #endif /* ETAGS_REGEXPS */
 974         case 'V':
 975           print_version ();
 976           break;
 977         case 'h':
 978         case 'H':
 979           print_help ();
 980           break;
 981         case 't':
 982           typedefs = TRUE;
 983           break;
 984         case 'T':
 985           typedefs = typedefs_and_cplusplus = TRUE;
 986           break;
 987 #if (!CTAGS)
 988           /* Etags options */
 989         case 'i':
               /* Names of tags files to be mentioned as `include' entries
                  at the end of the output. */
 990           included_files[nincluded_files++] = optarg;
 991           break;
 992 #else /* CTAGS */
 993           /* Ctags options. */
 994         case 'B': searchar = '?';       break;
 995         case 'u': update = TRUE;        break;
 996         case 'v': vgrind_style = TRUE;  /*FALLTHRU*/
 997         case 'x': cxref_style = TRUE;   break;
 998         case 'w': no_warnings = TRUE;   break;
 999 #endif /* CTAGS */
1000         default:
1001           suggest_asking_for_help ();
1002         }
1003     }
1004
       /* Whatever getopt left unprocessed is taken as file names. */
1005   for (; optind < argc; ++optind)
1006     {
1007       argbuffer[current_arg].arg_type = at_filename;
1008       argbuffer[current_arg].what = argv[optind];
1009       ++current_arg;
1010       ++file_count;
1011     }
1012
1013   if (nincluded_files == 0 && file_count == 0)
1014     {
1015       error ("no input files specified.", 0);
1016       suggest_asking_for_help ();
1017     }
1018
1019   if (tagfile == NULL)
1020     tagfile = CTAGS ? "tags" : "TAGS";
1021   cwd = etags_getcwd ();        /* the current working directory */
       /* Make sure CWD ends with a slash. */
1022   if (cwd[strlen (cwd) - 1] != '/')
1023     {
1024       char *oldcwd = cwd;
1025       cwd = concat (oldcwd, "/", "");
1026       free (oldcwd);
1027     }
       /* A tags file named "-" has no directory of its own (in etags mode
          it stands for stdout, see below), so CWD is used instead. */
1028   if (streq (tagfile, "-"))
1029     tagfiledir = cwd;
1030   else
1031     tagfiledir = absolute_dirname (tagfile, cwd);
1032
1033   init ();                      /* set up boolean "functions" */
1034
1035   initbuffer (&lb);
1036   initbuffer (&token_name);
1037   initbuffer (&lbs[0].lb);
1038   initbuffer (&lbs[1].lb);
1039   initbuffer (&filename_lb);
1040
       /* In etags mode the tags file is opened now, because process_file
          writes the entries of each file as soon as it has scanned it. */
1041   if (!CTAGS)
1042     {
1043       if (streq (tagfile, "-"))
1044         {
1045           tagf = stdout;
1046 #ifdef DOS_NT
1047           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1048              doesn't take effect until after `stdout' is already open). */
1049           if (!isatty (fileno (stdout)))
1050             setmode (fileno (stdout), O_BINARY);
1051 #endif /* DOS_NT */
1052         }
1053       else
1054         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1055       if (tagf == NULL)
1056         pfatal (tagfile);
1057     }
1058
1059   /*
1060    * Loop through files finding functions.
1061    */
1062   for (i = 0; i < current_arg; ++i)
1063     {
1064       switch (argbuffer[i].arg_type)
1065         {
1066         case at_language:
               /* Stays in force until the next language argument. */
1067           forced_lang = argbuffer[i].lang;
1068           break;
1069 #ifdef ETAGS_REGEXPS
1070         case at_regexp:
1071           analyse_regex (argbuffer[i].what, FALSE);
1072           break;
1073         case at_icregexp:
1074           analyse_regex (argbuffer[i].what, TRUE);
1075           break;
1076 #endif
1077         case at_filename:
               /* Under VMS the argument is expanded by gfnames and the
                  code below runs once per name it returns; elsewhere it
                  runs once, on the argument itself. */
1078 #ifdef VMS
1079           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1080             {
1081               if (got_err)
1082                 {
1083                   error ("can't find file %s\n", this_file);
1084                   argc--, argv++;
1085                 }
1086               else
1087                 {
1088                   this_file = massage_name (this_file);
1089                 }
1090 #else
1091               this_file = argbuffer[i].what;
1092 #endif
1093               /* Input file named "-" means read file names from stdin
1094                  (one per line) and use them. */
1095               if (streq (this_file, "-"))
1096                 while (readline_internal (&filename_lb, stdin) > 0)
1097                   process_file (filename_lb.buffer);
1098               else
1099                 process_file (this_file);
1100 #ifdef VMS
1101             }
1102 #endif
1103           break;
1104         }
1105     }
1106
1107 #ifdef ETAGS_REGEXPS
1108   free_patterns ();
1109 #endif /* ETAGS_REGEXPS */
1110
       /* Etags mode: all tags are already written.  Append one `include'
          entry per -i option, close the file and exit. */
1111   if (!CTAGS)
1112     {
1113       while (nincluded_files-- > 0)
1114         fprintf (tagf, "\f\n%s,include\n", *included_files++);
1115
1116       fclose (tagf);
1117       exit (GOOD);
1118     }
1119
1120   /* If CTAGS, we are here.  process_file did not write the tags yet,
1121      because we want them ordered.  Let's do it now. */
1122   if (cxref_style)
1123     {
           /* Cross-reference output is printed by put_entries on stdout;
              no tags file is opened. */
1124       put_entries (head);
1125       free_tree (head);
1126       head = NULL;
1127       exit (GOOD);
1128     }
1129
1130   if (update)
1131     {
           /* For each input file, filter out of the existing tags file the
              lines that contain "\tFILE\t", using a shell command; the new
              entries are then appended below.
              NOTE(review): CMD is a fixed BUFSIZ buffer filled by sprintf
              from file names that are neither quoted nor length-checked. */
1132       char cmd[BUFSIZ];
1133       for (i = 0; i < current_arg; ++i)
1134         {
1135           if (argbuffer[i].arg_type != at_filename)
1136             continue;
1137           sprintf (cmd,
1138                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1139                    tagfile, argbuffer[i].what, tagfile);
1140           if (system (cmd) != GOOD)
1141             fatal ("failed to execute shell command", (char *)NULL);
1142         }
1143       append_to_tagfile = TRUE;
1144     }
1145
1146   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1147   if (tagf == NULL)
1148     pfatal (tagfile);
1149   put_entries (head);
1150   free_tree (head);
1151   head = NULL;
1152   fclose (tagf);
1153
1154   if (update)
1155     {
           /* The appended entries left the file unordered: sort it in
              place and exit with the status returned by system.
              NOTE(review): same unbounded sprintf as above. */
1156       char cmd[BUFSIZ];
1157       sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1158       exit (system (cmd));
1159     }
1160   return GOOD;
1161 }
1162
1163
1164
1165 /*
1166  * Return a compressor given the file name.  If EXTPTR is non-zero,
1167  * return a pointer into FILE where the compressor-specific
1168  * extension begins.  If no compressor is found, NULL is returned
1169  * and EXTPTR is not significant.
1170  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca>
1171  */
1172 compressor *
1173 get_compressor_from_suffix (file, extptr)
1174      char *file;
1175      char **extptr;
1176 {
1177   compressor *compr;
1178   char *slash, *suffix;
1179
1180   /* This relies on FN to be after canonicalize_filename,
1181      so we don't need to consider backslashes on DOS_NT.  */
1182   slash = etags_strrchr (file, '/');
1183   suffix = etags_strrchr (file, '.');
       /* No dot at all, or the last dot comes before the last slash and
          so is not part of the last file name component. */
1184   if (suffix == NULL || suffix < slash)
1185     return NULL;
       /* *EXTPTR is written before any compressor is looked up; at this
          point it addresses the dot itself. */
1186   if (extptr != NULL)
1187     *extptr = suffix;
1188   suffix += 1;
1189   /* Let those poor souls who live with DOS 8+3 file name limits get
1190      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1191      Only the first do loop is run if not MSDOS */
1192   do
1193     {
           /* The compressors table ends with an entry whose suffix is
              NULL. */
1194       for (compr = compressors; compr->suffix != NULL; compr++)
1195         if (streq (compr->suffix, suffix))
1196           return compr;
1197 #ifndef MSDOS
1198       break;
1199 #endif
           /* MSDOS only: drop the first character of SUFFIX and try again
              with what remains; *EXTPTR is moved along with it. */
1200       if (extptr != NULL)
1201         *extptr = ++suffix;
1202     } while (*suffix != '\0');
1203   return NULL;
1204 }
1205
1206
1207
1208 /*
1209  * Return a language given the name.
      * An error is reported and NULL is returned when NAME is NULL or
      * when it matches no entry of lang_names.
1210  */
1211 language *
1212 get_language_from_name (name)
1213      char *name;
1214 {
1215   language *lang;
1216
1217   if (name == NULL)
1218     error ("empty language name", (char *)NULL);
1219   else
1220     {
           /* lang_names ends with an entry whose name is NULL. */
1221       for (lang = lang_names; lang->name != NULL; lang++)
1222         if (streq (name, lang->name))
1223           return lang;
1224       error ("unknown language \"%s\"", name);
1225     }
1226
       /* Reached only after one of the two errors above. */
1227   return NULL;
1228 }
1229
1230
1231 /*
1232  * Return a language given the interpreter name.
      * NULL is returned, without any message, when INTERPRETER is NULL
      * or when no language lists it among its interpreters.
1233  */
1234 language *
1235 get_language_from_interpreter (interpreter)
1236      char *interpreter;
1237 {
1238   language *lang;
1239   char **iname;
1240
1241   if (interpreter == NULL)
1242     return NULL;
       /* Languages without an interpreters list are skipped; each list
          ends with a NULL pointer. */
1243   for (lang = lang_names; lang->name != NULL; lang++)
1244     if (lang->interpreters != NULL)
1245       for (iname = lang->interpreters; *iname != NULL; iname++)
1246         if (streq (*iname, interpreter))
1247             return lang;
1248
1249   return NULL;
1250 }
1251
1252
1253
1254 /*
1255  * Return a language given the file name.
      * The suffix is whatever follows the last '.' found in FILE (no
      * check is made here that the dot lies after the last slash).
      * NULL is returned when FILE has no '.' or when no language lists
      * that suffix.
1256  */
1257 language *
1258 get_language_from_suffix (file)
1259      char *file;
1260 {
1261   language *lang;
1262   char **ext, *suffix;
1263
1264   suffix = etags_strrchr (file, '.');
1265   if (suffix == NULL)
1266     return NULL;
1267   suffix += 1;                  /* skip the dot */
       /* Languages without a suffixes list are skipped; each list ends
          with a NULL pointer. */
1268   for (lang = lang_names; lang->name != NULL; lang++)
1269     if (lang->suffixes != NULL)
1270       for (ext = lang->suffixes; *ext != NULL; ext++)
1271         if (streq (*ext, suffix))
1272           return lang;
1273   return NULL;
1274 }
1275
1276
1277
1278 /*
1279  * This routine is called on each file argument.
      * canonicalize_filename is applied to FILE first.  The tags file
      * itself is skipped with a message, and a name whose uncompressed
      * form was already processed is skipped silently.  If FILE cannot
      * be stat'ed, the name without its compressor suffix, or FILE with
      * each known compressor suffix appended, is tried instead.  The
      * file is read directly, or through the compressor's command when
      * the name used is a compressed one, and is handed to find_entries.
      * In etags mode the tags found are written to the tags file right
      * away and the tree is emptied.
1280  */
1281 void
1282 process_file (file)
1283      char *file;
1284 {
1285   struct stat stat_buf;
1286   FILE *inf;
1287   compressor *compr;
1288   char *compressed_name, *uncompressed_name;
1289   char *ext, *real_name;
1290
1291
1292   canonicalize_filename (file);
1293   if (streq (file, tagfile) && !streq (tagfile, "-"))
1294     {
1295       error ("skipping inclusion of %s in self.", file);
1296       return;
1297     }
       /* From here on REAL_NAME, the name actually opened, always aliases
          COMPRESSED_NAME or UNCOMPRESSED_NAME (or is NULL), so only those
          two are freed at `exit'. */
1298   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1299     {
1300       compressed_name = NULL;
1301       real_name = uncompressed_name = savestr (file);
1302     }
1303   else
1304     {
           /* EXT points into FILE where the compressor suffix begins, so
              the uncompressed name is what precedes it. */
1305       real_name = compressed_name = savestr (file);
1306       uncompressed_name = savenstr (file, ext - file);
1307     }
1308
1309   /* If the canonicalised uncompressed name has already been dealt with,
1310      skip it silently, else add it to the list. */
1311   {
1312     typedef struct processed_file
1313     {
1314       char *filename;
1315       struct processed_file *next;
1316     } processed_file;
         /* The list persists across calls; new names are pushed at its
            head and nothing in this function removes them. */
1317     static processed_file *pf_head = NULL;
1318     register processed_file *fnp;
1319
1320     for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1321       if (streq (uncompressed_name, fnp->filename))
1322         goto exit;
1323     fnp = pf_head;
1324     pf_head = xnew (1, struct processed_file);
1325     pf_head->filename = savestr (uncompressed_name);
1326     pf_head->next = fnp;
1327   }
1328
1329   if (stat (real_name, &stat_buf) != 0)
1330     {
1331       /* Reset real_name and try with a different name. */
1332       real_name = NULL;
1333       if (compressed_name != NULL) /* try with the given suffix */
1334         {
               /* The compressed name given does not exist: fall back to
                  the name with the compressor suffix removed. */
1335           if (stat (uncompressed_name, &stat_buf) == 0)
1336             real_name = uncompressed_name;
1337         }
1338       else                      /* try all possible suffixes */
1339         {
               /* On success COMPR is left on the compressor whose suffix
                  produced an existing file. */
1340           for (compr = compressors; compr->suffix != NULL; compr++)
1341             {
1342               compressed_name = concat (file, ".", compr->suffix);
1343               if (stat (compressed_name, &stat_buf) != 0)
1344                 {
1345 #ifdef MSDOS
                       /* Squeeze characters out of the appended suffix one
                          at a time, starting with the dot, and retry after
                          each removal. */
1346                   char *suf = compressed_name + strlen (file);
1347                   size_t suflen = strlen (compr->suffix) + 1;
1348                   for ( ; suf[1]; suf++, suflen--)
1349                     {
1350                       memmove (suf, suf + 1, suflen);
1351                       if (stat (compressed_name, &stat_buf) == 0)
1352                         {
1353                           real_name = compressed_name;
1354                           break;
1355                         }
1356                     }
1357                   if (real_name != NULL)
1358                     break;
1359 #endif
1360                   free (compressed_name);
1361                   compressed_name = NULL;
1362                 }
1363               else
1364                 {
1365                   real_name = compressed_name;
1366                   break;
1367                 }
1368             }
1369         }
1370       if (real_name == NULL)
1371         {
1372           perror (file);
1373           goto exit;
1374         }
1375     } /* try with a different name */
1376
1377   if (!S_ISREG (stat_buf.st_mode))
1378     {
1379       error ("skipping %s: it is not a regular file.", real_name);
1380       goto exit;
1381     }
       /* A compressed file is read from a pipe running the compressor's
          command on it.  The file name is appended to the command as is,
          without any quoting. */
1382   if (real_name == compressed_name)
1383     {
1384       char *cmd = concat (compr->command, " ", real_name);
1385       inf = popen (cmd, "r");
1386       free (cmd);
1387     }
1388   else
1389     inf = fopen (real_name, "r");
1390   if (inf == NULL)
1391     {
1392       perror (real_name);
1393       goto exit;
1394     }
1395
       /* Tags are always recorded under the uncompressed name. */
1396   find_entries (uncompressed_name, inf);
1397
       /* Close the stream with the function matching how it was opened. */
1398   if (real_name == compressed_name)
1399     pclose (inf);
1400   else
1401     fclose (inf);
1402
1403   if (!CTAGS)
1404     {
           /* Etags mode: write the section header for this file (form
              feed, file name, size of the entries), then the entries, and
              empty the tree for the next file. */
1405       char *filename;
1406
1407       if (filename_is_absolute (uncompressed_name))
1408         {
1409           /* file is an absolute file name.  Canonicalise it. */
1410           filename = absolute_filename (uncompressed_name, cwd);
1411         }
1412       else
1413         {
1414           /* file is a file name relative to cwd.  Make it relative
1415              to the directory of the tags file. */
1416           filename = relative_filename (uncompressed_name, tagfiledir);
1417         }
1418       fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1419       free (filename);
1420       put_entries (head);
1421       free_tree (head);
1422       head = NULL;
1423     }
1424
       /* Common cleanup for every path taken after the names were
          allocated. */
1425  exit:
1426   if (compressed_name) free(compressed_name);
1427   if (uncompressed_name) free(uncompressed_name);
1428   return;
1429 }
1430
1431 /*
1432  * This routine sets up the boolean pseudo-functions which work
1433  * by setting boolean flags dependent upon the corresponding character.
1434  * Every char which is NOT in that string is not a white char.  Therefore,
1435  * all of the array "_wht" is set to FALSE, and then the elements
1436  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1437  * of a char is TRUE if it is in the string "white", else FALSE.
      * The same is done for notinname, begtoken, intoken and endtoken
      * with the strings nonam, begtk, midtk and endtk.  Finally the
      * entry for '\0' in each table is copied from the entry for '\n'.
1438  */
1439 void
1440 init ()
1441 {
1442   register char *sp;
1443   register int i;
1444
       /* Clear all five tables for every character code below CHARS. */
1445   for (i = 0; i < CHARS; i++)
1446     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
       /* Set the entries of the characters listed in each string. */
1447   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1448   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1449   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1450   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1451   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
       /* The loops above stop at '\0' and so can never set its entries;
          give it the same classification as '\n'. */
1452   iswhite('\0') = iswhite('\n');
1453   notinname('\0') = notinname('\n');
1454   begtoken('\0') = begtoken('\n');
1455   intoken('\0') = intoken('\n');
1456   endtoken('\0') = endtoken('\n');
1457 }
1458
1459 /*
1460  * This routine takes the already open stream INF for FILE and calls
1461  * the function which finds the function and type definitions.
      * The language is chosen, in this order, from: the language forced
      * by the user, the suffix of FILE, the interpreter named on a `#!'
      * first line, and finally by trying Fortran and then, if that adds
      * no tags, C or C++.
1462  */
1463 node *last_node = NULL;       /* set by add_node to the node added last */
1464
1465 void
1466 find_entries (file, inf)
1467      char *file;
1468      FILE *inf;
1469 {
1470   char *cp;
1471   language *lang;
1472   node *old_last_node;
1473
1474   /* Memory leakage here: the string pointed by curfile is
1475      never released, because curfile is copied into np->file
1476      for each node, to be used in CTAGS mode.  The amount of
1477      memory leaked here is the sum of the lengths of the
1478      file names. */
1479   curfile = savestr (file);
1480
1481   /* If user specified a language, use it. */
1482   lang = forced_lang;
1483   if (lang != NULL && lang->function != NULL)
1484     {
1485       curlang = lang;
1486       lang->function (inf);
1487       return;
1488     }
1489
1490   /* Try to guess the language given the file name. */
1491   lang = get_language_from_suffix (file);
1492   if (lang != NULL && lang->function != NULL)
1493     {
1494       curlang = lang;
1495       lang->function (inf);
1496       return;
1497     }
1498
1499   /* Look for sharp-bang as the first two characters. */
1500   if (readline_internal (&lb, inf) > 0
1501       && lb.len >= 2
1502       && lb.buffer[0] == '#'
1503       && lb.buffer[1] == '!')
1504     {
1505       char *lp;
1506
1507       /* Set lp to point at the first char after the last slash in the
1508          line or, if no slashes, at the first nonblank.  Then set cp to
1509          the first successive blank and terminate the string. */
1510       lp = etags_strrchr (lb.buffer+2, '/');
1511       if (lp != NULL)
1512         lp += 1;
1513       else
1514         lp = skip_spaces (lb.buffer + 2);
1515       cp = skip_non_spaces (lp);
1516       *cp = '\0';
1517
1518       if (strlen (lp) > 0)
1519         {
1520           lang = get_language_from_interpreter (lp);
1521           if (lang != NULL && lang->function != NULL)
1522             {
                   /* INF is not rewound here, so the parser starts after
                      the `#!' line that was just read. */
1523               curlang = lang;
1524               lang->function (inf);
1525               return;
1526             }
1527         }
1528     }
1529   /* We rewind here, even if inf may be a pipe.  We fail if the
1530      length of the first line is longer than the pipe block size,
1531      which is unlikely. */
1532   rewind (inf);
1533
1534   /* Try Fortran. */
       /* Remember last_node: if it is unchanged afterwards, the Fortran
          parser added no node. */
1535   old_last_node = last_node;
1536   curlang = get_language_from_name ("fortran");
1537   Fortran_functions (inf);
1538
1539   /* No Fortran entries found.  Try C. */
1540   if (old_last_node == last_node)
1541     {
1542       /* We do not tag if rewind fails.
1543          Only the file name will be recorded in the tags file. */
1544       rewind (inf);
1545       curlang = get_language_from_name (cplusplus ? "c++" : "c");
1546       default_C_entries (inf);
1547     }
1548   return;
1549 }
1550 \f
1551 /* Record a tag. */
     /* A node is allocated, filled in and handed to add_node for the tree
        at `head'.  NAME is stored in the node as is, not copied, and
        free_tree later passes it to free.  The stored pattern is a fresh
        copy of LINESTART: the first LINELEN characters, or, in ctags mode
        without cxref style, at most 50 characters with a `$' appended
        when the line is shorter than that.  In ctags mode a call with a
        NULL NAME records nothing. */
1552 void
1553 pfnote (name, is_func, linestart, linelen, lno, cno)
1554      char *name;                /* tag name, or NULL if unnamed */
1555      bool is_func;              /* tag is a function */
1556      char *linestart;           /* start of the line where tag is */
1557      int linelen;               /* length of the line where tag is */
1558      int lno;                   /* line number */
1559      long cno;                  /* character number */
1560 {
1561   register node *np;
1562
1563   if (CTAGS && name == NULL)
1564     return;
1565
1566   np = xnew (1, node);
1567
1568   /* If ctags mode, change name "main" to M<thisfilename>. */
1569   if (CTAGS && !cxref_style && streq (name, "main"))
1570     {
           /* Use the last component of curfile, and strip its extension
              when that consists of exactly one character (as in `.c').
              NOTE(review): NAME itself is neither stored nor freed on
              this path. */
1571       register char *fp = etags_strrchr (curfile, '/');
1572       np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1573       fp = etags_strrchr (np->name, '.');
1574       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1575         fp[0] = '\0';
1576     }
1577   else
1578     np->name = name;
1579   np->been_warned = FALSE;
1580   np->file = curfile;
1581   np->is_func = is_func;
1582   np->lno = lno;
1583   /* Our char numbers are 0-base, because of C language tradition?
1584      ctags compatibility?  old versions compatibility?   I don't know.
1585      Anyway, since emacs's are 1-base we expect etags.el to take care
1586      of the difference.  If we wanted to have 1-based numbers, we would
1587      uncomment the +1 below. */
1588   np->cno = cno /* + 1 */ ;
1589   np->left = np->right = NULL;
1590   if (CTAGS && !cxref_style)
1591     {
           /* LINELEN is not used here: the length of LINESTART up to its
              terminating null decides which form is stored. */
1592       if (strlen (linestart) < 50)
1593         np->pat = concat (linestart, "$", "");
1594       else
1595         np->pat = savenstr (linestart, 50);
1596     }
1597   else
1598     np->pat = savenstr (linestart, linelen);
1599
1600   add_node (np, &head);
1601 }
1602
1603 /* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997]
1604  * From: Sam Kendall <kendall@mv.mv.com>
1605  * Subject: Proposal for firming up the TAGS format specification
1606  * To: F.Potorti@cnuce.cnr.it
1607  *
1608  * pfnote should emit the optimized form [unnamed tag] only if:
1609  *  1. name does not contain any of the characters " \t\r\n(),;";
1610  *  2. linestart contains name as either a rightmost, or rightmost but
1611  *     one character, substring;
1612  *  3. the character, if any, immediately before name in linestart must
1613  *     be one of the characters " \t(),;";
1614  *  4. the character, if any, immediately after name in linestart must
1615  *     also be one of the characters " \t(),;".
1616  *
1617  * The real implementation uses the notinname() macro, which recognises
1618  * characters slightly different from " \t\r\n(),;".  See the variable
1619  * `nonam'.
      *
      * new_pfnote applies these rules in etags mode only; in ctags mode
      * the tag is always named.  A named tag gets a fresh copy of the
      * first NAMELEN characters of NAME; an unnamed one is passed to
      * pfnote with a NULL name.
1620  */
1621 #define traditional_tag_style TRUE
1622 void
1623 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1624      char *name;                /* tag name, or NULL if unnamed */
1625      int namelen;               /* tag length */
1626      bool is_func;              /* tag is a function */
1627      char *linestart;           /* start of the line where tag is */
1628      int linelen;               /* length of the line where tag is */
1629      int lno;                   /* line number */
1630      long cno;                  /* character number */
1631 {
1632   register char *cp;
1633   bool named;
1634
1635   named = TRUE;
1636   if (!CTAGS)
1637     {
           /* Scan NAME up to its first notinname character.  The scan is
              not bounded by NAMELEN, and rule #1 holds only if it stops
              at the terminating null. */
1638       for (cp = name; !notinname (*cp); cp++)
1639         continue;
1640       if (*cp == '\0')                          /* rule #1 */
1641         {
               /* CP is where NAME must start to be the rightmost
                  substring of the line, or the rightmost but one when
                  the line ends with a notinname character. */
1642           cp = linestart + linelen - namelen;
1643           if (notinname (linestart[linelen-1]))
1644             cp -= 1;                            /* rule #4 */
1645           if (cp >= linestart                   /* rule #2 */
1646               && (cp == linestart
1647                   || notinname (cp[-1]))        /* rule #3 */
1648               && strneq (name, cp, namelen))    /* rule #2 */
1649             named = FALSE;      /* use unnamed tag */
1650         }
1651     }
1652
1653   if (named)
1654     name = savenstr (name, namelen);
1655   else
1656     name = NULL;
1657   pfnote (name, is_func, linestart, linelen, lno, cno);
1658 }
1659
1660 /*
1661  * free_tree ()
1662  *      recurse on left children, iterate on right children.
      *      For each node its name (when not NULL), its pat and the node
      *      itself are freed; the file member is left alone.
1663  */
1664 void
1665 free_tree (np)
1666      register node *np;
1667 {
1668   while (np)
1669     {
           /* Save the right link before the node is freed. */
1670       register node *node_right = np->right;
1671       free_tree (np->left);
1672       if (np->name != NULL)
1673         free (np->name);
1674       free (np->pat);
1675       free (np);
1676       np = node_right;
1677     }
1678 }
1679
1680 /*
1681  * add_node ()
1682  *      Adds a node to the tree of nodes.  In etags mode, we don't keep
1683  *      it sorted; we just keep a linear list.  In ctags mode, maintain
1684  *      an ordered tree, with no attempt at balancing.
1685  *
1686  *      add_node is the only function allowed to add nodes, so it can
1687  *      maintain state.
      *
      *      NP is the node to add and CUR_NODE_P addresses the link to
      *      the (sub)tree it goes into.  The state kept is last_node,
      *      which is set whenever a node is stored in an empty link or
      *      appended in etags mode.
1688  */
1689 void
1690 add_node (np, cur_node_p)
1691      node *np, **cur_node_p;
1692 {
1693   register int dif;
1694   register node *cur_node = *cur_node_p;
1695
       /* Empty (sub)tree: NP goes right here, in both modes. */
1696   if (cur_node == NULL)
1697     {
1698       *cur_node_p = np;
1699       last_node = np;
1700       return;
1701     }
1702
1703   if (!CTAGS)
1704     {
1705       /* Etags Mode */
           /* Append after the node added last, using the right links as
              list links; CUR_NODE_P is not used any further. */
1706       if (last_node == NULL)
1707         fatal ("internal error in add_node", (char *)NULL);
1708       last_node->right = np;
1709       last_node = np;
1710     }
1711   else
1712     {
1713       /* Ctags Mode */
1714       dif = strcmp (np->name, cur_node->name);
1715
1716       /*
1717        * If this tag name matches an existing one, then
1718        * do not add the node, but maybe print a warning.
1719        */
1720       if (!dif)
1721         {
               /* NOTE(review): NP is dropped on this path without being
                  freed here.  The line number printed for a duplicate in
                  the same file is the global lineno, not np->lno. */
1722           if (streq (np->file, cur_node->file))
1723             {
1724               if (!no_warnings)
1725                 {
1726                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1727                            np->file, lineno, np->name);
1728                   fprintf (stderr, "Second entry ignored\n");
1729                 }
1730             }
               /* Duplicates across different files are reported only once
                  for each node already in the tree. */
1731           else if (!cur_node->been_warned && !no_warnings)
1732             {
1733               fprintf
1734                 (stderr,
1735                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
1736                  np->file, cur_node->file, np->name);
1737               cur_node->been_warned = TRUE;
1738             }
1739           return;
1740         }
1741
1742       /* Actually add the node */
           /* Smaller names go into the left subtree, greater ones into
              the right one. */
1743       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1744     }
1745 }
1746 \f
1747 void
1748 put_entries (np)
1749      register node *np;
1750 {
       /* Write the entries of the tree rooted at NP: left subtree first,
          then NP itself, then the right subtree.  The output goes to tagf,
          except for cxref style, which goes to stdout.  Nothing is freed
          here. */
1751   register char *sp;
1752
1753   if (np == NULL)
1754     return;
1755
1756   /* Output subentries that precede this one */
1757   put_entries (np->left);
1758
1759   /* Output this entry */
1760
1761   if (!CTAGS)
1762     {
           /* Etags format: the pattern and \177, then, for a named tag
              only, the name and \001, then "LNO,CNO". */
1763       if (np->name != NULL)
1764         fprintf (tagf, "%s\177%s\001%d,%ld\n",
1765                  np->pat, np->name, np->lno, np->cno);
1766       else
1767         fprintf (tagf, "%s\177%d,%ld\n",
1768                  np->pat, np->lno, np->cno);
1769     }
1770   else
1771     {
           /* Nothing in this function stops the output code below from
              running after this error has been reported. */
1772       if (np->name == NULL)
1773         error ("internal error: NULL name in ctags mode.", (char *)NULL);
1774
1775       if (cxref_style)
1776         {
               /* vgrind style prints (lno + 63) / 64 in place of the line
                  number. */
1777           if (vgrind_style)
1778             fprintf (stdout, "%s %s %d\n",
1779                      np->name, np->file, (np->lno + 63) / 64);
1780           else
1781             fprintf (stdout, "%-16s %3d %-16s %s\n",
1782                      np->name, np->lno, np->file, np->pat);
1783         }
1784       else
1785         {
               /* Tags file line: name, tab, file, tab, then either a
                  search pattern or a line number. */
1786           fprintf (tagf, "%s\t%s\t", np->name, np->file);
1787
1788           if (np->is_func)
1789             {                   /* a function */
                   /* The pattern is enclosed in SEARCHAR, anchored with
                      `^'; backslashes and SEARCHAR inside it are escaped
                      with a backslash. */
1790               putc (searchar, tagf);
1791               putc ('^', tagf);
1792
1793               for (sp = np->pat; *sp; sp++)
1794                 {
1795                   if (*sp == '\\' || *sp == searchar)
1796                     putc ('\\', tagf);
1797                   putc (*sp, tagf);
1798                 }
1799               putc (searchar, tagf);
1800             }
1801           else
1802             {                   /* a typedef; text pattern inadequate */
1803               fprintf (tagf, "%d", np->lno);
1804             }
1805           putc ('\n', tagf);
1806         }
1807     }
1808
1809   /* Output subentries that follow this one */
1810   put_entries (np->right);
1811 }
1812
1813 /* Length of a number's decimal representation. */
     /* This is the number of digits of NUM when NUM is positive.  For
        zero and for negative values the loop body never runs and 1 is
        returned; no sign character is counted. */
1814 int
1815 number_len (num)
1816      long num;
1817 {
1818   int len = 1;
       /* One more digit for each division by ten that leaves something. */
1819   while ((num /= 10) > 0)
1820     len += 1;
1821   return len;
1822 }
1823
1824 /*
1825  * Return total number of characters that put_entries will output for
1826  * the nodes in the subtree of the specified node.  Works only if
1827  * we are not ctags, but called only in that case.  This count
1828  * is irrelevant with the new tags.el, but is still supplied for
1829  * backward compatibility.
      * The subtree is walked by iterating on right children and
      * recursing on left children.
1830  */
1831 int
1832 total_size_of_entries (np)
1833      register node *np;
1834 {
1835   register int total;
1836
1837   if (np == NULL)
1838     return 0;
1839
1840   for (total = 0; np != NULL; np = np->right)
1841     {
1842       /* Count left subentries. */
1843       total += total_size_of_entries (np->left);
1844
1845       /* Count this entry */
           /* The counts follow the etags line written by put_entries:
              the pattern plus \177, then the digits of lno plus the
              comma, then the digits of cno plus the newline. */
1846       total += strlen (np->pat) + 1;
1847       total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1848       if (np->name != NULL)
1849         total += 1 + strlen (np->name); /* \001name */
1850     }
1851
1852   return total;
1853 }
1854 \f
1855 /*
1856  * The C symbol tables.
      * sym_type is the kind attached to each keyword listed in the
      * gperf input that follows this declaration.
1857  */
1858 enum sym_type
1859 {
1860   st_none,
1861   st_C_objprot, st_C_objimpl, st_C_objend,
1862   st_C_gnumacro,
1863   st_C_ignore,
1864   st_C_javastruct,
1865   st_C_operator,
1866   st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
1867 };
1868
1869 /* Feed stuff between (but not including) %[ and %] lines to:
1870       gperf -c -k 1,3 -o -p -r -t
1871 %[
1872 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1873 %%
1874 if,             0,      st_C_ignore
1875 for,            0,      st_C_ignore
1876 while,          0,      st_C_ignore
1877 switch,         0,      st_C_ignore
1878 return,         0,      st_C_ignore
1879 @interface,     0,      st_C_objprot
1880 @protocol,      0,      st_C_objprot
1881 @implementation,0,      st_C_objimpl
1882 @end,           0,      st_C_objend
1883 import,         C_JAVA, st_C_ignore
1884 package,        C_JAVA, st_C_ignore
1885 friend,         C_PLPL, st_C_ignore
1886 extends,        C_JAVA, st_C_javastruct
1887 implements,     C_JAVA, st_C_javastruct
1888 interface,      C_JAVA, st_C_struct
1889 class,          C_PLPL, st_C_struct
1890 namespace,      C_PLPL, st_C_struct
1891 domain,         C_STAR, st_C_struct
1892 union,          0,      st_C_struct
1893 struct,         0,      st_C_struct
1894 extern,         0,      st_C_extern
1895 enum,           0,      st_C_enum
1896 typedef,        0,      st_C_typedef
1897 define,         0,      st_C_define
1898 operator,       C_PLPL, st_C_operator
1899 bool,           C_PLPL, st_C_typespec
1900 long,           0,      st_C_typespec
1901 short,          0,      st_C_typespec
1902 int,            0,      st_C_typespec
1903 char,           0,      st_C_typespec
1904 float,          0,      st_C_typespec
1905 double,         0,      st_C_typespec
1906 signed,         0,      st_C_typespec
1907 unsigned,       0,      st_C_typespec
1908 auto,           0,      st_C_typespec
1909 void,           0,      st_C_typespec
1910 static,         0,      st_C_typespec
1911 const,          0,      st_C_typespec
1912 volatile,       0,      st_C_typespec
1913 explicit,       C_PLPL, st_C_typespec
1914 mutable,        C_PLPL, st_C_typespec
1915 typename,       C_PLPL, st_C_typespec
1916 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1917 DEFUN,          0,      st_C_gnumacro
1918 SYSCALL,        0,      st_C_gnumacro
1919 ENTRY,          0,      st_C_gnumacro
1920 PSEUDO,         0,      st_C_gnumacro
1921 # These are defined inside C functions, so currently they are not met.
1922 # EXFUN used in glibc, DEFVAR_* in emacs.
1923 #EXFUN,         0,      st_C_gnumacro
1924 #DEFVAR_,       0,      st_C_gnumacro
1925 %]
1926 and replace lines between %< and %> with its output. */
1927 /*%<*/
1928 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
1929 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
1930 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }; /* keyword, required c_ext bits (0 = any), class */
1931
1932 #define TOTAL_KEYWORDS 46         /* keywords listed in the gperf input above */
1933 #define MIN_WORD_LENGTH 2         /* shortest keyword: "if" */
1934 #define MAX_WORD_LENGTH 15        /* longest keyword: "@implementation" */
1935 #define MIN_HASH_VALUE 13         /* lowest slot used in wordlist: "@end" */
1936 #define MAX_HASH_VALUE 123        /* last slot of wordlist: "SYSCALL" */
1937 /* maximum key range = 111, duplicates = 0 */
1938
/* Hash function of the keyword table, derived from gperf output.
   The key is LEN plus the association value of the first character
   of STR and, unless LEN is 1 or 2, of its third character.
   in_word_set only calls this with LEN between MIN_WORD_LENGTH and
   MAX_WORD_LENGTH, so a LEN of 0 or 1 is never seen in practice. */
#ifdef __GNUC__
__inline
#endif
static unsigned int
hash (str, len)
     register const char *str;
     register unsigned int len;
{
  /* Indexed by character code; 124 (one more than MAX_HASH_VALUE)
     marks characters that occur in no keyword at position 1 or 3. */
  static unsigned char asso_values[] =
    {
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124,   3, 124, 124, 124,  43,   6,
       11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
       11, 124, 124,  58,   7, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124,  57,   7,  42,
        4,  14,  52,   0, 124,  53, 124, 124,  29,  11,
        6,  35,  32, 124,  29,  34,  59,  58,  51,  24,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
      124, 124, 124, 124, 124, 124
    };
  register int key = len;

  /* Only tokens of length 1 or 2 skip the third character. */
  if (key != 1 && key != 2)
    key += asso_values[(unsigned char) str[2]];
  key += asso_values[(unsigned char) str[0]];

  return key;
}
1990
1991 #ifdef __GNUC__
1992 __inline
1993 #endif
1994 struct C_stab_entry *
1995 in_word_set (str, len)
1996      register const char *str;
1997      register unsigned int len;
1998 {
1999   static struct C_stab_entry wordlist[] =
2000     {
2001       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2002       {""}, {""}, {""}, {""},
2003       {"@end",          0,      st_C_objend},
2004       {""}, {""}, {""}, {""},
2005       {"ENTRY",         0,      st_C_gnumacro},
2006       {"@interface",    0,      st_C_objprot},
2007       {""},
2008       {"domain",        C_STAR, st_C_struct},
2009       {""},
2010       {"PSEUDO",                0,      st_C_gnumacro},
2011       {""}, {""},
2012       {"namespace",     C_PLPL, st_C_struct},
2013       {""}, {""},
2014       {"@implementation",0,     st_C_objimpl},
2015       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2016       {"long",          0,      st_C_typespec},
2017       {"signed",        0,      st_C_typespec},
2018       {"@protocol",     0,      st_C_objprot},
2019       {""}, {""}, {""}, {""},
2020       {"bool",          C_PLPL, st_C_typespec},
2021       {""}, {""}, {""}, {""}, {""}, {""},
2022       {"const",         0,      st_C_typespec},
2023       {"explicit",      C_PLPL, st_C_typespec},
2024       {"if",            0,      st_C_ignore},
2025       {""},
2026       {"operator",      C_PLPL, st_C_operator},
2027       {""},
2028       {"DEFUN",         0,      st_C_gnumacro},
2029       {""}, {""},
2030       {"define",        0,      st_C_define},
2031       {""}, {""}, {""}, {""}, {""},
2032       {"double",        0,      st_C_typespec},
2033       {"struct",        0,      st_C_struct},
2034       {""}, {""}, {""}, {""},
2035       {"short",         0,      st_C_typespec},
2036       {""},
2037       {"enum",          0,      st_C_enum},
2038       {"mutable",       C_PLPL, st_C_typespec},
2039       {""},
2040       {"extern",        0,      st_C_extern},
2041       {"extends",       C_JAVA, st_C_javastruct},
2042       {"package",       C_JAVA, st_C_ignore},
2043       {"while",         0,      st_C_ignore},
2044       {""},
2045       {"for",           0,      st_C_ignore},
2046       {""}, {""}, {""},
2047       {"volatile",      0,      st_C_typespec},
2048       {""}, {""},
2049       {"import",                C_JAVA, st_C_ignore},
2050       {"float",         0,      st_C_typespec},
2051       {"switch",                0,      st_C_ignore},
2052       {"return",                0,      st_C_ignore},
2053       {"implements",    C_JAVA, st_C_javastruct},
2054       {""},
2055       {"static",        0,      st_C_typespec},
2056       {"typedef",       0,      st_C_typedef},
2057       {"typename",      C_PLPL, st_C_typespec},
2058       {"unsigned",      0,      st_C_typespec},
2059       {""}, {""},
2060       {"char",          0,      st_C_typespec},
2061       {"class",         C_PLPL, st_C_struct},
2062       {""}, {""}, {""},
2063       {"void",          0,      st_C_typespec},
2064       {""}, {""},
2065       {"friend",                C_PLPL, st_C_ignore},
2066       {""}, {""}, {""},
2067       {"int",           0,      st_C_typespec},
2068       {"union",         0,      st_C_struct},
2069       {""}, {""}, {""},
2070       {"auto",          0,      st_C_typespec},
2071       {"interface",     C_JAVA, st_C_struct},
2072       {""},
2073       {"SYSCALL",       0,      st_C_gnumacro}
2074     };
2075
2076   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2077     {
2078       register int key = hash (str, len);
2079
2080       if (key <= MAX_HASH_VALUE && key >= 0)
2081         {
2082           register const char *s = wordlist[key].name;
2083
2084           if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2085             return &wordlist[key];
2086         }
2087     }
2088   return 0;
2089 }
2090 /*%>*/
2091
2092 enum sym_type
2093 C_symtype (str, len, c_ext)
2094      char *str;
2095      int len;
2096      int c_ext;
2097 {
2098   register struct C_stab_entry *se = in_word_set (str, len);
2099
2100   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2101     return st_none;
2102   return se->type;
2103 }
2104 \f
2105  /*
2106   * C functions and variables are recognized using a simple
2107   * finite automaton.  fvdef is its state variable; it is advanced by
2108   * consider_token () and C_entries (), which resets it to fvnone per file.  */
2109 enum
2110 {
2111   fvnone,                       /* nothing seen */
2112   foperator,                    /* func: operator keyword seen (cplpl) */
2113   fvnameseen,                   /* function or variable name seen */
2114   fstartlist,                   /* func: just after open parenthesis */
2115   finlist,                      /* func: in parameter list; tokens are not considered */
2116   flistseen,                    /* func: after parameter list */
2117   fignore,                      /* func: before open brace */
2118   vignore                       /* var-like: ignore until ';' */
2119 } fvdef;
2120
2121 bool fvextern;                  /* func or var: extern keyword seen; cleared again on typedef, ignored keywords and quotes */
2122
2123  /*
2124   * typedefs are recognized using a simple finite automaton.
2125   * typdef is its state variable.  consider_token () acts on tnone, tkeyseen and tend.
2126   */
2127 enum
2128 {
2129   tnone,                        /* nothing seen */
2130   tkeyseen,                     /* typedef keyword seen (only entered when typedefs is set) */
2131   ttypeseen,                    /* defined type seen */
2132   tinbody,                      /* inside typedef body */
2133   tend,                         /* just before typedef tag: next non-type token is tagged */
2134   tignore                       /* junk after typedef tag; tokens are not considered */
2135 } typdef;
2136
2137
2138  /*
2139   * struct-like structures (enum, struct and union) are recognized
2140   * using another simple finite automaton.  `structdef' is its state
2141   * variable.
2142   */
2143 enum
2144 {
2145   snone,                        /* nothing seen yet */
2146   skeyseen,                     /* struct-like keyword seen */
2147   stagseen,                     /* struct-like tag seen */
2148   scolonseen,                   /* colon seen after struct-like tag (also after Java extends/implements) */
2149   sinbody                       /* in struct body: recognize member func defs*/
2150 } structdef;
2151
2152 /*
2153  * When structdef is stagseen, scolonseen, or sinbody, structtag is the
2154  * struct tag, and structtype is the type of the preceding struct-like
2155  * keyword.  For any structtype but st_C_struct, structtag is the constant "<enum>".
2156  */
2157 char *structtag = "<uninited>";
2158 enum sym_type structtype;
2159
2160 /*
2161  * When objdef is different from onone, objtag is the name of the class.
2162  * It is a copy made with savenstr () by consider_token () and is never freed.  */
2163 char *objtag = "<uninited>";
2164
2165 /*
2166  * Yet another little state machine to deal with preprocessor lines.
2167  */
2168 enum
2169 {
2170   dnone,                        /* nothing seen; CNL () resets to this on each new line */
2171   dsharpseen,                   /* '#' seen as first char on line */
2172   ddefineseen,                  /* '#' and 'define' seen; the next token is the macro name */
2173   dignorerest                   /* ignore rest of line */
2174 } definedef;
2175
2176 /*
2177  * State machine for Objective C protocols and implementations.
2178  * Tom R.Hageman <tom@basil.icce.rug.nl>
2179  */
2180 enum
2181 {
2182   onone,                        /* nothing seen */
2183   oprotocol,                    /* @interface or @protocol seen */
2184   oimplementation,              /* @implementation seen */
2185   otagseen,                     /* class name seen */
2186   oparenseen,                   /* parenthesis before category seen */
2187   ocatseen,                     /* category name seen */
2188   oinbody,                      /* in @implementation body */
2189   omethodsign,                  /* in @implementation body, after +/- */
2190   omethodtag,                   /* after method name */
2191   omethodcolon,                 /* after method colon */
2192   omethodparm,                  /* after method parameter */
2193   oignore                       /* wait for @end */
2194 } objdef;
2195
2196
2197 /*
2198  * Use this structure to keep info about the token read, and how it
2199  * should be tagged.  Used by the make_C_tag function to build a tag.
2200  */
2201 typedef struct
2202 {
2203   bool valid;                   /* a tag is pending; make_C_tag () clears it after emitting */
2204   char *str;                    /* NOTE(review): not referenced in the code visible here */
2205   bool named;                   /* traditional tag style: give the tag an explicit name */
2206   int linelen;                  /* the remaining members are handed unchanged */
2207   int lineno;                   /* to pfnote () or new_pfnote () by make_C_tag () */
2208   long linepos;
2209   char *buffer;
2210 } token;
2211
2212 token tok;                      /* latest token read */
2213
2214 /*
2215  * Set this to TRUE, and the next token considered is called a function.
2216  * Set by consider_token () after a st_C_gnumacro keyword (DEFUN and friends).
2217  */
2218 bool next_token_is_func;
2219
2220 /*
2221  * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
2222  */
2223 bool yacc_rules;                /* toggled by C_entries () at each "%%" of a yacc file */
2224
2225 /*
2226  * methodlen is the length of the method name stored in token_name.
2227  */
2228 int methodlen;                  /* Objective C only; maintained by consider_token () */
2229
2230 /*
2231  * consider_token ()
2232  *      checks to see if the current token is at the start of a
2233  *      function or variable, or corresponds to a typedef, or
2234  *      is a struct/union/enum tag, or #define, or an enum constant.
2235  *
2236  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2237  *      with args.  C_EXT is which language we are looking at.
2238  *
2239  * Globals
2240  *      fvdef                   IN OUT
2241  *      structdef               IN OUT
2242  *      definedef               IN OUT
2243  *      typdef                  IN OUT
2244  *      objdef                  IN OUT
2245  *      next_token_is_func      IN OUT
2246  */
2247
2248 bool
2249 consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
2250      register char *str;        /* IN: token pointer */
2251      register int len;          /* IN: token length */
2252      register char c;           /* IN: first char after the token */
2253      int c_ext;                 /* IN: C extensions mask */
2254      int cblev;                 /* IN: curly brace level */
2255      int parlev;                /* IN: parenthesis level */
2256      bool *is_func_or_var;      /* OUT: function or variable found */
2257 {
2258   enum sym_type toktype = C_symtype (str, len, c_ext);
2259
2260   /*
2261    * Advance the definedef state machine.
2262    */
2263   switch (definedef)
2264     {
2265     case dnone:
2266       /* We're not on a preprocessor line. */
2267       break;
2268     case dsharpseen:
2269       if (toktype == st_C_define)
2270         {
2271           definedef = ddefineseen;
2272         }
2273       else
2274         {
2275           definedef = dignorerest;
2276         }
2277       return FALSE;
2278     case ddefineseen:
2279       /*
2280        * Make a tag for any macro, unless it is a constant
2281        * and constantypedefs is FALSE.
2282        */
2283       definedef = dignorerest;
2284       *is_func_or_var = (c == '(');
2285       if (!*is_func_or_var && !constantypedefs)
2286         return FALSE;
2287       else
2288         return TRUE;
2289     case dignorerest:
2290       return FALSE;
2291     default:
2292       error ("internal error: definedef value.", (char *)NULL);
2293     }
2294
2295   /*
2296    * Now typedefs
2297    */
2298   switch (typdef)
2299     {
2300     case tnone:
2301       if (toktype == st_C_typedef)
2302         {
2303           if (typedefs)
2304             typdef = tkeyseen;
2305           fvextern = FALSE;
2306           fvdef = fvnone;
2307           return FALSE;
2308         }
2309       break;
2310     case tkeyseen:
2311       switch (toktype)
2312         {
2313         case st_none:
2314         case st_C_typespec:
2315         case st_C_struct:
2316         case st_C_enum:
2317           typdef = ttypeseen;
2318           break;
2319         }
2320       /* Do not return here, so the structdef stuff has a chance. */
2321       break;
2322     case tend:
2323       switch (toktype)
2324         {
2325         case st_C_typespec:
2326         case st_C_struct:
2327         case st_C_enum:
2328           return FALSE;
2329         }
2330       return TRUE;
2331     }
2332
2333   /*
2334    * This structdef business is currently only invoked when cblev==0.
2335    * It should be recursively invoked whatever the curly brace level,
2336    * and a stack of states kept, to allow for definitions of structs
2337    * within structs.
2338    *
2339    * This structdef business is NOT invoked when we are ctags and the
2340    * file is plain C.  This is because a struct tag may have the same
2341    * name as another tag, and this loses with ctags.
2342    */
2343   switch (toktype)
2344     {
2345     case st_C_javastruct:
2346       if (structdef == stagseen)
2347         structdef = scolonseen;
2348       return FALSE;
2349     case st_C_struct:
2350     case st_C_enum:
2351       if (typdef == tkeyseen
2352           || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
2353         {
2354           structdef = skeyseen;
2355           structtype = toktype;
2356         }
2357       return FALSE;
2358     }
2359
2360   if (structdef == skeyseen)
2361     {
2362       /* Save the tag for struct/union/class, for functions and variables
2363          that may be defined inside. */
2364       if (structtype == st_C_struct)
2365         structtag = savenstr (str, len);
2366       else
2367         structtag = "<enum>";
2368       structdef = stagseen;
2369       return TRUE;
2370     }
2371
2372   if (typdef != tnone)
2373     definedef = dnone;
2374
2375   /* Detect GNU macros.
2376
2377      Writers of emacs code are recommended to put the
2378      first two args of a DEFUN on the same line.
2379
2380       The DEFUN macro, used in emacs C source code, has a first arg
2381      that is a string (the lisp function name), and a second arg that
2382      is a C function name.  Since etags skips strings, the second arg
2383      is tagged.  This is unfortunate, as it would be better to tag the
2384      first arg.  The simplest way to deal with this problem would be
2385      to name the tag with a name built from the function name, by
2386      removing the initial 'F' character and substituting '-' for '_'.
2387      Anyway, this assumes that the conventions of naming lisp
2388      functions will never change.  Currently, this method is not
2389      implemented. */
2390   if (definedef == dnone && toktype == st_C_gnumacro)
2391     {
2392       next_token_is_func = TRUE;
2393       return FALSE;
2394     }
2395   if (next_token_is_func)
2396     {
2397       next_token_is_func = FALSE;
2398       fvdef = fignore;
2399       *is_func_or_var = TRUE;
2400       return TRUE;
2401     }
2402
2403   /* Detect Objective C constructs. */
2404   switch (objdef)
2405     {
2406     case onone:
2407       switch (toktype)
2408         {
2409         case st_C_objprot:
2410           objdef = oprotocol;
2411           return FALSE;
2412         case st_C_objimpl:
2413           objdef = oimplementation;
2414           return FALSE;
2415         }
2416       break;
2417     case oimplementation:
2418       /* Save the class tag for functions or variables defined inside. */
2419       objtag = savenstr (str, len);
2420       objdef = oinbody;
2421       return FALSE;
2422     case oprotocol:
2423       /* Save the class tag for categories. */
2424       objtag = savenstr (str, len);
2425       objdef = otagseen;
2426       *is_func_or_var = TRUE;
2427       return TRUE;
2428     case oparenseen:
2429       objdef = ocatseen;
2430       *is_func_or_var = TRUE;
2431       return TRUE;
2432     case oinbody:
2433       break;
2434     case omethodsign:
2435       if (parlev == 0)
2436         {
2437           objdef = omethodtag;
2438           methodlen = len;
2439           grow_linebuffer (&token_name, methodlen + 1);
2440           strncpy (token_name.buffer, str, len);
2441           token_name.buffer[methodlen] = '\0';
2442           token_name.len = methodlen;
2443           return TRUE;
2444         }
2445       return FALSE;
2446     case omethodcolon:
2447       if (parlev == 0)
2448         objdef = omethodparm;
2449       return FALSE;
2450     case omethodparm:
2451       if (parlev == 0)
2452         {
2453           objdef = omethodtag;
2454           methodlen += len;
2455           grow_linebuffer (&token_name, methodlen + 1);
2456           strncat (token_name.buffer, str, len);
2457           token_name.len = methodlen;
2458           return TRUE;
2459         }
2460       return FALSE;
2461     case oignore:
2462       if (toktype == st_C_objend)
2463         {
2464           /* Memory leakage here: the string pointed by objtag is
2465              never released, because many tests would be needed to
2466              avoid breaking on incorrect input code.  The amount of
2467              memory leaked here is the sum of the lengths of the
2468              class tags.
2469           free (objtag); */
2470           objdef = onone;
2471         }
2472       return FALSE;
2473     }
2474
2475   /* A function, variable or enum constant? */
2476   switch (toktype)
2477     {
2478     case st_C_extern:
2479       fvextern = TRUE;
2480       /* FALLTHRU */
2481     case st_C_typespec:
2482       if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2483         fvdef = fvnone;         /* should be useless */
2484       return FALSE;
2485     case st_C_ignore:
2486       fvextern = FALSE;
2487       fvdef = vignore;
2488       return FALSE;
2489     case st_C_operator:
2490       fvdef = foperator;
2491       *is_func_or_var = TRUE;
2492       return TRUE;
2493     case st_none:
2494       if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
2495         {
2496           fvdef = foperator;
2497           *is_func_or_var = TRUE;
2498           return TRUE;
2499         }
2500       if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
2501         return TRUE;
2502       if (fvdef == fvnone)
2503         {
2504           fvdef = fvnameseen;   /* function or variable */
2505           *is_func_or_var = TRUE;
2506           return TRUE;
2507         }
2508       break;
2509     }
2510
2511   return FALSE;
2512 }
2513
2514 /*
2515  * C_entries ()
2516  *      This routine finds functions, variables, typedefs,
2517  *      #define's, enum constants and struct/union/enum definitions in
2518  *      C syntax and adds them to the list.
2519  */
2520 #define current_lb_is_new (newndx == curndx)  /* the current buffer holds the newest line */
2521 #define switch_line_buffers() (curndx = 1 - curndx)  /* flip curndx between 0 and 1 */
2522 /* Shorthands for the two entries of lbs[]: current, other and newest. */
2523 #define curlb (lbs[curndx].lb)
2524 #define othlb (lbs[1-curndx].lb)
2525 #define newlb (lbs[newndx].lb)
2526 #define curlinepos (lbs[curndx].linepos)
2527 #define othlinepos (lbs[1-curndx].linepos)
2528 #define newlinepos (lbs[newndx].linepos)
2529 /* Read the next input line into curlb and restart scanning at its beginning, leaving definedef alone. */
2530 #define CNL_SAVE_DEFINEDEF()                                            \
2531 do {                                                                    \
2532   curlinepos = charno;                                                  \
2533   lineno++;                                                             \
2534   linecharno = charno;                                                  \
2535   charno += readline (&curlb, inf);                                     \
2536   lp = curlb.buffer;                                                    \
2537   quotednl = FALSE;                                                     \
2538   newndx = curndx;                                                      \
2539 } while (0)
2540 /* As above, but also restore tok from a valid savetok and leave any preprocessor line (definedef = dnone). */
2541 #define CNL()                                                           \
2542 do {                                                                    \
2543   CNL_SAVE_DEFINEDEF();                                                 \
2544   if (savetok.valid)                                                    \
2545     {                                                                   \
2546       tok = savetok;                                                    \
2547       savetok.valid = FALSE;                                            \
2548     }                                                                   \
2549   definedef = dnone;                                                    \
2550 } while (0)
2551
2552
2553 void
2554 make_C_tag (isfun)
2555      bool isfun;
2556 {
2557   /* This function should never be called when tok.valid is FALSE, but
2558      we must protect against invalid input or internal errors. */
2559   if (tok.valid)
2560     {
2561       if (traditional_tag_style)
2562         {
2563           /* This was the original code.  Now we call new_pfnote instead,
2564              which uses the new method for naming tags (see new_pfnote). */
2565           char *name = NULL;
2566
2567           if (CTAGS || tok.named)
2568             name = savestr (token_name.buffer);
2569           pfnote (name, isfun,
2570                   tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2571         }
2572       else
2573         new_pfnote (token_name.buffer, token_name.len, isfun,
2574                     tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2575       tok.valid = FALSE;
2576     }
2577   else if (DEBUG)
2578     abort ();
2579 }
2580
2581
2582 void
2583 C_entries (c_ext, inf)
2584      int c_ext;                 /* extension of C */
2585      FILE *inf;                 /* input file */
2586 {
2587   register char c;              /* latest char read; '\0' for end of line */
2588   register char *lp;            /* pointer one beyond the character `c' */
2589   int curndx, newndx;           /* indices for current and new lb */
2590   register int tokoff;          /* offset in line of start of current token */
2591   register int toklen;          /* length of current token */
2592   char *qualifier;              /* string used to qualify names */
2593   int qlen;                     /* length of qualifier */
2594   int cblev;                    /* current curly brace level */
2595   int parlev;                   /* current parenthesis level */
2596   bool incomm, inquote, inchar, quotednl, midtoken;
2597   bool purec, cplpl, cjava;
2598   token savetok;                /* token saved during preprocessor handling */
2599
2600
2601   tokoff = toklen = 0;          /* keep compiler quiet */
2602   curndx = newndx = 0;
2603   lineno = 0;
2604   charno = 0;
2605   lp = curlb.buffer;
2606   *lp = 0;
2607
2608   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2609   structdef = snone; definedef = dnone; objdef = onone;
2610   next_token_is_func = yacc_rules = FALSE;
2611   midtoken = inquote = inchar = incomm = quotednl = FALSE;
2612   tok.valid = savetok.valid = FALSE;
2613   cblev = 0;
2614   parlev = 0;
2615   purec = !(c_ext & ~YACC);     /* no extensions (apart from possibly yacc) */
2616   cplpl = (c_ext & C_PLPL) == C_PLPL;
2617   cjava = (c_ext & C_JAVA) == C_JAVA;
2618   if (cjava)
2619     { qualifier = "."; qlen = 1; }
2620   else
2621     { qualifier = "::"; qlen = 2; }
2622
2623   while (!feof (inf))
2624     {
2625       c = *lp++;
2626       if (c == '\\')
2627         {
2628           /* If we're at the end of the line, the next character is a
2629              '\0'; don't skip it, because it's the thing that tells us
2630              to read the next line.  */
2631           if (*lp == '\0')
2632             {
2633               quotednl = TRUE;
2634               continue;
2635             }
2636           lp++;
2637           c = ' ';
2638         }
2639       else if (incomm)
2640         {
2641           switch (c)
2642             {
2643             case '*':
2644               if (*lp == '/')
2645                 {
2646                   c = *lp++;
2647                   incomm = FALSE;
2648                 }
2649               break;
2650             case '\0':
2651               /* Newlines inside comments do not end macro definitions in
2652                  traditional cpp. */
2653               CNL_SAVE_DEFINEDEF ();
2654               break;
2655             }
2656           continue;
2657         }
2658       else if (inquote)
2659         {
2660           switch (c)
2661             {
2662             case '"':
2663               inquote = FALSE;
2664               break;
2665             case '\0':
2666               /* Newlines inside strings do not end macro definitions
2667                  in traditional cpp, even though compilers don't
2668                  usually accept them. */
2669               CNL_SAVE_DEFINEDEF ();
2670               break;
2671             }
2672           continue;
2673         }
2674       else if (inchar)
2675         {
2676           switch (c)
2677             {
2678             case '\0':
2679               /* Hmmm, something went wrong. */
2680               CNL ();
2681               /* FALLTHRU */
2682             case '\'':
2683               inchar = FALSE;
2684               break;
2685             }
2686           continue;
2687         }
2688       else
2689         switch (c)
2690           {
2691           case '"':
2692             inquote = TRUE;
2693             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2694               {
2695                 fvextern = FALSE;
2696                 fvdef = fvnone;
2697               }
2698             continue;
2699           case '\'':
2700             inchar = TRUE;
2701             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2702               {
2703                 fvextern = FALSE;
2704                 fvdef = fvnone;
2705               }
2706             continue;
2707           case '/':
2708             if (*lp == '*')
2709               {
2710                 lp++;
2711                 incomm = TRUE;
2712                 continue;
2713               }
2714             else if (/* cplpl && */ *lp == '/')
2715               {
2716                 c = '\0';
2717                 break;
2718               }
2719             else
2720               break;
2721           case '%':
2722             if ((c_ext & YACC) && *lp == '%')
2723               {
2724                 /* entering or exiting rules section in yacc file */
2725                 lp++;
2726                 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2727                 typdef = tnone; structdef = snone;
2728                 next_token_is_func = FALSE;
2729                 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2730                 cblev = 0;
2731                 yacc_rules = !yacc_rules;
2732                 continue;
2733               }
2734             else
2735               break;
2736           case '#':
2737             if (definedef == dnone)
2738               {
2739                 char *cp;
2740                 bool cpptoken = TRUE;
2741
2742                 /* Look back on this line.  If all blanks, or nonblanks
2743                    followed by an end of comment, this is a preprocessor
2744                    token. */
2745                 for (cp = newlb.buffer; cp < lp-1; cp++)
2746                   if (!iswhite (*cp))
2747                     {
2748                       if (*cp == '*' && *(cp+1) == '/')
2749                         {
2750                           cp++;
2751                           cpptoken = TRUE;
2752                         }
2753                       else
2754                         cpptoken = FALSE;
2755                     }
2756                 if (cpptoken)
2757                   definedef = dsharpseen;
2758               } /* if (definedef == dnone) */
2759
2760             continue;
2761           } /* switch (c) */
2762
2763
2764       /* Consider token only if some complicated conditions are satisfied. */
2765       if ((definedef != dnone
2766            || (cblev == 0 && structdef != scolonseen)
2767            || (cblev == 1 && cplpl && structdef == sinbody)
2768            || (structdef == sinbody && purec))
2769           && typdef != tignore
2770           && definedef != dignorerest
2771           && fvdef != finlist)
2772         {
2773           if (midtoken)
2774             {
2775               if (endtoken (c))
2776                 {
2777                   bool funorvar = FALSE;
2778
2779                   if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2780                     {
2781                       /*
2782                        * This handles :: in the middle, but not at the
2783                        * beginning of an identifier.  Also, space-separated
2784                        * :: is not recognised.
2785                        */
2786                       lp += 2;
2787                       toklen += 2;
2788                       c = lp[-1];
2789                       goto intoken;
2790                     }
2791                   else
2792                     {
2793                       if (yacc_rules
2794                           || consider_token (newlb.buffer + tokoff, toklen, c,
2795                                              c_ext, cblev, parlev, &funorvar))
2796                         {
2797                           if (fvdef == foperator)
2798                             {
2799                               char *oldlp = lp;
2800                               lp = skip_spaces (lp-1);
2801                               if (*lp != '\0')
2802                                 lp += 1;
2803                               while (*lp != '\0'
2804                                      && !isspace (*lp) && *lp != '(')
2805                                 lp += 1;
2806                               c = *lp++;
2807                               toklen += lp - oldlp;
2808                             }
2809                           tok.named = FALSE;
2810                           if (!purec
2811                               && funorvar
2812                               && definedef == dnone
2813                               && structdef == sinbody)
2814                             /* function or var defined in C++ class body */
2815                             {
2816                               int len = strlen (structtag) + qlen + toklen;
2817                               grow_linebuffer (&token_name, len + 1);
2818                               strcpy (token_name.buffer, structtag);
2819                               strcat (token_name.buffer, qualifier);
2820                               strncat (token_name.buffer,
2821                                        newlb.buffer + tokoff, toklen);
2822                               token_name.len = len;
2823                               tok.named = TRUE;
2824                             }
2825                           else if (objdef == ocatseen)
2826                             /* Objective C category */
2827                             {
2828                               int len = strlen (objtag) + 2 + toklen;
2829                               grow_linebuffer (&token_name, len + 1);
2830                               strcpy (token_name.buffer, objtag);
2831                               strcat (token_name.buffer, "(");
2832                               strncat (token_name.buffer,
2833                                        newlb.buffer + tokoff, toklen);
2834                               strcat (token_name.buffer, ")");
2835                               token_name.len = len;
2836                               tok.named = TRUE;
2837                             }
2838                           else if (objdef == omethodtag
2839                                    || objdef == omethodparm)
2840                             /* Objective C method */
2841                             {
2842                               tok.named = TRUE;
2843                             }
2844                           else
2845                             {
2846                               grow_linebuffer (&token_name, toklen + 1);
2847                               strncpy (token_name.buffer,
2848                                        newlb.buffer + tokoff, toklen);
2849                               token_name.buffer[toklen] = '\0';
2850                               token_name.len = toklen;
2851                               /* Name macros and members. */
2852                               tok.named = (structdef == stagseen
2853                                            || typdef == ttypeseen
2854                                            || typdef == tend
2855                                            || (funorvar
2856                                                && definedef == dignorerest)
2857                                            || (funorvar
2858                                                && definedef == dnone
2859                                                && structdef == sinbody));
2860                             }
2861                           tok.lineno = lineno;
2862                           tok.linelen = tokoff + toklen + 1;
2863                           tok.buffer = newlb.buffer;
2864                           tok.linepos = newlinepos;
2865                           tok.valid = TRUE;
2866
2867                           if (definedef == dnone
2868                               && (fvdef == fvnameseen
2869                                   || fvdef == foperator
2870                                   || structdef == stagseen
2871                                   || typdef == tend
2872                                   || objdef != onone))
2873                             {
2874                               if (current_lb_is_new)
2875                                 switch_line_buffers ();
2876                             }
2877                           else
2878                             make_C_tag (funorvar);
2879                         }
2880                       midtoken = FALSE;
2881                     }
2882                 } /* if (endtoken (c)) */
2883               else if (intoken (c))
2884                 intoken:
2885                 {
2886                   toklen++;
2887                   continue;
2888                 }
2889             } /* if (midtoken) */
2890           else if (begtoken (c))
2891             {
2892               switch (definedef)
2893                 {
2894                 case dnone:
2895                   switch (fvdef)
2896                     {
2897                     case fstartlist:
2898                       fvdef = finlist;
2899                       continue;
2900                     case flistseen:
2901                       make_C_tag (TRUE); /* a function */
2902                       fvdef = fignore;
2903                       break;
2904                     case fvnameseen:
2905                       fvdef = fvnone;
2906                       break;
2907                     }
2908                   if (structdef == stagseen && !cjava)
2909                     structdef = snone;
2910                   break;
2911                 case dsharpseen:
2912                   savetok = tok;
2913                 }
2914               if (!yacc_rules || lp == newlb.buffer + 1)
2915                 {
2916                   tokoff = lp - 1 - newlb.buffer;
2917                   toklen = 1;
2918                   midtoken = TRUE;
2919                 }
2920               continue;
2921             } /* if (begtoken) */
2922         } /* if must look at token */
2923
2924
2925       /* Detect end of line, colon, comma, semicolon and various braces
2926          after having handled a token.*/
2927       switch (c)
2928         {
2929         case ':':
2930           if (definedef != dnone)
2931             break;
2932           switch (objdef)
2933             {
2934             case  otagseen:
2935               objdef = oignore;
2936               make_C_tag (TRUE); /* an Objective C class */
2937               break;
2938             case omethodtag:
2939             case omethodparm:
2940               objdef = omethodcolon;
2941               methodlen += 1;
2942               grow_linebuffer (&token_name, methodlen + 1);
2943               strcat (token_name.buffer, ":");
2944               token_name.len = methodlen;
2945               break;
2946             }
2947           if (structdef == stagseen)
2948             structdef = scolonseen;
2949           else
2950             switch (fvdef)
2951               {
2952               case fvnameseen:
2953                 if (yacc_rules)
2954                   {
2955                     make_C_tag (FALSE); /* a yacc function */
2956                     fvdef = fignore;
2957                   }
2958                 break;
2959               case fstartlist:
2960                 fvextern = FALSE;
2961                 fvdef = fvnone;
2962                 break;
2963               }
2964           break;
2965         case ';':
2966           if (definedef != dnone)
2967             break;
2968           if (cblev == 0)
2969             switch (typdef)
2970               {
2971               case tend:
2972                 make_C_tag (FALSE); /* a typedef */
2973                 /* FALLTHRU */
2974               default:
2975                 typdef = tnone;
2976               }
2977           switch (fvdef)
2978             {
2979             case fignore:
2980               break;
2981             case fvnameseen:
2982               if ((members && cblev == 1)
2983                   || (globals && cblev == 0 && (!fvextern || declarations)))
2984                 make_C_tag (FALSE); /* a variable */
2985               fvextern = FALSE;
2986               fvdef = fvnone;
2987               tok.valid = FALSE;
2988               break;
2989             case flistseen:
2990               if (declarations && (cblev == 0 || cblev == 1))
2991                 make_C_tag (TRUE); /* a function declaration */
2992               /* FALLTHRU */
2993             default:
2994               fvextern = FALSE;
2995               fvdef = fvnone;
2996               /* The following instruction invalidates the token.
2997                  Probably the token should be invalidated in all
2998                  other cases  where some state machine is reset. */
2999               tok.valid = FALSE;
3000             }
3001           if (structdef == stagseen)
3002             structdef = snone;
3003           break;
3004         case ',':
3005           if (definedef != dnone)
3006             break;
3007           switch (objdef)
3008             {
3009             case omethodtag:
3010             case omethodparm:
3011               make_C_tag (TRUE); /* an Objective C method */
3012               objdef = oinbody;
3013               break;
3014             }
3015           switch (fvdef)
3016             {
3017             case foperator:
3018             case finlist:
3019             case fignore:
3020             case vignore:
3021               break;
3022             case fvnameseen:
3023               if ((members && cblev == 1)
3024                   || (globals && cblev == 0 && (!fvextern || declarations)))
3025                 make_C_tag (FALSE); /* a variable */
3026               break;
3027             default:
3028               fvdef = fvnone;
3029             }
3030           if (structdef == stagseen)
3031             structdef = snone;
3032           break;
3033         case '[':
3034           if (definedef != dnone)
3035             break;
3036           if (cblev == 0 && typdef == tend)
3037             {
3038               typdef = tignore;
3039               make_C_tag (FALSE);       /* a typedef */
3040               break;
3041             }
3042           switch (fvdef)
3043             {
3044             case foperator:
3045             case finlist:
3046             case fignore:
3047             case vignore:
3048               break;
3049             case fvnameseen:
3050               if ((members && cblev == 1)
3051                   || (globals && cblev == 0 && (!fvextern || declarations)))
3052                 make_C_tag (FALSE); /* a variable */
3053               /* FALLTHRU */
3054             default:
3055               fvdef = fvnone;
3056             }
3057           if (structdef == stagseen)
3058             structdef = snone;
3059           break;
3060         case '(':
3061           if (definedef != dnone)
3062             break;
3063           if (objdef == otagseen && parlev == 0)
3064             objdef = oparenseen;
3065           switch (fvdef)
3066             {
3067             case fvnameseen:
3068               if (typdef == ttypeseen
3069                   && tok.valid
3070                   && *lp != '*'
3071                   && structdef != sinbody)
3072                 {
3073                   /* This handles constructs like:
3074                      typedef void OperatorFun (int fun); */
3075                   make_C_tag (FALSE);
3076                   typdef = tignore;
3077                 }
3078               /* FALLTHRU */
3079             case foperator:
3080               fvdef = fstartlist;
3081               break;
3082             case flistseen:
3083               fvdef = finlist;
3084               break;
3085             }
3086           parlev++;
3087           break;
3088         case ')':
3089           if (definedef != dnone)
3090             break;
3091           if (objdef == ocatseen && parlev == 1)
3092             {
3093               make_C_tag (TRUE); /* an Objective C category */
3094               objdef = oignore;
3095             }
3096           if (--parlev == 0)
3097             {
3098               switch (fvdef)
3099                 {
3100                 case fstartlist:
3101                 case finlist:
3102                   fvdef = flistseen;
3103                   break;
3104                 }
3105               if (cblev == 0 && (typdef == tend))
3106                 {
3107                   typdef = tignore;
3108                   make_C_tag (FALSE); /* a typedef */
3109                 }
3110             }
3111           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3112             parlev = 0;
3113           break;
3114         case '{':
3115           if (definedef != dnone)
3116             break;
3117           if (typdef == ttypeseen)
3118             typdef = tinbody;
3119           switch (structdef)
3120             {
3121             case skeyseen:      /* unnamed struct */
3122               structdef = sinbody;
3123               structtag = "_anonymous_";
3124               break;
3125             case stagseen:
3126             case scolonseen:    /* named struct */
3127               structdef = sinbody;
3128               make_C_tag (FALSE);       /* a struct */
3129               break;
3130             }
3131           switch (fvdef)
3132             {
3133             case flistseen:
3134               make_C_tag (TRUE); /* a function */
3135               /* FALLTHRU */
3136             case fignore:
3137               fvdef = fvnone;
3138               break;
3139             case fvnone:
3140               switch (objdef)
3141                 {
3142                 case otagseen:
3143                   make_C_tag (TRUE); /* an Objective C class */
3144                   objdef = oignore;
3145                   break;
3146                 case omethodtag:
3147                 case omethodparm:
3148                   make_C_tag (TRUE); /* an Objective C method */
3149                   objdef = oinbody;
3150                   break;
3151                 default:
3152                   /* Neutralize `extern "C" {' grot. */
3153                   if (cblev == 0 && structdef == snone && typdef == tnone)
3154                     cblev = -1;
3155                 }
3156             }
3157           cblev++;
3158           break;
3159         case '*':
3160           if (definedef != dnone)
3161             break;
3162           if (fvdef == fstartlist)
3163             fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
3164           break;
3165         case '}':
3166           if (definedef != dnone)
3167             break;
3168           if (!noindentypedefs && lp == newlb.buffer + 1)
3169             {
3170               cblev = 0;        /* reset curly brace level if first column */
3171               parlev = 0;       /* also reset paren level, just in case... */
3172             }
3173           else if (cblev > 0)
3174             cblev--;
3175           if (cblev == 0)
3176             {
3177               if (typdef == tinbody)
3178                 typdef = tend;
3179               /* Memory leakage here: the string pointed by structtag is
3180                  never released, because I fear to miss something and
3181                  break things while freeing the area.  The amount of
3182                  memory leaked here is the sum of the lengths of the
3183                  struct tags.
3184               if (structdef == sinbody)
3185                 free (structtag); */
3186
3187               structdef = snone;
3188               structtag = "<error>";
3189             }
3190           break;
3191         case '=':
3192           if (definedef != dnone)
3193             break;
3194           switch (fvdef)
3195             {
3196             case foperator:
3197             case finlist:
3198             case fignore:
3199             case vignore:
3200               break;
3201             case fvnameseen:
3202               if ((members && cblev == 1)
3203                   || (globals && cblev == 0 && (!fvextern || declarations)))
3204                 make_C_tag (FALSE); /* a variable */
3205               /* FALLTHRU */
3206             default:
3207               fvdef = vignore;
3208             }
3209           break;
3210         case '+':
3211         case '-':
3212           if (objdef == oinbody && cblev == 0)
3213             {
3214               objdef = omethodsign;
3215               break;
3216             }
3217           /* FALLTHRU */
3218         case '#': case '~': case '&': case '%': case '/': case '|':
3219         case '^': case '!': case '<': case '>': case '.': case '?': case ']':
3220           if (definedef != dnone)
3221             break;
3222           /* These surely cannot follow a function tag in C. */
3223           switch (fvdef)
3224             {
3225             case foperator:
3226             case finlist:
3227             case fignore:
3228             case vignore:
3229               break;
3230             default:
3231               fvdef = fvnone;
3232             }
3233           break;
3234         case '\0':
3235           if (objdef == otagseen)
3236             {
3237               make_C_tag (TRUE); /* an Objective C class */
3238               objdef = oignore;
3239             }
3240           /* If a macro spans multiple lines don't reset its state. */
3241           if (quotednl)
3242             CNL_SAVE_DEFINEDEF ();
3243           else
3244             CNL ();
3245           break;
3246         } /* switch (c) */
3247
3248     } /* while not eof */
3249 }
3250
3251 /*
3252  * Process either a C++ file or a C file depending on the setting
3253  * of a global flag.
3254  */
3255 void
3256 default_C_entries (inf)
3257      FILE *inf;
3258 {
3259   C_entries (cplusplus ? C_PLPL : 0, inf);
3260 }
3261
/* Tag INF as plain ANSI C: C_entries is run with no extension flags. */
void
plain_C_entries (inf)
     FILE *inf;
{
  int no_extensions = 0;

  C_entries (no_extensions, inf);
}
3269
3270 /* Always do C++. */
3271 void
3272 Cplusplus_entries (inf)
3273      FILE *inf;
3274 {
3275   C_entries (C_PLPL, inf);
3276 }
3277
3278 /* Always do Java. */
3279 void
3280 Cjava_entries (inf)
3281      FILE *inf;
3282 {
3283   C_entries (C_JAVA, inf);
3284 }
3285
3286 /* Always do C*. */
3287 void
3288 Cstar_entries (inf)
3289      FILE *inf;
3290 {
3291   C_entries (C_STAR, inf);
3292 }
3293
3294 /* Always do Yacc. */
3295 void
3296 Yacc_entries (inf)
3297      FILE *inf;
3298 {
3299   C_entries (YACC, inf);
3300 }
3301 \f
/*
 * Loop header that runs the following statement once per line read
 * from FILE_POINTER.
 *
 * The loop starts with lineno and charno reset to 0 and stops as soon
 * as feof (FILE_POINTER) is true.  Before each pass it increments
 * lineno, saves charno into linecharno, reads a line into LINE_BUFFER
 * with readline (adding readline's return value to charno), and points
 * CHAR_POINTER at the buffer of LINE_BUFFER.
 *
 * LINE_BUFFER must be an lvalue, because its address is passed to
 * readline.  CHAR_POINTER is taken from LINE_BUFFER itself rather than
 * from the global `lb', so the macro also works for other buffers.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), (file_pointer)),         \
           (char_pointer) = (line_buffer).buffer,                       \
           TRUE);                                                       \
      )
3312
3313
3314 /*
3315  * Read a file, but do no processing.  This is used to do regexp
3316  * matching on files that have no language defined.
3317  */
3318 void
3319 just_read_file (inf)
3320      FILE *inf;
3321 {
3322   register char *dummy;
3323
3324   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3325     continue;
3326 }
3327 \f
3328 /* Fortran parsing */
3329
3330 bool
3331 tail (cp)
3332      char *cp;
3333 {
3334   register int len = 0;
3335
3336   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3337     cp++, len++;
3338   if (*cp == '\0' && !intoken (dbp[len]))
3339     {
3340       dbp += len;
3341       return TRUE;
3342     }
3343   return FALSE;
3344 }
3345
3346 void
3347 takeprec ()
3348 {
3349   dbp = skip_spaces (dbp);
3350   if (*dbp != '*')
3351     return;
3352   dbp++;
3353   dbp = skip_spaces (dbp);
3354   if (strneq (dbp, "(*)", 3))
3355     {
3356       dbp += 3;
3357       return;
3358     }
3359   if (!isdigit (*dbp))
3360     {
3361       --dbp;                    /* force failure */
3362       return;
3363     }
3364   do
3365     dbp++;
3366   while (isdigit (*dbp));
3367 }
3368
3369 void
3370 getit (inf)
3371      FILE *inf;
3372 {
3373   register char *cp;
3374
3375   dbp = skip_spaces (dbp);
3376   if (*dbp == '\0')
3377     {
3378       lineno++;
3379       linecharno = charno;
3380       charno += readline (&lb, inf);
3381       dbp = lb.buffer;
3382       if (dbp[5] != '&')
3383         return;
3384       dbp += 6;
3385       dbp = skip_spaces (dbp);
3386     }
3387   if (!isalpha (*dbp) && *dbp != '_' && *dbp != '$')
3388     return;
3389   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3390     continue;
3391   pfnote (savenstr (dbp, cp-dbp), TRUE,
3392           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3393 }
3394
3395
3396 void
3397 Fortran_functions (inf)
3398      FILE *inf;
3399 {
3400   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3401     {
3402       if (*dbp == '%')
3403         dbp++;                  /* Ratfor escape to fortran */
3404       dbp = skip_spaces (dbp);
3405       if (*dbp == '\0')
3406         continue;
3407       switch (lowcase (*dbp))
3408         {
3409         case 'i':
3410           if (tail ("integer"))
3411             takeprec ();
3412           break;
3413         case 'r':
3414           if (tail ("real"))
3415             takeprec ();
3416           break;
3417         case 'l':
3418           if (tail ("logical"))
3419             takeprec ();
3420           break;
3421         case 'c':
3422           if (tail ("complex") || tail ("character"))
3423             takeprec ();
3424           break;
3425         case 'd':
3426           if (tail ("double"))
3427             {
3428               dbp = skip_spaces (dbp);
3429               if (*dbp == '\0')
3430                 continue;
3431               if (tail ("precision"))
3432                 break;
3433               continue;
3434             }
3435           break;
3436         }
3437       dbp = skip_spaces (dbp);
3438       if (*dbp == '\0')
3439         continue;
3440       switch (lowcase (*dbp))
3441         {
3442         case 'f':
3443           if (tail ("function"))
3444             getit (inf);
3445           continue;
3446         case 's':
3447           if (tail ("subroutine"))
3448             getit (inf);
3449           continue;
3450         case 'e':
3451           if (tail ("entry"))
3452             getit (inf);
3453           continue;
3454         case 'b':
3455           if (tail ("blockdata") || tail ("block data"))
3456             {
3457               dbp = skip_spaces (dbp);
3458               if (*dbp == '\0') /* assume un-named */
3459                 pfnote (savestr ("blockdata"), TRUE,
3460                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
3461               else
3462                 getit (inf);    /* look for name */
3463             }
3464           continue;
3465         }
3466     }
3467 }
3468 \f
3469 /*
3470  * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be>, 1998-04-24
3471  * Ada parsing
3472  */
3473 /* Once we are positioned after an "interesting" keyword, let's get
3474    the real tag value necessary. */
3475 void
3476 adagetit (inf, name_qualifier)
3477      FILE *inf;
3478      char *name_qualifier;
3479 {
3480   register char *cp;
3481   char *name;
3482   char c;
3483
3484   while (!feof (inf))
3485     {
3486       dbp = skip_spaces (dbp);
3487       if (*dbp == '\0'
3488           || (dbp[0] == '-' && dbp[1] == '-'))
3489         {
3490           lineno++;
3491           linecharno = charno;
3492           charno += readline (&lb, inf);
3493           dbp = lb.buffer;
3494         }
3495       switch (*dbp)
3496         {
3497         case 'b':
3498         case 'B':
3499           if (tail ("body"))
3500             {
3501               /* Skipping body of   procedure body   or   package body or ....
3502                  resetting qualifier to body instead of spec. */
3503               name_qualifier = "/b";
3504               continue;
3505             }
3506           break;
3507         case 't':
3508         case 'T':
3509           /* Skipping type of   task type   or   protected type ... */
3510           if (tail ("type"))
3511             continue;
3512           break;
3513         }
3514       if (*dbp == '"')
3515         {
3516           dbp += 1;
3517           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3518             continue;
3519         }
3520       else
3521         {
3522           dbp = skip_spaces (dbp);
3523           for (cp = dbp;
3524                (*cp != '\0'
3525                 && (isalpha (*cp) || isdigit (*cp) || *cp == '_' || *cp == '.'));
3526                cp++)
3527             continue;
3528           if (cp == dbp)
3529             return;
3530         }
3531       c = *cp;
3532       *cp = '\0';
3533       name = concat (dbp, name_qualifier, "");
3534       *cp = c;
3535       pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3536       if (c == '"')
3537         dbp = cp + 1;
3538       return;
3539     }
3540 }
3541
3542 void
3543 Ada_funcs (inf)
3544      FILE *inf;
3545 {
3546   bool inquote = FALSE;
3547
3548   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3549     {
3550       while (*dbp != '\0')
3551         {
3552           /* Skip a string i.e. "abcd". */
3553           if (inquote || (*dbp == '"'))
3554             {
3555               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3556               if (dbp != NULL)
3557                 {
3558                   inquote = FALSE;
3559                   dbp += 1;
3560                   continue;     /* advance char */
3561                 }
3562               else
3563                 {
3564                   inquote = TRUE;
3565                   break;        /* advance line */
3566                 }
3567             }
3568
3569           /* Skip comments. */
3570           if (dbp[0] == '-' && dbp[1] == '-')
3571             break;              /* advance line */
3572
3573           /* Skip character enclosed in single quote i.e. 'a'
3574              and skip single quote starting an attribute i.e. 'Image. */
3575           if (*dbp == '\'')
3576             {
3577               dbp++ ;
3578               if (*dbp != '\0')
3579                 dbp++;
3580               continue;
3581             }
3582
3583           /* Search for beginning of a token.  */
3584           if (!begtoken (*dbp))
3585             {
3586               dbp++;
3587               continue;         /* advance char */
3588             }
3589
3590           /* We are at the beginning of a token. */
3591           switch (*dbp)
3592             {
3593             case 'f':
3594             case 'F':
3595               if (!packages_only && tail ("function"))
3596                 adagetit (inf, "/f");
3597               else
3598                 break;          /* from switch */
3599               continue;         /* advance char */
3600             case 'p':
3601             case 'P':
3602               if (!packages_only && tail ("procedure"))
3603                 adagetit (inf, "/p");
3604               else if (tail ("package"))
3605                 adagetit (inf, "/s");
3606               else if (tail ("protected")) /* protected type */
3607                 adagetit (inf, "/t");
3608               else
3609                 break;          /* from switch */
3610               continue;         /* advance char */
3611             case 't':
3612             case 'T':
3613               if (!packages_only && tail ("task"))
3614                 adagetit (inf, "/k");
3615               else if (typedefs && !packages_only && tail ("type"))
3616                 {
3617                   adagetit (inf, "/t");
3618                   while (*dbp != '\0')
3619                     dbp += 1;
3620                 }
3621               else
3622                 break;          /* from switch */
3623               continue;         /* advance char */
3624             }
3625
3626           /* Look for the end of the token. */
3627           while (!endtoken (*dbp))
3628             dbp++;
3629
3630         } /* advance char */
3631     } /* advance line */
3632 }
3633 \f
3634 /*
3635  * Bob Weiner, Motorola Inc., 4/3/94
3636  * Unix and microcontroller assembly tag handling
3637  * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3638  */
3639 void
3640 Asm_labels (inf)
3641      FILE *inf;
3642 {
3643   register char *cp;
3644
3645   LOOP_ON_INPUT_LINES (inf, lb, cp)
3646     {
3647       /* If first char is alphabetic or one of [_.$], test for colon
3648          following identifier. */
3649       if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3650         {
3651           /* Read past label. */
3652           cp++;
3653           while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3654             cp++;
3655           if (*cp == ':' || isspace (*cp))
3656             {
3657               /* Found end of label, so copy it and add it to the table. */
3658               pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3659                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3660             }
3661         }
3662     }
3663 }
3664 \f
3665 /*
3666  * Perl support by Bart Robinson <lomew@cs.utah.edu>
3667  *              enhanced by Michael Ernst <mernst@alum.mit.edu>
3668  * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3669  * Perl variable names: /^(my|local).../
3670  */
3671 void
3672 Perl_functions (inf)
3673      FILE *inf;
3674 {
3675   register char *cp;
3676
3677   LOOP_ON_INPUT_LINES (inf, lb, cp)
3678     {
3679       if (*cp++ == 's'
3680           && *cp++ == 'u'
3681           && *cp++ == 'b' && isspace (*cp++))
3682         {
3683           cp = skip_spaces (cp);
3684           if (*cp != '\0')
3685             {
3686               char *sp = cp;
3687               while (*cp != '\0'
3688                      && !isspace (*cp) && *cp != '{' && *cp != '(')
3689                 cp++;
3690               pfnote (savenstr (sp, cp-sp), TRUE,
3691                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3692             }
3693         }
3694        else if (globals         /* only if tagging global vars is enabled */
3695                 && ((cp = lb.buffer,
3696                      *cp++ == 'm'
3697                      && *cp++ == 'y')
3698                     || (cp = lb.buffer,
3699                         *cp++ == 'l'
3700                         && *cp++ == 'o'
3701                         && *cp++ == 'c'
3702                         && *cp++ == 'a'
3703                         && *cp++ == 'l'))
3704                 && (*cp == '(' || isspace (*cp)))
3705         {
3706           /* After "my" or "local", but before any following paren or space. */
3707           char *varname = NULL;
3708
3709           cp = skip_spaces (cp);
3710           if (*cp == '$' || *cp == '@' || *cp == '%')
3711             {
3712               char* varstart = ++cp;
3713               while (isalnum (*cp) || *cp == '_')
3714                 cp++;
3715               varname = savenstr (varstart, cp-varstart);
3716             }
3717           else
3718             {
3719               /* Should be examining a variable list at this point;
3720                  could insist on seeing an open parenthesis. */
3721               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
3722                 cp++;
3723             }
3724
3725           /* Perhaps I should back cp up one character, so the TAGS table
3726              doesn't mention (and so depend upon) the following char. */
3727           pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3728                   FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3729         }
3730     }
3731 }
3732 \f
3733 /*
3734  * Python support by Eric S. Raymond <esr@thyrsus.com>
3735  * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
3736  */
3737 void
3738 Python_functions (inf)
3739      FILE *inf;
3740 {
3741   register char *cp;
3742
3743   LOOP_ON_INPUT_LINES (inf, lb, cp)
3744     {
3745       if (*cp++ == 'd'
3746           && *cp++ == 'e'
3747           && *cp++ == 'f' && isspace (*cp++))
3748         {
3749           cp = skip_spaces (cp);
3750           while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':')
3751             cp++;
3752           pfnote (NULL, TRUE,
3753                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3754         }
3755
3756       cp = lb.buffer;
3757       if (*cp++ == 'c'
3758           && *cp++ == 'l'
3759           && *cp++ == 'a'
3760           && *cp++ == 's'
3761           && *cp++ == 's' && isspace (*cp++))
3762         {
3763           cp = skip_spaces (cp);
3764           while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':')
3765             cp++;
3766           pfnote (NULL, TRUE,
3767                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3768         }
3769     }
3770 }
3771 \f
3772 /* Idea by Corny de Souza
3773  * Cobol tag functions
3774  * We could look for anything that could be a paragraph name.
3775  * i.e. anything that starts in column 8 is one word and ends in a full stop.
3776  */
3777 void
3778 Cobol_paragraphs (inf)
3779      FILE *inf;
3780 {
3781   register char *bp, *ep;
3782
3783   LOOP_ON_INPUT_LINES (inf, lb, bp)
3784     {
3785       if (lb.len < 9)
3786         continue;
3787       bp += 8;
3788
3789       /* If eoln, compiler option or comment ignore whole line. */
3790       if (bp[-1] != ' ' || !isalnum (bp[0]))
3791         continue;
3792
3793       for (ep = bp; isalnum (*ep) || *ep == '-'; ep++)
3794         continue;
3795       if (*ep++ == '.')
3796         pfnote (savenstr (bp, ep-bp), TRUE,
3797                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
3798     }
3799 }
3800 \f
3801 /* Added by Mosur Mohan, 4/22/88 */
3802 /* Pascal parsing                */
3803
3804 /*
3805  *  Locates tags for procedures & functions.  Doesn't do any type- or
3806  *  var-definitions.  It does look for the keyword "extern" or
3807  *  "forward" immediately following the procedure statement; if found,
3808  *  the tag is skipped.
3809  */
3810 void
3811 Pascal_functions (inf)
3812      FILE *inf;
3813 {
3814   linebuffer tline;             /* mostly copied from C_entries */
3815   long save_lcno;
3816   int save_lineno, save_len;
3817   char c, *cp, *namebuf;
3818
3819   bool                          /* each of these flags is TRUE iff: */
3820     incomment,                  /* point is inside a comment */
3821     inquote,                    /* point is inside '..' string */
3822     get_tagname,                /* point is after PROCEDURE/FUNCTION
3823                                    keyword, so next item = potential tag */
3824     found_tag,                  /* point is after a potential tag */
3825     inparms,                    /* point is within parameter-list */
3826     verify_tag;                 /* point has passed the parm-list, so the
3827                                    next token will determine whether this
3828                                    is a FORWARD/EXTERN to be ignored, or
3829                                    whether it is a real tag */
3830
3831   save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
3832   namebuf = NULL;               /* keep compiler quiet */
3833   lineno = 0;
3834   charno = 0;
3835   dbp = lb.buffer;
3836   *dbp = '\0';
3837   initbuffer (&tline);
3838
3839   incomment = inquote = FALSE;
3840   found_tag = FALSE;            /* have a proc name; check if extern */
3841   get_tagname = FALSE;          /* have found "procedure" keyword    */
3842   inparms = FALSE;              /* found '(' after "proc"            */
3843   verify_tag = FALSE;           /* check if "extern" is ahead        */
3844
3845
3846   while (!feof (inf))           /* long main loop to get next char */
3847     {
3848       c = *dbp++;
3849       if (c == '\0')            /* if end of line */
3850         {
3851           lineno++;
3852           linecharno = charno;
3853           charno += readline (&lb, inf);
3854           dbp = lb.buffer;
3855           if (*dbp == '\0')
3856             continue;
3857           if (!((found_tag && verify_tag)
3858                 || get_tagname))
3859             c = *dbp++;         /* only if don't need *dbp pointing
3860                                    to the beginning of the name of
3861                                    the procedure or function */
3862         }
3863       if (incomment)
3864         {
3865           if (c == '}')         /* within { } comments */
3866             incomment = FALSE;
3867           else if (c == '*' && *dbp == ')') /* within (* *) comments */
3868             {
3869               dbp++;
3870               incomment = FALSE;
3871             }
3872           continue;
3873         }
3874       else if (inquote)
3875         {
3876           if (c == '\'')
3877             inquote = FALSE;
3878           continue;
3879         }
3880       else
3881         switch (c)
3882           {
3883           case '\'':
3884             inquote = TRUE;     /* found first quote */
3885             continue;
3886           case '{':             /* found open { comment */
3887             incomment = TRUE;
3888             continue;
3889           case '(':
3890             if (*dbp == '*')    /* found open (* comment */
3891               {
3892                 incomment = TRUE;
3893                 dbp++;
3894               }
3895             else if (found_tag) /* found '(' after tag, i.e., parm-list */
3896               inparms = TRUE;
3897             continue;
3898           case ')':             /* end of parms list */
3899             if (inparms)
3900               inparms = FALSE;
3901             continue;
3902           case ';':
3903             if (found_tag && !inparms) /* end of proc or fn stmt */
3904               {
3905                 verify_tag = TRUE;
3906                 break;
3907               }
3908             continue;
3909           }
3910       if (found_tag && verify_tag && (*dbp != ' '))
3911         {
3912           /* check if this is an "extern" declaration */
3913           if (*dbp == '\0')
3914             continue;
3915           if (lowcase (*dbp == 'e'))
3916             {
3917               if (tail ("extern"))      /* superfluous, really! */
3918                 {
3919                   found_tag = FALSE;
3920                   verify_tag = FALSE;
3921                 }
3922             }
3923           else if (lowcase (*dbp) == 'f')
3924             {
3925               if (tail ("forward"))     /*  check for forward reference */
3926                 {
3927                   found_tag = FALSE;
3928                   verify_tag = FALSE;
3929                 }
3930             }
3931           if (found_tag && verify_tag) /* not external proc, so make tag */
3932             {
3933               found_tag = FALSE;
3934               verify_tag = FALSE;
3935               pfnote (namebuf, TRUE,
3936                       tline.buffer, save_len, save_lineno, save_lcno);
3937               continue;
3938             }
3939         }
3940       if (get_tagname)          /* grab name of proc or fn */
3941         {
3942           if (*dbp == '\0')
3943             continue;
3944
3945           /* save all values for later tagging */
3946           grow_linebuffer (&tline, lb.len + 1);
3947           strcpy (tline.buffer, lb.buffer);
3948           save_lineno = lineno;
3949           save_lcno = linecharno;
3950
3951           /* grab block name */
3952           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
3953             continue;
3954           namebuf = savenstr (dbp, cp-dbp);
3955           dbp = cp;             /* set dbp to e-o-token */
3956           save_len = dbp - lb.buffer + 1;
3957           get_tagname = FALSE;
3958           found_tag = TRUE;
3959           continue;
3960
3961           /* and proceed to check for "extern" */
3962         }
3963       else if (!incomment && !inquote && !found_tag)
3964         {
3965           /* check for proc/fn keywords */
3966           switch (lowcase (c))
3967             {
3968             case 'p':
3969               if (tail ("rocedure"))    /* c = 'p', dbp has advanced */
3970                 get_tagname = TRUE;
3971               continue;
3972             case 'f':
3973               if (tail ("unction"))
3974                 get_tagname = TRUE;
3975               continue;
3976             }
3977         }
3978     }                           /* while not eof */
3979
3980   free (tline.buffer);
3981 }
3982 \f
3983 /*
3984  * lisp tag functions
3985  *  look for (def or (DEF, quote or QUOTE
3986  */
/* Return 1 if the three characters following STRP[0] spell "def" in any
   mix of upper and lower case, 0 otherwise.  STRP[0] itself (normally
   the open parenthesis) is not examined. */
int
L_isdef (strp)
     register char *strp;
{
  char *lower = "def";
  char *upper = "DEF";
  int i;

  for (i = 0; i < 3; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;                 /* stops at the first mismatch */
  return 1;
}
3995
/* Return 1 if the characters following STRP[0] spell "quote" in any mix
   of upper and lower case and are followed by a whitespace character,
   0 otherwise.  STRP[0] itself is not examined. */
int
L_isquote (strp)
     register char *strp;
{
  char *lower = "quote";
  char *upper = "QUOTE";
  int i;

  for (i = 0; i < 5; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;                 /* stops at the first mismatch */

  /* Whitespace must end the word, so "(quoted" does not qualify. */
  return isspace (strp[6]) != 0;
}
4007
4008 void
4009 L_getit ()
4010 {
4011   register char *cp;
4012
4013   if (*dbp == '\'')             /* Skip prefix quote */
4014     dbp++;
4015   else if (*dbp == '(')
4016   {
4017     if (L_isquote (dbp))
4018       dbp += 7;                 /* Skip "(quote " */
4019     else
4020       dbp += 1;                 /* Skip "(" before name in (defstruct (foo)) */
4021     dbp = skip_spaces (dbp);
4022   }
4023
4024   for (cp = dbp /*+1*/;
4025        *cp != '\0' && *cp != '(' && !isspace(*cp) && *cp != ')';
4026        cp++)
4027     continue;
4028   if (cp == dbp)
4029     return;
4030
4031   pfnote (savenstr (dbp, cp-dbp), TRUE,
4032           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4033 }
4034
4035 void
4036 Lisp_functions (inf)
4037      FILE *inf;
4038 {
4039   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4040     {
4041       if (dbp[0] == '(')
4042         {
4043           if (L_isdef (dbp))
4044             {
4045               dbp = skip_non_spaces (dbp);
4046               dbp = skip_spaces (dbp);
4047               L_getit ();
4048             }
4049           else
4050             {
4051               /* Check for (foo::defmumble name-defined ... */
4052               do
4053                 dbp++;
4054               while (*dbp != '\0' && !isspace (*dbp)
4055                      && *dbp != ':' && *dbp != '(' && *dbp != ')');
4056               if (*dbp == ':')
4057                 {
4058                   do
4059                     dbp++;
4060                   while (*dbp == ':');
4061
4062                   if (L_isdef (dbp - 1))
4063                     {
4064                       dbp = skip_non_spaces (dbp);
4065                       dbp = skip_spaces (dbp);
4066                       L_getit ();
4067                     }
4068                 }
4069             }
4070         }
4071     }
4072 }
4073 \f
4074 /*
4075  * Postscript tag functions
4076  * Just look for lines where the first character is '/'
4077  * Richard Mlynarik <mly@adoc.xerox.com>
4078  * Also look at "defineps" for PSWrap
4079  * suggested by Masatake YAMATO <masata-y@is.aist-nara.ac.jp>
4080  */
4081 void
4082 Postscript_functions (inf)
4083      FILE *inf;
4084 {
4085   register char *bp, *ep;
4086
4087   LOOP_ON_INPUT_LINES (inf, lb, bp)
4088     {
4089       if (bp[0] == '/')
4090         {
4091           for (ep = bp+1;
4092                *ep != '\0' && *ep != ' ' && *ep != '{';
4093                ep++)
4094             continue;
4095           pfnote (savenstr (bp, ep-bp), TRUE,
4096                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4097         }
4098       else if (strneq (bp, "defineps", 8))
4099         {
4100           bp = skip_non_spaces (bp);
4101           bp = skip_spaces (bp);
4102           get_tag (bp);
4103         }
4104     }
4105 }
4106
4107 \f
4108 /*
4109  * Scheme tag functions
4110  * look for (def... xyzzy
4111  * look for (def... (xyzzy
4112  * look for (def ... ((...(xyzzy ....
4113  * look for (set! xyzzy
4114  */
4115
4116 void get_scheme ();
4117
4118 void
4119 Scheme_functions (inf)
4120      FILE *inf;
4121 {
4122   register char *bp;
4123
4124   LOOP_ON_INPUT_LINES (inf, lb, bp)
4125     {
4126       if (bp[0] == '('
4127           && (bp[1] == 'D' || bp[1] == 'd')
4128           && (bp[2] == 'E' || bp[2] == 'e')
4129           && (bp[3] == 'F' || bp[3] == 'f'))
4130         {
4131           bp = skip_non_spaces (bp);
4132           /* Skip over open parens and white space */
4133           while (isspace (*bp) || *bp == '(')
4134             bp++;
4135           get_tag (bp);
4136         }
4137       if (bp[0] == '('
4138           && (bp[1] == 'S' || bp[1] == 's')
4139           && (bp[2] == 'E' || bp[2] == 'e')
4140           && (bp[3] == 'T' || bp[3] == 't')
4141           && (bp[4] == '!' || bp[4] == '!')
4142           && (isspace (bp[5])))
4143         {
4144           bp = skip_non_spaces (bp);
4145           bp = skip_spaces (bp);
4146           get_tag (bp);
4147         }
4148     }
4149 }
4150 \f
4151 /* Find tags in TeX and LaTeX input files.  */
4152
4153 /* TEX_toktab is a table of TeX control sequences that define tags.
4154    Each TEX_tabent records one such control sequence.
4155    CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* control sequence name, without the
                                   escape character */
  int len;                      /* length of name; TEX_Token treats an
                                   entry with len 0 as end of table */
};

struct TEX_tabent *TEX_toktab = NULL;   /* Table with tag tokens; built on
                                           the first TeX_functions call */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are separated by ':'.  */

char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index";

/* Forward declarations of the helpers defined after TeX_functions. */
void TEX_mode ();
struct TEX_tabent *TEX_decode_env ();
int TEX_Token ();

/* Escape and grouping characters currently in force.  TEX_mode resets
   all three each time TeX_functions starts on a file. */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
4178
4179 /*
4180  * TeX/LaTeX scanning loop.
4181  */
4182 void
4183 TeX_functions (inf)
4184      FILE *inf;
4185 {
4186   char *cp, *lasthit;
4187   register int i;
4188
4189   /* Select either \ or ! as escape character.  */
4190   TEX_mode (inf);
4191
4192   /* Initialize token table once from environment. */
4193   if (!TEX_toktab)
4194     TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4195
4196   LOOP_ON_INPUT_LINES (inf, lb, cp)
4197     {
4198       lasthit = cp;
4199       /* Look at each esc in line. */
4200       while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4201         {
4202           if (*++cp == '\0')
4203             break;
4204           linecharno += cp - lasthit;
4205           lasthit = cp;
4206           i = TEX_Token (lasthit);
4207           if (i >= 0)
4208             {
4209               /* We seem to include the TeX command in the tag name.
4210               register char *p;
4211               for (p = lasthit + TEX_toktab[i].len;
4212                    *p != '\0' && *p != TEX_clgrp;
4213                    p++)
4214                 continue; */
4215               pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4216                       lb.buffer, lb.len, lineno, linecharno);
4217               break;            /* We only tag a line once */
4218             }
4219         }
4220     }
4221 }
4222
4223 #define TEX_LESC '\\'
4224 #define TEX_SESC '!'
4225 #define TEX_cmt  '%'
4226
4227 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4228    chars accordingly. */
4229 void
4230 TEX_mode (inf)
4231      FILE *inf;
4232 {
4233   int c;
4234
4235   while ((c = getc (inf)) != EOF)
4236     {
4237       /* Skip to next line if we hit the TeX comment char. */
4238       if (c == TEX_cmt)
4239         while (c != '\n')
4240           c = getc (inf);
4241       else if (c == TEX_LESC || c == TEX_SESC )
4242         break;
4243     }
4244
4245   if (c == TEX_LESC)
4246     {
4247       TEX_esc = TEX_LESC;
4248       TEX_opgrp = '{';
4249       TEX_clgrp = '}';
4250     }
4251   else
4252     {
4253       TEX_esc = TEX_SESC;
4254       TEX_opgrp = '<';
4255       TEX_clgrp = '>';
4256     }
4257   /* If the input file is compressed, inf is a pipe, and rewind may fail.
4258      No attempt is made to correct the situation. */
4259   rewind (inf);
4260 }
4261
4262 /* Read environment and prepend it to the default string.
4263    Build token table. */
4264 struct TEX_tabent *
4265 TEX_decode_env (evarname, defenv)
4266      char *evarname;
4267      char *defenv;
4268 {
4269   register char *env, *p;
4270
4271   struct TEX_tabent *tab;
4272   int size, i;
4273
4274   /* Append default string to environment. */
4275   env = getenv (evarname);
4276   if (!env)
4277     env = defenv;
4278   else
4279     {
4280       char *oldenv = env;
4281       env = concat (oldenv, defenv, "");
4282     }
4283
4284   /* Allocate a token table */
4285   for (size = 1, p = env; p;)
4286     if ((p = etags_strchr (p, ':')) && *++p != '\0')
4287       size++;
4288   /* Add 1 to leave room for null terminator.  */
4289   tab = xnew (size + 1, struct TEX_tabent);
4290
4291   /* Unpack environment string into token table. Be careful about */
4292   /* zero-length strings (leading ':', "::" and trailing ':') */
4293   for (i = 0; *env;)
4294     {
4295       p = etags_strchr (env, ':');
4296       if (!p)                   /* End of environment string. */
4297         p = env + strlen (env);
4298       if (p - env > 0)
4299         {                       /* Only non-zero strings. */
4300           tab[i].name = savenstr (env, p - env);
4301           tab[i].len = strlen (tab[i].name);
4302           i++;
4303         }
4304       if (*p)
4305         env = p + 1;
4306       else
4307         {
4308           tab[i].name = NULL;   /* Mark end of table. */
4309           tab[i].len = 0;
4310           break;
4311         }
4312     }
4313   return tab;
4314 }
4315
4316 /* If the text at CP matches one of the tag-defining TeX command names,
4317    return the pointer to the first occurrence of that command in TEX_toktab.
4318    Otherwise return -1.
4319    Keep the capital `T' in `token' for dumb truncating compilers
4320    (this distinguishes it from `TEX_toktab' */
4321 int
4322 TEX_Token (cp)
4323      char *cp;
4324 {
4325   int i;
4326
4327   for (i = 0; TEX_toktab[i].len > 0; i++)
4328     if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4329       return i;
4330   return -1;
4331 }
4332 \f
4333 /*
4334  * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4335  *
4336  * Assumes that the predicate starts at column 0.
4337  * Only the first clause of a predicate is added.
4338  */
4339 int prolog_pred ();
4340 void prolog_skip_comment ();
4341 int prolog_atom ();
4342
4343 void
4344 Prolog_functions (inf)
4345      FILE *inf;
4346 {
4347   char *cp, *last;
4348   int len;
4349   int allocated;
4350
4351   allocated = 0;
4352   len = 0;
4353   last = NULL;
4354
4355   LOOP_ON_INPUT_LINES (inf, lb, cp)
4356     {
4357       if (cp[0] == '\0')        /* Empty line */
4358         continue;
4359       else if (isspace (cp[0])) /* Not a predicate */
4360         continue;
4361       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
4362         prolog_skip_comment (&lb, inf);
4363       else if ((len = prolog_pred (cp, last)) > 0)
4364         {
4365           /* Predicate.  Store the function name so that we only
4366              generate a tag for the first clause.  */
4367           if (last == NULL)
4368             last = xnew(len + 1, char);
4369           else if (len + 1 > allocated)
4370             last = xrnew (last, len + 1, char);
4371           allocated = len + 1;
4372           strncpy (last, cp, len);
4373           last[len] = '\0';
4374         }
4375     }
4376 }
4377
4378
4379 void
4380 prolog_skip_comment (plb, inf)
4381      linebuffer *plb;
4382      FILE *inf;
4383 {
4384   char *cp;
4385
4386   do
4387     {
4388       for (cp = plb->buffer; *cp != '\0'; cp++)
4389         if (cp[0] == '*' && cp[1] == '/')
4390           return;
4391       lineno++;
4392       linecharno += readline (plb, inf);
4393     }
4394   while (!feof(inf));
4395 }
4396
4397 /*
4398  * A predicate definition is added if it matches:
4399  *     <beginning of line><Prolog Atom><whitespace>(
4400  *
4401  * It is added to the tags database if it doesn't match the
4402  * name of the previous clause header.
4403  *
4404  * Return the size of the name of the predicate, or 0 if no header
4405  * was found.
4406  */
4407 int
4408 prolog_pred (s, last)
4409      char *s;
4410      char *last;                /* Name of last clause. */
4411 {
4412   int pos;
4413   int len;
4414
4415   pos = prolog_atom (s, 0);
4416   if (pos < 1)
4417     return 0;
4418
4419   len = pos;
4420   pos = skip_spaces (s + pos) - s;
4421
4422   if ((s[pos] == '(') || (s[pos] == '.'))
4423     {
4424       if (s[pos] == '(')
4425         pos++;
4426
4427       /* Save only the first clause. */
4428       if (last == NULL
4429           || len != (int)strlen (last)
4430           || !strneq (s, last, len))
4431         {
4432           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4433           return len;
4434         }
4435     }
4436   return 0;
4437 }
4438
/*
 * Consume the Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric/underscore sequence whose first character is a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string.  Single quotes can escape themselves.
 *   Backslash quotes everything.  The count includes both quotes.
 */
int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (islower (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (isalnum (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: walk to the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* The backslash takes the next character along with it. */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      case '\'':
        p++;
        if (*p != '\'')
          return (int) (p - start);     /* that was the closing quote */
        p++;                    /* A double quote */
        break;

      default:
        p++;
        break;
      }
}
4497 \f
4498 /*
4499  * Support for Erlang  --  Anders Lindgren, Feb 1996.
4500  *
4501  * Generates tags for functions, defines, and records.
4502  *
4503  * Assumes that Erlang functions start at column 0.
4504  */
4505 int erlang_func ();
4506 void erlang_attribute ();
4507 int erlang_atom ();
4508
4509 void
4510 Erlang_functions (inf)
4511      FILE *inf;
4512 {
4513   char *cp, *last;
4514   int len;
4515   int allocated;
4516
4517   allocated = 0;
4518   len = 0;
4519   last = NULL;
4520
4521   LOOP_ON_INPUT_LINES (inf, lb, cp)
4522     {
4523       if (cp[0] == '\0')        /* Empty line */
4524         continue;
4525       else if (isspace (cp[0])) /* Not function nor attribute */
4526         continue;
4527       else if (cp[0] == '%')    /* comment */
4528         continue;
4529       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
4530         continue;
4531       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
4532         {
4533           erlang_attribute (cp);
4534           last = NULL;
4535         }
4536       else if ((len = erlang_func (cp, last)) > 0)
4537         {
4538           /*
4539            * Function.  Store the function name so that we only
4540            * generates a tag for the first clause.
4541            */
4542           if (last == NULL)
4543             last = xnew (len + 1, char);
4544           else if (len + 1 > allocated)
4545             last = xrnew (last, len + 1, char);
4546           allocated = len + 1;
4547           strncpy (last, cp, len);
4548           last[len] = '\0';
4549         }
4550     }
4551 }
4552
4553
4554 /*
4555  * A function definition is added if it matches:
4556  *     <beginning of line><Erlang Atom><whitespace>(
4557  *
4558  * It is added to the tags database if it doesn't match the
4559  * name of the previous clause header.
4560  *
4561  * Return the size of the name of the function, or 0 if no function
4562  * was found.
4563  */
4564 int
4565 erlang_func (s, last)
4566      char *s;
4567      char *last;                /* Name of last clause. */
4568 {
4569   int pos;
4570   int len;
4571
4572   pos = erlang_atom (s, 0);
4573   if (pos < 1)
4574     return 0;
4575
4576   len = pos;
4577   pos = skip_spaces (s + pos) - s;
4578
4579   /* Save only the first clause. */
4580   if (s[pos++] == '('
4581       && (last == NULL
4582           || len != (int)strlen (last)
4583           || !strneq (s, last, len)))
4584         {
4585           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4586           return len;
4587         }
4588
4589   return 0;
4590 }
4591
4592
4593 /*
4594  * Handle attributes.  Currently, tags are generated for defines
4595  * and records.
4596  *
4597  * They are on the form:
4598  * -define(foo, bar).
4599  * -define(Foo(M, N), M+N).
4600  * -record(graph, {vtab = notable, cyclic = true}).
4601  */
4602 void
4603 erlang_attribute (s)
4604      char *s;
4605 {
4606   int pos;
4607   int len;
4608
4609   if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4610     {
4611       pos = skip_spaces (s + 7) - s;
4612       if (s[pos++] == '(')
4613         {
4614           pos = skip_spaces (s + pos) - s;
4615           len = erlang_atom (s, pos);
4616           if (len != 0)
4617             pfnote (savenstr (& s[pos], len), TRUE,
4618                     s, pos + len, lineno, linecharno);
4619         }
4620     }
4621   return;
4622 }
4623
4624
/*
 * Consume the Erlang atom (or variable) that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * Accepted are an alphanumeric/underscore sequence starting with a
 * letter or an underscore, and a single-quoted string in which a
 * backslash quotes the following character.  For a quoted string the
 * count includes both quotes.
 */
int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  char *start = s + pos;
  char *p = start;

  if (isalpha (*p) || *p == '_')
    {
      /* The atom is unquoted. */
      do
        p++;
      while (isalnum (*p) || *p == '_');
      return (int) (p - start);
    }

  if (*p != '\'')
    return -1;

  /* Quoted atom: walk to the closing quote. */
  p++;
  for (;;)
    switch (*p)
      {
      case '\'':
        return (int) (p + 1 - start);

      case '\0':
        /* Multiline quoted atoms are ignored. */
        return -1;

      case '\\':
        /* The backslash takes the next character along with it. */
        if (p[1] == '\0')
          return -1;
        p += 2;
        break;

      default:
        p++;
        break;
      }
}
4674 \f
4675 #ifdef ETAGS_REGEXPS
4676
/* Take a string like "/blah/" and turn it into "blah".  The first
   character of NAME is the separator; the text after it is copied down
   to the start of NAME until an unquoted separator or the end of the
   string is met.  A backslash quotes the next character: "\t" becomes
   a Tab, a quoted separator becomes the bare separator, and any other
   quoted character is kept together with its backslash.  A backslash
   that is the very last character is dropped.
   Works in place and null terminates the copied string.  Returns a
   pointer to the terminating separator, or to the end of the string if
   there was none. */
char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next copied char goes */
  char *src = name + 1;         /* next char to examine */

  while (*src != '\0')
    {
      if (*src == '\\')
        {
          src++;                /* look at the quoted character */
          if (*src == '\0')
            break;              /* nothing left to quote */
          if (*src == 't')
            *dst++ = '\t';
          else if (*src == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = *src;
            }
        }
      else if (*src == sep)
        break;                  /* unquoted separator ends the scan */
      else
        *dst++ = *src;
      src++;
    }

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
4719
4720 /* Look at the argument of --regex or --no-regex and do the right
4721    thing.  Same for each line of a regexp file. */
4722 void
4723 analyse_regex (regex_arg, ignore_case)
4724      char *regex_arg;
4725      bool ignore_case;
4726 {
4727   if (regex_arg == NULL)
4728     free_patterns ();           /* --no-regex: remove existing regexps */
4729
4730   /* A real --regexp option or a line in a regexp file. */
4731   switch (regex_arg[0])
4732     {
4733       /* Comments in regexp file or null arg to --regex. */
4734     case '\0':
4735     case ' ':
4736     case '\t':
4737       break;
4738
4739       /* Read a regex file.  This is recursive and may result in a
4740          loop, which will stop when the file descriptors are exhausted. */
4741     case '@':
4742       {
4743         FILE *regexfp;
4744         linebuffer regexbuf;
4745         char *regexfile = regex_arg + 1;
4746
4747         /* regexfile is a file containing regexps, one per line. */
4748         regexfp = fopen (regexfile, "r");
4749         if (regexfp == NULL)
4750           {
4751             pfatal (regexfile);
4752             return;
4753           }
4754         initbuffer (&regexbuf);
4755         while (readline_internal (&regexbuf, regexfp) > 0)
4756           analyse_regex (regexbuf.buffer, ignore_case);
4757         free (regexbuf.buffer);
4758         fclose (regexfp);
4759       }
4760       break;
4761
4762       /* Regexp to be used for a specific language only. */
4763     case '{':
4764       {
4765         language *lang;
4766         char *lang_name = regex_arg + 1;
4767         char *cp;
4768
4769         for (cp = lang_name; *cp != '}'; cp++)
4770           if (*cp == '\0')
4771             {
4772               error ("unterminated language name in regex: %s", regex_arg);
4773               return;
4774             }
4775         *cp = '\0';
4776         lang = get_language_from_name (lang_name);
4777         if (lang == NULL)
4778           return;
4779         add_regex (cp + 1, ignore_case, lang);
4780       }
4781       break;
4782
4783       /* Regexp to be used for any language. */
4784     default:
4785       add_regex (regex_arg, ignore_case, NULL);
4786       break;
4787     }
4788 }
4789
4790 /* Turn a name, which is an ed-style (but Emacs syntax) regular
4791    expression, into a real regular expression by compiling it. */
4792 void
4793 add_regex (regexp_pattern, ignore_case, lang)
4794      char *regexp_pattern;
4795      bool ignore_case;
4796      language *lang;
4797 {
4798   char *name;
4799   const char *err;
4800   struct re_pattern_buffer *patbuf;
4801   pattern *pp;
4802
4803
4804   if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
4805     {
4806       error ("%s: unterminated regexp", regexp_pattern);
4807       return;
4808     }
4809   name = scan_separators (regexp_pattern);
4810   if (regexp_pattern[0] == '\0')
4811     {
4812       error ("null regexp", (char *)NULL);
4813       return;
4814     }
4815   (void) scan_separators (name);
4816
4817   patbuf = xnew (1, struct re_pattern_buffer);
4818   /* Translation table to fold case if appropriate. */
4819   patbuf->translate = (ignore_case) ? lc_trans : NULL;
4820   patbuf->fastmap = NULL;
4821   patbuf->buffer = NULL;
4822   patbuf->allocated = 0;
4823
4824   err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
4825   if (err != NULL)
4826     {
4827       error ("%s while compiling pattern", err);
4828       return;
4829     }
4830
4831   pp = p_head;
4832   p_head = xnew (1, pattern);
4833   p_head->regex = savestr (regexp_pattern);
4834   p_head->p_next = pp;
4835   p_head->language = lang;
4836   p_head->pattern = patbuf;
4837   p_head->name_pattern = savestr (name);
4838   p_head->error_signaled = FALSE;
4839 }
4840
4841 /*
4842  * Do the substitutions indicated by the regular expression and
4843  * arguments.
4844  */
4845 char *
4846 substitute (in, out, regs)
4847      char *in, *out;
4848      struct re_registers *regs;
4849 {
4850   char *result, *t;
4851   int size, dig, diglen;
4852
4853   result = NULL;
4854   size = strlen (out);
4855
4856   /* Pass 1: figure out how much to allocate by finding all \N strings. */
4857   if (out[size - 1] == '\\')
4858     fatal ("pattern error in \"%s\"", out);
4859   for (t = etags_strchr (out, '\\');
4860        t != NULL;
4861        t = etags_strchr (t + 2, '\\'))
4862     if (isdigit (t[1]))
4863       {
4864         dig = t[1] - '0';
4865         diglen = regs->end[dig] - regs->start[dig];
4866         size += diglen - 2;
4867       }
4868     else
4869       size -= 1;
4870
4871   /* Allocate space and do the substitutions. */
4872   result = xnew (size + 1, char);
4873
4874   for (t = result; *out != '\0'; out++)
4875     if (*out == '\\' && isdigit (*++out))
4876       {
4877         /* Using "dig2" satisfies my debugger.  Bleah. */
4878         dig = *out - '0';
4879         diglen = regs->end[dig] - regs->start[dig];
4880         strncpy (t, in + regs->start[dig], diglen);
4881         t += diglen;
4882       }
4883     else
4884       *t++ = *out;
4885   *t = '\0';
4886
4887   if (DEBUG && (t > result + size || t - result != (int)strlen (result)))
4888     abort ();
4889
4890   return result;
4891 }
4892
4893 /* Deallocate all patterns. */
4894 void
4895 free_patterns ()
4896 {
4897   pattern *pp;
4898   while (p_head != NULL)
4899     {
4900       pp = p_head->p_next;
4901       free (p_head->regex);
4902       free (p_head->name_pattern);
4903       free (p_head);
4904       p_head = pp;
4905     }
4906   return;
4907 }
4908 \f
4909 void
4910 get_tag (bp)
4911      register char *bp;
4912 {
4913   register char *cp;
4914
4915   if (*bp == '\0')
4916     return;
4917   /* Go till you get to white space or a syntactic break */
4918   for (cp = bp + 1;
4919        *cp != '\0' && *cp != '(' && *cp != ')' && !isspace (*cp);
4920        cp++)
4921     continue;
4922   pfnote (savenstr (bp, cp-bp), TRUE,
4923           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4924 }
4925
4926 #endif /* ETAGS_REGEXPS */
4927 /* Initialize a linebuffer for use */
4928 void
4929 initbuffer (lbp)
4930      linebuffer *lbp;
4931 {
4932   lbp->size = 200;
4933   lbp->buffer = xnew (200, char);
4934 }
4935
4936 /*
4937  * Read a line of text from `stream' into `lbp', excluding the
4938  * newline or CR-NL, if any.  Return the number of characters read from
4939  * `stream', which is the length of the line including the newline.
4940  *
4941  * On DOS or Windows we do not count the CR character, if any, before the
4942  * NL, in the returned length; this mirrors the behavior of emacs on those
4943  * platforms (for text files, it translates CR-NL to NL as it reads in the
4944  * file).
4945  */
4946 long
4947 readline_internal (lbp, stream)
4948      linebuffer *lbp;
4949      register FILE *stream;
4950 {
4951   char *buffer = lbp->buffer;
4952   register char *p = lbp->buffer;
4953   register char *pend;
4954   int chars_deleted;
4955
4956   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
4957
4958   while (1)
4959     {
4960       register int c = getc (stream);
4961       if (p == pend)
4962         {
4963           /* We're at the end of linebuffer: expand it. */
4964           lbp->size *= 2;
4965           buffer = xrnew (buffer, lbp->size, char);
4966           p += buffer - lbp->buffer;
4967           pend = buffer + lbp->size;
4968           lbp->buffer = buffer;
4969         }
4970       if (c == EOF)
4971         {
4972           *p = '\0';
4973           chars_deleted = 0;
4974           break;
4975         }
4976       if (c == '\n')
4977         {
4978           if (p > buffer && p[-1] == '\r')
4979             {
4980               p -= 1;
4981 #ifdef DOS_NT
4982              /* Assume CRLF->LF translation will be performed by Emacs
4983                 when loading this file, so CRs won't appear in the buffer.
4984                 It would be cleaner to compensate within Emacs;
4985                 however, Emacs does not know how many CRs were deleted
4986                 before any given point in the file.  */
4987               chars_deleted = 1;
4988 #else
4989               chars_deleted = 2;
4990 #endif
4991             }
4992           else
4993             {
4994               chars_deleted = 1;
4995             }
4996           *p = '\0';
4997           break;
4998         }
4999       *p++ = c;
5000     }
5001   lbp->len = p - buffer;
5002
5003   return lbp->len + chars_deleted;
5004 }
5005
5006 /*
5007  * Like readline_internal, above, but in addition try to match the
5008  * input line against relevant regular expressions.
5009  */
5010 long
5011 readline (lbp, stream)
5012      linebuffer *lbp;
5013      FILE *stream;
5014 {
5015   /* Read new line. */
5016   long result = readline_internal (lbp, stream);
5017 #ifdef ETAGS_REGEXPS
5018   int match;
5019   pattern *pp;
5020
5021   /* Match against relevant patterns. */
5022   if (lbp->len > 0)
5023     for (pp = p_head; pp != NULL; pp = pp->p_next)
5024       {
5025         /* Only use generic regexps or those for the current language. */
5026         if (pp->language != NULL && pp->language != curlang)
5027           continue;
5028
5029         match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5030         switch (match)
5031           {
5032           case -2:
5033             /* Some error. */
5034             if (!pp->error_signaled)
5035               {
5036                 error ("error while matching \"%s\"", pp->regex);
5037                 pp->error_signaled = TRUE;
5038               }
5039             break;
5040           case -1:
5041             /* No match. */
5042             break;
5043           default:
5044             /* Match occurred.  Construct a tag. */
5045             if (pp->name_pattern[0] != '\0')
5046               {
5047                 /* Make a named tag. */
5048                 char *name = substitute (lbp->buffer,
5049                                          pp->name_pattern, &pp->regs);
5050                 if (name != NULL)
5051                   pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5052               }
5053             else
5054               {
5055                 /* Make an unnamed tag. */
5056                 pfnote ((char *)NULL, TRUE,
5057                         lbp->buffer, match, lineno, linecharno);
5058               }
5059             break;
5060           }
5061       }
5062 #endif /* ETAGS_REGEXPS */
5063
5064   return result;
5065 }
5066 \f
/*
 * Return a newly allocated copy of the whole string CP, made by
 * savenstr with the length of CP.
 */
char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5077
5078 /*
5079  * Return a pointer to a space of size LEN+1 allocated with xnew where
5080  * the string CP has been copied for at most the first LEN characters.
5081  */
5082 char *
5083 savenstr (cp, len)
5084      char *cp;
5085      int len;
5086 {
5087   register char *dp;
5088
5089   dp = xnew (len + 1, char);
5090   strncpy (dp, cp, len);
5091   dp[len] = '\0';
5092   return dp;
5093 }
5094
/*
 * Return a pointer to the last occurrence of the character c in
 * the string sp, or NULL if there is none.  The terminating null
 * character takes part in the search.
 *
 * Identical to System V strrchr, included for portability.
 */
char *
etags_strrchr (sp, c)
     register char *sp, c;
{
  register char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
	last = sp;
      if (*sp == '\0')
	break;
    }
  return last;
}
5115
5116
/*
 * Return a pointer to the first occurrence of the character c in
 * the string sp, or NULL if there is none.  The terminating null
 * character takes part in the search.
 *
 * Identical to System V strchr, included for portability.
 */
char *
etags_strchr (sp, c)
     register char *sp, c;
{
  while (*sp != c)
    {
      if (*sp == '\0')
	return NULL;
      sp++;
    }
  return sp;
}
5134
/* Return a pointer to the first character of CP that is not a space
   according to isspace. */
char *
skip_spaces (cp)
     char *cp;
{
  /* The loop stops at the end of the string, since
     isspace('\0')==FALSE. */
  for (; isspace (*cp); cp++)
    continue;
  return cp;
}
5144
/* Return a pointer to the first character of CP for which iswhite
   is true. */
char *
skip_non_spaces (cp)
     char *cp;
{
  /* The loop stops at the end of the string, since
     iswhite('\0')==TRUE. */
  for (; !iswhite (*cp); cp++)
    continue;
  return cp;
}
5154
5155 /* Print error message and exit.  */
5156 void
5157 fatal (s1, s2)
5158      char *s1, *s2;
5159 {
5160   error (s1, s2);
5161   exit (BAD);
5162 }
5163
5164 void
5165 pfatal (s1)
5166      char *s1;
5167 {
5168   perror (s1);
5169   exit (BAD);
5170 }
5171
5172 void
5173 suggest_asking_for_help ()
5174 {
5175   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5176            progname,
5177 #ifdef LONG_OPTIONS
5178            "--help"
5179 #else
5180            "-h"
5181 #endif
5182            );
5183   exit (BAD);
5184 }
5185
5186 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
5187 void
5188 error (s1, s2)
5189      char *s1, *s2;
5190 {
5191   fprintf (stderr, "%s: ", progname);
5192   fprintf (stderr, s1, s2);
5193   fprintf (stderr, "\n");
5194 }
5195
5196 /* Return a newly-allocated string whose contents
5197    concatenate those of s1, s2, s3.  */
5198 char *
5199 concat (s1, s2, s3)
5200      char *s1, *s2, *s3;
5201 {
5202   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5203   char *result = xnew (len1 + len2 + len3 + 1, char);
5204
5205   strcpy (result, s1);
5206   strcpy (result + len1, s2);
5207   strcpy (result + len1 + len2, s3);
5208   result[len1 + len2 + len3] = '\0';
5209
5210   return result;
5211 }
5212 \f
5213 /* Does the same work as the system V getcwd, but does not need to
5214    guess the buffer size in advance. */
5215 char *
5216 etags_getcwd ()
5217 {
5218 #ifdef HAVE_GETCWD
5219   int bufsize = 200;
5220   char *path = xnew (bufsize, char);
5221
5222   while (getcwd (path, bufsize) == NULL)
5223     {
5224       if (errno != ERANGE)
5225         pfatal ("getcwd");
5226       bufsize *= 2;
5227       free (path);
5228       path = xnew (bufsize, char);
5229     }
5230
5231   canonicalize_filename (path);
5232   return path;
5233
5234 #else /* not HAVE_GETCWD */
5235 #ifdef MSDOS
5236   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
5237
5238   getwd (path);
5239
5240   for (p = path; *p != '\0'; p++)
5241     if (*p == '\\')
5242       *p = '/';
5243     else
5244       *p = lowcase (*p);
5245
5246   return strdup (path);
5247 #else /* not MSDOS */
5248   linebuffer path;
5249   FILE *pipe;
5250
5251   initbuffer (&path);
5252   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5253   if (pipe == NULL || readline_internal (&path, pipe) == 0)
5254     pfatal ("pwd");
5255   pclose (pipe);
5256
5257   return path.buffer;
5258 #endif /* not MSDOS */
5259 #endif /* not HAVE_GETCWD */
5260 }
5261
5262 /* Return a newly allocated string containing the file name of FILE
5263    relative to the absolute directory DIR (which should end with a slash). */
5264 char *
5265 relative_filename (file, dir)
5266      char *file, *dir;
5267 {
5268   char *fp, *dp, *afn, *res;
5269   int i;
5270
5271   /* Find the common root of file and dir (with a trailing slash). */
5272   afn = absolute_filename (file, cwd);
5273   fp = afn;
5274   dp = dir;
5275   while (*fp++ == *dp++)
5276     continue;
5277   fp--, dp--;                   /* back to the first differing char */
5278 #ifdef DOS_NT
5279   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5280     return afn;
5281 #endif
5282   do                            /* look at the equal chars until '/' */
5283     fp--, dp--;
5284   while (*fp != '/');
5285
5286   /* Build a sequence of "../" strings for the resulting relative file name. */
5287   i = 0;
5288   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5289     i += 1;
5290   res = xnew (3*i + strlen (fp + 1) + 1, char);
5291   res[0] = '\0';
5292   while (i-- > 0)
5293     strcat (res, "../");
5294
5295   /* Add the file name relative to the common root of file and dir. */
5296   strcat (res, fp + 1);
5297   free (afn);
5298
5299   return res;
5300 }
5301
/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).

   An absolute FILE is copied as it is, any other FILE is appended
   to DIR.  The "/dirname/.." and "/." substrings are then deleted
   from the result; if nothing is left, "/" is returned. */
char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
	{
	  if (slashp[2] == '.'
	      && (slashp[3] == '/' || slashp[3] == '\0'))
	    {
	      /* Go back to the slash that starts the previous
		 component. */
	      cp = slashp;
	      do
		cp--;
	      while (cp >= res && !filename_is_absolute (cp));
	      if (cp < res)
		cp = slashp;	/* the absolute name begins with "/.." */
#ifdef DOS_NT
	      /* Under MSDOS and NT we get `d:/NAME' as absolute
		 file name, so the luser could say `d:/../NAME'.
		 We silently treat this as `d:/NAME'.  */
	      else if (cp[0] != '/')
		cp = slashp;
#endif
	      /* Source and destination overlap, so strcpy cannot be
		 used here. */
	      memmove (cp, slashp + 3, strlen (slashp + 3) + 1);
	      slashp = cp;
	      continue;
	    }
	  else if (slashp[2] == '/' || slashp[2] == '\0')
	    {
	      /* Overlapping regions again. */
	      memmove (slashp, slashp + 2, strlen (slashp + 2) + 1);
	      continue;
	    }
	}

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the name is the root directory.
	 The empty string is of no further use. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5362
5363 /* Return a newly allocated string containing the absolute
5364    file name of dir where FILE resides given DIR (which should
5365    end with a slash). */
5366 char *
5367 absolute_dirname (file, dir)
5368      char *file, *dir;
5369 {
5370   char *slashp, *res;
5371   char save;
5372
5373   canonicalize_filename (file);
5374   slashp = etags_strrchr (file, '/');
5375   if (slashp == NULL)
5376     return savestr (dir);
5377   save = slashp[1];
5378   slashp[1] = '\0';
5379   res = absolute_filename (file, dir);
5380   slashp[1] = save;
5381
5382   return res;
5383 }
5384
/* Tell whether FN is an absolute file name, that is whether it begins
   with a slash or, on DOS_NT, with a letter followed by ":/".  The
   argument string must have been canonicalized with
   canonicalize_filename. */
bool
filename_is_absolute (fn)
     char *fn;
{
  bool absolute = (fn[0] == '/');

#ifdef DOS_NT
  if (!absolute)
    absolute = (isalpha(fn[0]) && fn[1] == ':' && fn[2] == '/');
#endif
  return absolute;
}
5397
/* Put the file name FN in canonical form.  Works in place.
   On DOS_NT a lower case drive letter (a letter followed by a colon
   at the start of the name) is made upper case and backslashes are
   translated into slashes.  On other systems nothing is done. */
void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  This is done before the loop
     below, which moves FN away from the start of the name. */
  if (fn[0] != '\0' && fn[1] == ':' && islower ((unsigned char) fn[0]))
    fn[0] = toupper ((unsigned char) fn[0]);
  /* Convert backslashes to slashes.  */
  for (; *fn != '\0'; fn++)
    if (*fn == '\\')
      *fn = '/';
#else
  /* No action. */
  fn = NULL;			/* shut up the compiler */
#endif
}
5416
5417 /* Increase the size of a linebuffer. */
5418 void
5419 grow_linebuffer (lbp, toksize)
5420      linebuffer *lbp;
5421      int toksize;
5422 {
5423   while (lbp->size < toksize)
5424     lbp->size *= 2;
5425   lbp->buffer = xrnew (lbp->buffer, lbp->size, char);
5426 }
5427
/* Allocate SIZE bytes with malloc and return the block.  If malloc
   returns NULL, report "virtual memory exhausted" through fatal. */
long *
xmalloc (size)
     unsigned int size;
{
  long *block = (long *) malloc (size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
5438
/* Resize the block PTR to SIZE bytes with realloc and return the
   resulting block.  If realloc returns NULL, report "virtual memory
   exhausted" through fatal. */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *block = (long *) realloc (ptr, size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}