lib-src/etags.c

   1 /* Tags file maker to go with GNU Emacs
   2    Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99
   3    Free Software Foundation, Inc. and Ken Arnold
   4
   5 This file is not considered part of GNU Emacs.
   6
   7 This program is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2 of the License, or
  10 (at your option) any later version.
  11
  12 This program is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with this program; if not, write to the Free Software Foundation,
  19 Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  20
  21 /*
  22  * Authors:
  23  *      Ctags originally by Ken Arnold.
  24  *      Fortran added by Jim Kleckner.
  25  *      Ed Pelegri-Llopart added C typedefs.
  26  *      Gnu Emacs TAGS format and modifications by RMS?
  27  *      Sam Kendall added C++.
  28  *      Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
  29  *      Regexp tags by Tom Tromey.
  30  *
  31  *      Francesco Potorti` (pot@gnu.org) is the current maintainer.
  32  */
  33
  34 char pot_etags_version[] = "@(#) pot revision number is 13.33";
  35
  36 #define TRUE    1
  37 #define FALSE   0
  38
  39 #ifndef _GNU_SOURCE
  40 # define _GNU_SOURCE            /* enables some compiler checks on GNU */
  41 #endif
  42 #ifndef DEBUG
  43 # define DEBUG FALSE
  44 #endif
  45
  46 #ifdef HAVE_CONFIG_H
  47 # include <config.h>
  48   /* On some systems, Emacs defines static as nothing for the sake
  49      of unexec.  We don't want that here since we don't use unexec. */
  50 # undef static
  51 # define ETAGS_REGEXPS          /* use the regexp features */
  52 # define LONG_OPTIONS           /* accept long options */
  53 #endif /* HAVE_CONFIG_H */
  54
  55 #ifdef MSDOS
  56 # include <fcntl.h>
  57 # include <sys/param.h>
  58 # include <io.h>
  59 # ifndef HAVE_CONFIG_H
  60 #   define DOS_NT
  61 #   include <sys/config.h>
  62 # endif
  63 #endif /* MSDOS */
  64
  65 #ifdef WINDOWSNT
  66 # include <stdlib.h>
  67 # include <fcntl.h>
  68 # include <string.h>
  69 # include <io.h>
  70 # define MAXPATHLEN _MAX_PATH
  71 # ifdef HAVE_CONFIG_H
  72 #   undef HAVE_NTGUI
  73 # else
  74 #   define DOS_NT
  75 # endif /* not HAVE_CONFIG_H */
  76 # ifndef HAVE_GETCWD
  77 #   define HAVE_GETCWD
  78 # endif /* undef HAVE_GETCWD */
  79 #endif /* WINDOWSNT */
  80
  81 #if !defined (WINDOWSNT) && defined (STDC_HEADERS)
  82 #include <stdlib.h>
  83 #include <string.h>
  84 #endif
  85
  86 #ifdef HAVE_UNISTD_H
  87 # include <unistd.h>
  88 #else
  89 # ifdef HAVE_GETCWD
  90     extern char *getcwd ();
  91 # endif
  92 #endif /* HAVE_UNISTD_H */
  93
  94 #include <stdio.h>
  95 #include <ctype.h>
  96 #include <errno.h>
  97 #ifndef errno
  98   extern int errno;
  99 #endif
 100 #include <sys/types.h>
 101 #include <sys/stat.h>
 102
 103 #if !defined (S_ISREG) && defined (S_IFREG)
 104 # define S_ISREG(m)     (((m) & S_IFMT) == S_IFREG)
 105 #endif
 106
 107 #ifdef LONG_OPTIONS
 108 # include <getopt.h>
 109 #else
 110 # define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
 111   extern char *optarg;
 112   extern int optind, opterr;
 113 #endif /* LONG_OPTIONS */
 114
 115 #ifdef ETAGS_REGEXPS
 116 # include <regex.h>
 117 #endif /* ETAGS_REGEXPS */
 118
 119 /* Define CTAGS to make the program "ctags" compatible with the usual one.
 120  Leave it undefined to make the program "etags", which makes emacs-style
 121  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 122 #ifdef CTAGS
 123 # undef  CTAGS
 124 # define CTAGS TRUE
 125 #else
 126 # define CTAGS FALSE
 127 #endif
 128
 129 /* Exit codes for success and failure.  */
 130 #ifdef VMS
 131 # define        GOOD    1
 132 # define        BAD     0
 133 #else
 134 # define        GOOD    0
 135 # define        BAD     1
 136 #endif
 137
 138 /* C extensions.  Note that C_STAR and C_JAVA each contain the C_PLPL bit. */
 139 #define C_PLPL  0x00001         /* C++ */
 140 #define C_STAR  0x00003         /* C* (0x1 | 0x2) */
 141 #define C_JAVA  0x00005         /* Java (0x1 | 0x4) */
 142 #define YACC    0x10000         /* yacc file */
 143 /* String comparison predicates.  With DEBUG, abort if either argument is NULL. */
 144 #define streq(s,t)      ((DEBUG && ((s) == NULL || (t) == NULL) \
 145                           && (abort (), 1)) || !strcmp (s, t))
 146 #define strneq(s,t,n)   ((DEBUG && ((s) == NULL || (t) == NULL) \
 147                           && (abort (), 1)) || !strncmp (s, t, n))
 148
 149 #define lowcase(c)      tolower (CHAR(c)) /* <ctype.h> wants an unsigned char value */
 150 /* CHAR turns any char, negative ones included, into an index in 0..CHARS-1. */
 151 #define CHARS 256               /* 2^CHAR_BIT (8-bit chars assumed) */
 152 #define CHAR(x)         ((unsigned int)(x) & (CHARS - 1))
 153 #define iswhite(c)      (_wht[CHAR(c)]) /* c is white */
 154 #define notinname(c)    (_nin[CHAR(c)]) /* c is not in a name */
 155 #define begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
 156 #define intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
 157 #define endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
 158
 159
 160 /*
 161  *      xnew, xrnew -- allocate, reallocate storage
 162  *      Both ask for room for N objects of type Type and cast the result to Type *.
 163  * SYNOPSIS:    Type *xnew (int n, Type);
 164  *              Type *xrnew (OldPointer, int n, Type);
 165  */
 166 #ifdef chkmalloc                /* pass the call site to the tracing allocator */
 167 # include "chkmalloc.h"
 168 # define xnew(n,Type)     ((Type *) trace_malloc (__FILE__, __LINE__, \
 169                                                   (n) * sizeof (Type)))
 170 # define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \
 171                                                    (op), (n) * sizeof (Type)))
 172 #else                           /* otherwise go through xmalloc and xrealloc */
 173 # define xnew(n,Type)     ((Type *) xmalloc ((n) * sizeof (Type)))
 174 # define xrnew(op,n,Type) ((Type *) xrealloc ((op), (n) * sizeof (Type)))
 175 #endif /* chkmalloc */
 176
 177 typedef int bool;               /* holds TRUE or FALSE */
 178 /* Type of the per-language tagging functions stored in a `language' entry. */
 179 typedef void Lang_function ();
 180 /* A compressed-file suffix paired with the command that expands such files. */
 181 typedef struct
 182 {
 183   char *suffix;
 184   char *command;                /* Takes one arg and decompresses to stdout */
 185 } compressor;
 186 /* One entry of the lang_names table. */
 187 typedef struct
 188 {
 189   char *name;                   /* language name, NULL in the entry ending the table */
 190   Lang_function *function;      /* tagging function, or NULL */
 191   char **suffixes;              /* NULL-terminated list of file suffixes, or NULL */
 192   char **interpreters;          /* NULL-terminated list of interpreter names, or NULL */
 193 } language;
 194
 195 extern char *getenv ();
 196
 197 /* Many compilers barf on this:
 198         Lang_function Ada_funcs;
 199    so let's write it this way */
 200 void Ada_funcs ();
 201 void Asm_labels ();
 202 void C_entries ();
 203 void default_C_entries ();
 204 void plain_C_entries ();
 205 void Cjava_entries ();
 206 void Cobol_paragraphs ();
 207 void Cplusplus_entries ();
 208 void Cstar_entries ();
 209 void Erlang_functions ();
 210 void Fortran_functions ();
 211 void Yacc_entries ();
 212 void Lisp_functions ();
 213 void Pascal_functions ();
 214 void Perl_functions ();
 215 void Postscript_functions ();
 216 void Prolog_functions ();
 217 void Python_functions ();
 218 void Scheme_functions ();
 219 void TeX_functions ();
 220 void just_read_file ();
 221
 222 compressor *get_compressor_from_suffix ();
 223 language *get_language_from_name ();
 224 language *get_language_from_interpreter ();
 225 language *get_language_from_suffix ();
 226 int total_size_of_entries ();
 227 long readline (), readline_internal ();
 228 void get_tag ();
 229
 230 #ifdef ETAGS_REGEXPS
 231 void analyse_regex ();
 232 void add_regex ();
 233 void free_patterns ();
 234 #endif /* ETAGS_REGEXPS */
 235 void error ();
 236 void suggest_asking_for_help ();
 237 void fatal (), pfatal ();
 238 void add_node ();
 239
 240 void init ();
 241 void initbuffer ();
 242 void find_entries ();
 243 void free_tree ();
 244 void pfnote (), new_pfnote ();
 245 void process_file ();
 246 void put_entries ();
 247 void takeprec ();
 248
 249 char *concat ();
 250 char *skip_spaces (), *skip_non_spaces ();
 251 char *savenstr (), *savestr ();
 252 char *etags_strchr (), *etags_strrchr ();
 253 char *etags_getcwd ();
 254 char *relative_filename (), *absolute_filename (), *absolute_dirname ();
 255 bool filename_is_absolute ();
 256 void canonicalize_filename ();
 257 void grow_linebuffer ();
 258 long *xmalloc (), *xrealloc ();
 259
 260 \f
 261 char searchar = '/';            /* use /.../ searches */
 262
 263 char *tagfile;                  /* output file */
 264 char *progname;                 /* name this program was invoked with */
 265 char *cwd;                      /* current working directory */
 266 char *tagfiledir;               /* directory of tagfile */
 267 FILE *tagf;                     /* ioptr for tags file */
 268
 269 char *curfile;                  /* current input file name */
 270 language *curlang;              /* current language */
 271
 272 int lineno;                     /* line number of current line */
 273 long charno;                    /* current character number */
 274 long linecharno;                /* charno of start of current line */
 275 char *dbp;                      /* pointer to start of current tag */
 276
 277 typedef struct node_st
 278 {                               /* sorting structure            */
 279   char *name;                   /* function or type name        */
 280   char *file;                   /* file name                    */
 281   bool is_func;                 /* use pattern or line no       */
 282   bool been_warned;             /* set if noticed dup           */
 283   int lno;                      /* line number tag is on        */
 284   long cno;                     /* character number line starts on */
 285   char *pat;                    /* search pattern               */
 286   struct node_st *left, *right; /* left and right sons          */
 287 } node;
 288
 289 node *head;                     /* the head of the binary tree of tags */
 290
 291 /*
 292  * A `linebuffer' is a structure which holds a line of text.
 293  * `readline_internal' reads a line from a stream into a linebuffer
 294  * and works regardless of the length of the line.
 295  * SIZE is the size of BUFFER, LEN is the length of the string in
 296  * BUFFER after readline reads it.
 297  */
 298 typedef struct
 299 {
 300   long size;
 301   int len;
 302   char *buffer;
 303 } linebuffer;
 304
 305 linebuffer lb;                  /* the current line */
 306 linebuffer token_name;          /* used by C_entries as a temporary area */
 307 struct
 308 {
 309   long linepos;
 310   linebuffer lb;                /* used by C_entries instead of lb */
 311 } lbs[2];
 312
 313 /* boolean "functions" (see init)       */
 314 bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 315 char
 316   /* white chars */
 317   *white = " \f\t\n\r",
 318   /* not in a name */
 319   *nonam = " \f\t\n\r(=,[;",
 320   /* token ending chars */
 321   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
 322   /* token starting chars */
 323   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
 324   /* valid in-token chars */
 325   *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 326
 327 bool append_to_tagfile;         /* -a: append to tags */
 328 /* The following four default to TRUE for etags, but to FALSE for ctags.  */
 329 bool typedefs;                  /* -t: create tags for C and Ada typedefs */
 330 bool typedefs_and_cplusplus;    /* -T: create tags for C typedefs, level */
 331                                 /* 0 struct/enum/union decls, and C++ */
 332                                 /* member functions. */
 333 bool constantypedefs;           /* -d: create tags for C #define, enum */
 334                                 /* constants and variables. */
 335                                 /* -D: opposite of -d.  Default under ctags. */
 336 bool declarations;              /* --declarations: tag them and extern in C&Co*/
 337 bool globals;                   /* create tags for global variables */
 338 bool members;                   /* create tags for C member variables */
 339 bool update;                    /* -u: update tags */
 340 bool vgrind_style;              /* -v: create vgrind style index output */
 341 bool no_warnings;               /* -w: suppress warnings */
 342 bool cxref_style;               /* -x: create cxref style output */
 343 bool cplusplus;                 /* .[hc] means C++, not C */
 344 bool noindentypedefs;           /* -I: ignore indentation in C */
 345 bool packages_only;             /* --packages-only: in Ada, only tag packages*/
 346 /* Long options: an entry either stores a value in a flag variable or maps to a short option letter. */
 347 #ifdef LONG_OPTIONS
 348 struct option longopts[] =
 349 {
 350   { "packages-only",      no_argument,       &packages_only, TRUE  },
 351   { "append",             no_argument,       NULL,           'a'   },
 352   { "backward-search",    no_argument,       NULL,           'B'   },
 353   { "c++",                no_argument,       NULL,           'C'   },
 354   { "cxref",              no_argument,       NULL,           'x'   },
 355   { "defines",            no_argument,       NULL,           'd'   },
 356   { "declarations",       no_argument,       &declarations,  TRUE  },
 357   { "no-defines",         no_argument,       NULL,           'D'   },
 358   { "globals",            no_argument,       &globals,       TRUE  },
 359   { "no-globals",         no_argument,       &globals,       FALSE },
 360   { "help",               no_argument,       NULL,           'h'   },
 361   { "help",               no_argument,       NULL,           'H'   }, /* same name as the entry above; NOTE(review): probably never selected -- confirm */
 362   { "ignore-indentation", no_argument,       NULL,           'I'   },
 363   { "include",            required_argument, NULL,           'i'   },
 364   { "language",           required_argument, NULL,           'l'   },
 365   { "members",            no_argument,       &members,       TRUE  },
 366   { "no-members",         no_argument,       &members,       FALSE },
 367   { "no-warn",            no_argument,       NULL,           'w'   },
 368   { "output",             required_argument, NULL,           'o'   },
 369 #ifdef ETAGS_REGEXPS
 370   { "regex",              required_argument, NULL,           'r'   },
 371   { "no-regex",           no_argument,       NULL,           'R'   },
 372   { "ignore-case-regex",  required_argument, NULL,           'c'   },
 373 #endif /* ETAGS_REGEXPS */
 374   { "typedefs",           no_argument,       NULL,           't'   },
 375   { "typedefs-and-c++",   no_argument,       NULL,           'T'   },
 376   { "update",             no_argument,       NULL,           'u'   },
 377   { "version",            no_argument,       NULL,           'V'   },
 378   { "vgrind",             no_argument,       NULL,           'v'   },
 379   { NULL }                      /* end of table */
 380 };
 381 #endif /* LONG_OPTIONS */
 382
 383 #ifdef ETAGS_REGEXPS
 384 /* Structure defining a regular expression.  Elements are
 385    the compiled pattern, and the name string. */
 386 typedef struct pattern
 387 {
 388   struct pattern *p_next;
 389   language *language;
 390   char *regex;
 391   struct re_pattern_buffer *pattern;
 392   struct re_registers regs;
 393   char *name_pattern;
 394   bool error_signaled;
 395 } pattern;
 396
 397 /* List of all regexps. */
 398 pattern *p_head = NULL;
 399
 400 /* How many characters in the character set.  (From regex.c.)  */
 401 #define CHAR_SET_SIZE 256
 402 /* Translation table for case-insensitive matching. */
 403 char lc_trans[CHAR_SET_SIZE];
 404 #endif /* ETAGS_REGEXPS */
 405 /* Known compressed-file suffixes and their decompression commands; a NULL suffix ends the table. */
 406 compressor compressors[] =
 407 {
 408   { "z", "gzip -d -c"},
 409   { "Z", "gzip -d -c"},
 410   { "gz", "gzip -d -c"},
 411   { "GZ", "gzip -d -c"},
 412   { "bz2", "bzip2 -d -c" },
 413   { NULL }
 414 };
 415
 416 /*
 417  * Language stuff.
 418  */
 419
 420 /* Non-NULL if language fixed. */
 421 language *forced_lang = NULL;
 422
 423 /* Ada code */
 424 char *Ada_suffixes [] =
 425   { "ads", "adb", "ada", NULL };
 426
 427 /* Assembly code */
 428 char *Asm_suffixes [] = { "a",  /* Unix assembler */
 429                           "asm", /* Microcontroller assembly */
 430                           "def", /* BSO/Tasking definition includes  */
 431                           "inc", /* Microcontroller include files */
 432                           "ins", /* Microcontroller include files */
 433                           "s", "sa", /* Unix assembler */
 434                           "S",   /* cpp-processed Unix assembler */
 435                           "src", /* BSO/Tasking C compiler output */
 436                           NULL
 437                         };
 438
 439 /* Note that .c and .h can be considered C++, if the --c++ flag was
 440    given.  That is why default_C_entries is called here. */
 441 char *default_C_suffixes [] =
 442   { "c", "h", NULL };
 443
 444 char *Cplusplus_suffixes [] =
 445   { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx",
 446     "M",                        /* Objective C++ */
 447     "pdb",                      /* Postscript with C syntax */
 448     NULL };
 449
 450 char *Cjava_suffixes [] =
 451   { "java", NULL };
 452
 453 char *Cobol_suffixes [] =
 454   { "COB", "cob", NULL };
 455
 456 char *Cstar_suffixes [] =
 457   { "cs", "hs", NULL };
 458
 459 char *Erlang_suffixes [] =
 460   { "erl", "hrl", NULL };
 461
 462 char *Fortran_suffixes [] =
 463   { "F", "f", "f90", "for", NULL };
 464
 465 char *Lisp_suffixes [] =
 466   { "cl", "clisp", "el", "l", "lisp", "lsp", "ml", NULL };
 467
 468 char *Pascal_suffixes [] =
 469   { "p", "pas", NULL };
 470
 471 char *Perl_suffixes [] =
 472   { "pl", "pm", NULL };
 473 char *Perl_interpreters [] =
 474   { "perl", "@PERL@", NULL };
 475
 476 char *plain_C_suffixes [] =
 477   { "pc",                       /* Pro*C file */
 478     "m",                        /* Objective C file */
 479     "lm",                       /* Objective lex file */
 480      NULL };
 481
 482 char *Postscript_suffixes [] =
 483   { "ps", "psw", NULL };        /* .psw is for PSWrap */
 484
 485 char *Prolog_suffixes [] =
 486   { "prolog", NULL };
 487
 488 char *Python_suffixes [] =
 489   { "py", NULL };
 490
 491 /* Can't do the `SCM' or `scm' prefix with a version number. */
 492 char *Scheme_suffixes [] =
 493   { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL };
 494
 495 char *TeX_suffixes [] =
 496   { "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL };
 497
 498 char *Yacc_suffixes [] =
 499   { "y", "ym", "yy", "yxx", "y++", NULL }; /* .ym is Objective yacc file */
 500
 501 /*
 502  * Table of languages.
 503  *
 504  * It is ok for a given function to be listed under more than one
 505  * name.  I just didn't.
 506  */
 507
 508 language lang_names [] =
 509 {
 510   { "ada",     Ada_funcs,           Ada_suffixes,         NULL              },
 511   { "asm",     Asm_labels,          Asm_suffixes,         NULL              },
 512   { "c",       default_C_entries,   default_C_suffixes,   NULL              },
 513   { "c++",     Cplusplus_entries,   Cplusplus_suffixes,   NULL              },
 514   { "c*",      Cstar_entries,       Cstar_suffixes,       NULL              },
 515   { "cobol",   Cobol_paragraphs,    Cobol_suffixes,       NULL              },
 516   { "erlang",  Erlang_functions,    Erlang_suffixes,      NULL              },
 517   { "fortran", Fortran_functions,   Fortran_suffixes,     NULL              },
 518   { "java",    Cjava_entries,       Cjava_suffixes,       NULL              },
 519   { "lisp",    Lisp_functions,      Lisp_suffixes,        NULL              },
 520   { "pascal",  Pascal_functions,    Pascal_suffixes,      NULL              },
 521   { "perl",    Perl_functions,      Perl_suffixes,        Perl_interpreters },
 522   { "postscript", Postscript_functions, Postscript_suffixes, NULL           },
 523   { "proc",    plain_C_entries,     plain_C_suffixes,     NULL              },
 524   { "prolog",  Prolog_functions,    Prolog_suffixes,      NULL              },
 525   { "python",  Python_functions,    Python_suffixes,      NULL              },
 526   { "scheme",  Scheme_functions,    Scheme_suffixes,      NULL              },
 527   { "tex",     TeX_functions,       TeX_suffixes,         NULL              },
 528   { "yacc",    Yacc_entries,        Yacc_suffixes,        NULL              },
 529   { "auto", NULL },             /* default guessing scheme */
 530   { "none", just_read_file },   /* regexp matching only */
 531   { NULL, NULL }                /* end of list */
 532 };
 533 \f
 534 void
 535 print_language_names ()
 536 {
 537   /* Write each entry of lang_names to stdout, one per line, followed by
 538      its default suffixes; then explain `auto', `none' and the fallbacks.  */
 539   language *entry;
 540   char **sfx;
 541   puts ("\nThese are the currently supported languages, along with the\n\
 542 default file name suffixes:");
 543   for (entry = lang_names; entry->name != NULL; entry++)
 544     {
 545       printf ("\t%s\t", entry->name);
 546       for (sfx = entry->suffixes; sfx != NULL && *sfx != NULL; sfx++)
 547         printf (" .%s", *sfx);
 548       puts ("");
 549     }
 550   puts ("Where `auto' means use default language for files based on file\n\
 551 name suffix, and `none' means only do regexp processing on files.\n\
 552 If no language is specified and no matching suffix is found,\n\
 553 the first line of the file is read for a sharp-bang (#!) sequence\n\
 554 followed by the name of an interpreter.  If no such sequence is found,\n\
 555 Fortran is tried first; if no tags are found, C is tried next.\n\
 556 Compressed files are supported using gzip and bzip2.");
 557 }
 558 /* Print the program name, its version (VERSION, "20" unless predefined) and the licence note, then exit. */
 559 #ifndef VERSION
 560 # define VERSION "20"
 561 #endif
 562 void
 563 print_version ()
 564 {
 565   char *self = (CTAGS) ? "ctags" : "etags";     /* name to announce */
 566   printf ("%s (GNU Emacs %s)\n", self, VERSION);
 567   puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
 568   puts ("This program is distributed under the same terms as Emacs");
 569   exit (GOOD);
 570 }
 571 /* Print the usage message for etags or ctags (selected by CTAGS) on stdout, then exit with GOOD. */
 572 void
 573 print_help ()
 574 {
 575   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 576 \n\
 577 These are the options accepted by %s.\n", progname, progname);
 578 #ifdef LONG_OPTIONS
 579   puts ("You may use unambiguous abbreviations for the long option names.");
 580 #else
 581   puts ("Long option names do not work with this executable, as it is not\n\
 582 linked with GNU getopt.");
 583 #endif /* LONG_OPTIONS */
 584   puts ("A - as file name means read names from stdin (one per line).");
 585   if (!CTAGS)
 586     printf ("  Absolute names are stored in the output file as they are.\n\
 587 Relative ones are stored relative to the output file's directory.");
 588   puts ("\n");
 589   /* One entry per option follows; tests on CTAGS select the text that applies to each program. */
 590   puts ("-a, --append\n\
 591         Append tag entries to existing tags file.");
 592
 593   puts ("--packages-only\n\
 594         For Ada files, only generate tags for packages .");
 595
 596   if (CTAGS)
 597     puts ("-B, --backward-search\n\
 598         Write the search commands for the tag entries using '?', the\n\
 599         backward-search command instead of '/', the forward-search command.");
 600
 601   puts ("-C, --c++\n\
 602         Treat files whose name suffix defaults to C language as C++ files.");
 603
 604   puts ("--declarations\n\
 605         In C and derived languages, create tags for function declarations,");
 606   if (CTAGS)
 607     puts ("\tand create tags for extern variables if --globals is used.");
 608   else
 609     puts
 610       ("\tand create tags for extern variables unless --no-globals is used.");
 611
 612   if (CTAGS)
 613     puts ("-d, --defines\n\
 614         Create tag entries for C #define constants and enum constants, too.");
 615   else
 616     puts ("-D, --no-defines\n\
 617         Don't create tag entries for C #define constants and enum constants.\n\
 618         This makes the tags file smaller.");
 619
 620   if (!CTAGS)
 621     {
 622       puts ("-i FILE, --include=FILE\n\
 623         Include a note in tag file indicating that, when searching for\n\
 624         a tag, one should also consult the tags file FILE after\n\
 625         checking the current file.");
 626       puts ("-l LANG, --language=LANG\n\
 627         Force the following files to be considered as written in the\n\
 628         named language up to the next --language=LANG option.");
 629     }
 630
 631   if (CTAGS)
 632     puts ("--globals\n\
 633         Create tag entries for global variables in some languages.");
 634   else
 635     puts ("--no-globals\n\
 636         Do not create tag entries for global variables in some\n\
 637         languages.  This makes the tags file smaller.");
 638   puts ("--members\n\
 639         Create tag entries for member variables in C and derived languages.");
 640   /* The regexp options are only described when ETAGS_REGEXPS is defined. */
 641 #ifdef ETAGS_REGEXPS
 642   puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
 643         Make a tag for each line matching pattern REGEXP in the following\n\
 644         files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
 645         regexfile is a file containing one REGEXP per line.\n\
 646         REGEXP is anchored (as if preceded by ^).\n\
 647         The form /REGEXP/NAME/ creates a named tag.\n\
 648         For example Tcl named tags can be created with:\n\
 649         --regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
 650   puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
 651         Like -r, --regex but ignore case when matching expressions.");
 652   puts ("-R, --no-regex\n\
 653         Don't create tags from regexps for the following files.");
 654 #endif /* ETAGS_REGEXPS */
 655   puts ("-o FILE, --output=FILE\n\
 656         Write the tags to FILE.");
 657   puts ("-I, --ignore-indentation\n\
 658         Don't rely on indentation quite as much as normal.  Currently,\n\
 659         this means not to assume that a closing brace in the first\n\
 660         column is the final brace of a function or structure\n\
 661         definition in C and C++.");
 662   /* The options in the next group are described for ctags only. */
 663   if (CTAGS)
 664     {
 665       puts ("-t, --typedefs\n\
 666         Generate tag entries for C and Ada typedefs.");
 667       puts ("-T, --typedefs-and-c++\n\
 668         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
 669         and C++ member functions.");
 670       puts ("-u, --update\n\
 671         Update the tag entries for the given files, leaving tag\n\
 672         entries for other files in place.  Currently, this is\n\
 673         implemented by deleting the existing entries for the given\n\
 674         files and then rewriting the new entries at the end of the\n\
 675         tags file.  It is often faster to simply rebuild the entire\n\
 676         tag file than to use this.");
 677       puts ("-v, --vgrind\n\
 678         Generates an index of items intended for human consumption,\n\
 679         similar to the output of vgrind.  The index is sorted, and\n\
 680         gives the page number of each item.");
 681       puts ("-w, --no-warn\n\
 682         Suppress warning messages about entries defined in multiple\n\
 683         files.");
 684       puts ("-x, --cxref\n\
 685         Like --vgrind, but in the style of cxref, rather than vgrind.\n\
 686         The output uses line numbers instead of page numbers, but\n\
 687         beyond that the differences are cosmetic; try both to see\n\
 688         which you like.");
 689     }
 690
 691   puts ("-V, --version\n\
 692         Print the version of the program.\n\
 693 -h, --help\n\
 694         Print this help message.");
 695   /* The table of languages and suffixes is appended to the option list. */
 696   print_language_names ();
 697
 698   puts ("");
 699   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 700   /* Printing the help is not a failure: leave with the success status. */
 701   exit (GOOD);
 702 }
 703
 704 \f
 705 enum argument_type              /* kinds of command-line argument stored in main's argbuffer */
 706 {
 707   at_language,                  /* presumably a --language option -- TODO confirm in main */
 708   at_regexp,                    /* presumably a --regex option -- TODO confirm in main */
 709   at_filename,                  /* a file name seen on the command line */
 710   at_icregexp                   /* presumably an --ignore-case-regex option -- TODO confirm in main */
 711 };
 712
 713 /* This structure helps us allow mixing of --lang and file names. */
 714 typedef struct
 715 {
 716   enum argument_type arg_type;  /* which kind of argument this entry is */
 717   char *what;                   /* NOTE(review): presumably the argument text -- confirm in main */
 718   language *lang;               /* language of the regexp */
 719 } argument;
 720
 721 #ifdef VMS                      /* VMS specific functions */
 722
 723 #define EOS     '\0'
 724
 725 /* This is a BUG!  ANY arbitrary limit is a BUG!
 726    Won't someone please fix this?  */
 727 #define MAX_FILE_SPEC_LEN       255
 728 typedef struct  {
 729   short   curlen;
 730   char    body[MAX_FILE_SPEC_LEN + 1];
 731 } vspec;
 732
 733 /*
 734  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
 735  returning in each successive call the next file name matching the input
 736  spec. The function expects that each in_spec passed
 737  to it will be processed to completion; in particular, up to and
 738  including the call following that in which the last matching name
 739  is returned, the function ignores the value of in_spec, and will
 740  only start processing a new spec with the following call.
 741  If an error occurs, on return out_spec contains the value
 742  of in_spec when the error occurred.
 743
 744  With each successive file name returned in out_spec, the
 745  function's return value is one. When there are no more matching
 746  names the function returns zero. If on the first call no file
 747  matches in_spec, or there is any other error, -1 is returned.
 748 */
 749
 750 #include        <rmsdef.h>
 751 #include        <descrip.h>
 752 #define         OUTSIZE MAX_FILE_SPEC_LEN
 753 short
 754 fn_exp (out, in)
 755      vspec *out;
 756      char *in;
 757 {
 758   static long context = 0;      /* search context kept by lib$find_file */
 759   static struct dsc$descriptor_s o, i;  /* descriptors for OUT and IN */
 760   static bool pass1 = TRUE;     /* TRUE when the next call starts a new spec */
 761   long status;
 762   short retval;
 763   /* The descriptors are only set up when the previous spec was processed to completion. */
 764   if (pass1)
 765     {
 766       pass1 = FALSE;
 767       o.dsc$a_pointer = (char *) out;
 768       o.dsc$w_length = (short)OUTSIZE;
 769       i.dsc$a_pointer = in;
 770       i.dsc$w_length = (short)strlen(in);
 771       i.dsc$b_dtype = DSC$K_DTYPE_T;
 772       i.dsc$b_class = DSC$K_CLASS_S;
 773       o.dsc$b_dtype = DSC$K_DTYPE_VT;
 774       o.dsc$b_class = DSC$K_CLASS_VS;
 775     }
 776   if ((status = lib$find_file(&i, &o, &context, 0, 0)) == RMS$_NORMAL)
 777     {
 778       out->body[out->curlen] = EOS;     /* make the returned name a C string */
 779       return 1;
 780     }
 781   else if (status == RMS$_NMF)
 782     retval = 0;
 783   else
 784     {
 785       strncpy (out->body, in, MAX_FILE_SPEC_LEN);       /* IN may be longer than the buffer */
 786       out->body[MAX_FILE_SPEC_LEN] = EOS;       /* strncpy need not terminate */
 787       retval = -1;
 788     }
 789   lib$find_file_end(&context);
 790   pass1 = TRUE;
 791   return retval;
 792 }
 793
 794 /*
 795   v1.01 nmm 19-Aug-85 gfnames - return in successive calls the
 796   name of each file specified by the provided arg expanding wildcards.
 797 */
 798 char *
 799 gfnames (arg, p_error)
 800      char *arg;
 801      bool *p_error;
 802 {
 803   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 804   short status = fn_exp (&filename, arg);
 805
 806   /* fn_exp answers 1 for a match and 0 when the names are exhausted;
 807      any other value is an error, reported through *P_ERROR.  */
 808   *p_error = (status != 1 && status != 0);
 809
 810   /* Nothing to hand back once the expansion is exhausted.  */
 811   if (status == 0)
 812     return NULL;
 813
 814   /* Otherwise return the static buffer: the next file name, or on
 815      error whatever fn_exp stored there.  */
 816   return filename.body;
 817 }
 818 #ifndef OLD  /* Newer versions of VMS do provide `system'.  */
 819 int          /* Stub: report the missing feature and fail with -1.  */
 820 system (cmd)
 821      char *cmd;
 822 {
 823   error ("%s", "system() function not implemented under VMS");
 824   return -1;    /* give callers a defined failure value */
 825 }
 826 #endif
 827 #define VERSION_DELIM   ';'
 828 char *massage_name (s)
 829      char *s;
 830 {
 831   /* Fold S to lower case in place, stopping at the first VERSION_DELIM,
 832      which is overwritten with EOS.  Returns S itself.  */
 833   char *p;
 834
 835   for (p = s; *p != EOS && *p != VERSION_DELIM; p++)
 836     *p = lowcase (*p);
 837
 838   if (*p == VERSION_DELIM)
 839     *p = EOS;
 840
 841   return s;
 842 }
 843 #endif /* VMS */
 844
 845 \f
 846 int
 847 main (argc, argv)
 848      int argc;
 849      char *argv[];
 850 {
 851   int i;
 852   unsigned int nincluded_files;
 853   char **included_files;
 854   char *this_file;
 855   argument *argbuffer;
 856   int current_arg, file_count;
 857   linebuffer filename_lb;
 858 #ifdef VMS
 859   bool got_err;
 860 #endif
 861
 862 #ifdef DOS_NT
 863   _fmode = O_BINARY;   /* all of files are treated as binary files */
 864 #endif /* DOS_NT */
 865
 866   progname = argv[0];
 867   nincluded_files = 0;
 868   included_files = xnew (argc, char *);
 869   current_arg = 0;
 870   file_count = 0;
 871
 872   /* Allocate enough no matter what happens.  Overkill, but each one
 873      is small. */
 874   argbuffer = xnew (argc, argument);
 875
 876 #ifdef ETAGS_REGEXPS
 877   /* Set syntax for regular expression routines. */
 878   re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
 879   /* Translation table for case-insensitive search. */
 880   for (i = 0; i < CHAR_SET_SIZE; i++)
 881     lc_trans[i] = lowcase (i);
 882 #endif /* ETAGS_REGEXPS */
 883
 884   /*
 885    * If etags, always find typedefs and structure tags.  Why not?
 886    * Also default is to find macro constants, enum constants and
 887    * global variables.
 888    */
 889   if (!CTAGS)
 890     {
 891       typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
 892       globals = TRUE;
 893       members = FALSE;
 894     }
 895
 896   while (1)
 897     {
 898       int opt;
 899       char *optstring;
 900
 901 #ifdef ETAGS_REGEXPS
 902       optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
 903 #else
 904       optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
 905 #endif /* ETAGS_REGEXPS */
 906
 907 #ifndef LONG_OPTIONS
 908       optstring = optstring + 1;
 909 #endif /* LONG_OPTIONS */
 910
 911       opt = getopt_long (argc, argv, optstring, longopts, 0);
 912       if (opt == EOF)
 913         break;
 914
 915       switch (opt)
 916         {
 917         case 0:
 918           /* If getopt returns 0, then it has already processed a
 919              long-named option.  We should do nothing.  */
 920           break;
 921
 922         case 1:
 923           /* This means that a file name has been seen.  Record it. */
 924           argbuffer[current_arg].arg_type = at_filename;
 925           argbuffer[current_arg].what = optarg;
 926           ++current_arg;
 927           ++file_count;
 928           break;
 929
 930           /* Common options. */
 931         case 'a': append_to_tagfile = TRUE;     break;
 932         case 'C': cplusplus = TRUE;             break;
 933         case 'd': constantypedefs = TRUE;       break;
 934         case 'D': constantypedefs = FALSE;      break;
 935         case 'f':               /* for compatibility with old makefiles */
 936         case 'o':
 937           if (tagfile)
 938             {
 939               error ("-o option may only be given once.", (char *)NULL);
 940               suggest_asking_for_help ();
 941             }
 942           tagfile = optarg;
 943           break;
 944         case 'I':
 945         case 'S':               /* for backward compatibility */
 946           noindentypedefs = TRUE;
 947           break;
 948         case 'l':
 949           {
 950             language *lang = get_language_from_name (optarg);
 951             if (lang != NULL)
 952               {
 953                 argbuffer[current_arg].lang = lang;
 954                 argbuffer[current_arg].arg_type = at_language;
 955                 ++current_arg;
 956               }
 957           }
 958           break;
 959 #ifdef ETAGS_REGEXPS
 960         case 'r':
 961           argbuffer[current_arg].arg_type = at_regexp;
 962           argbuffer[current_arg].what = optarg;
 963           ++current_arg;
 964           break;
 965         case 'R':
 966           argbuffer[current_arg].arg_type = at_regexp;
 967           argbuffer[current_arg].what = NULL;
 968           ++current_arg;
 969           break;
 970         case 'c':
 971           argbuffer[current_arg].arg_type = at_icregexp;
 972           argbuffer[current_arg].what = optarg;
 973           ++current_arg;
 974           break;
 975 #endif /* ETAGS_REGEXPS */
 976         case 'V':
 977           print_version ();
 978           break;
 979         case 'h':
 980         case 'H':
 981           print_help ();
 982           break;
 983         case 't':
 984           typedefs = TRUE;
 985           break;
 986         case 'T':
 987           typedefs = typedefs_and_cplusplus = TRUE;
 988           break;
 989 #if (!CTAGS)
 990           /* Etags options */
 991         case 'i':
 992           included_files[nincluded_files++] = optarg;
 993           break;
 994 #else /* CTAGS */
 995           /* Ctags options. */
 996         case 'B': searchar = '?';       break;
 997         case 'u': update = TRUE;        break;
 998         case 'v': vgrind_style = TRUE;  /*FALLTHRU*/
 999         case 'x': cxref_style = TRUE;   break;
1000         case 'w': no_warnings = TRUE;   break;
1001 #endif /* CTAGS */
1002         default:
1003           suggest_asking_for_help ();
1004         }
1005     }
1006
1007   for (; optind < argc; ++optind)
1008     {
1009       argbuffer[current_arg].arg_type = at_filename;
1010       argbuffer[current_arg].what = argv[optind];
1011       ++current_arg;
1012       ++file_count;
1013     }
1014
1015   if (nincluded_files == 0 && file_count == 0)
1016     {
1017       error ("no input files specified.", (char *)NULL);
1018       suggest_asking_for_help ();
1019     }
1020
1021   if (tagfile == NULL)
1022     tagfile = CTAGS ? "tags" : "TAGS";
1023   cwd = etags_getcwd ();        /* the current working directory */
1024   if (cwd[strlen (cwd) - 1] != '/')
1025     {
1026       char *oldcwd = cwd;
1027       cwd = concat (oldcwd, "/", "");
1028       free (oldcwd);
1029     }
1030   if (streq (tagfile, "-"))
1031     tagfiledir = cwd;
1032   else
1033     tagfiledir = absolute_dirname (tagfile, cwd);
1034
1035   init ();                      /* set up boolean "functions" */
1036
1037   initbuffer (&lb);
1038   initbuffer (&token_name);
1039   initbuffer (&lbs[0].lb);
1040   initbuffer (&lbs[1].lb);
1041   initbuffer (&filename_lb);
1042
1043   if (!CTAGS)
1044     {
1045       if (streq (tagfile, "-"))
1046         {
1047           tagf = stdout;
1048 #ifdef DOS_NT
1049           /* Switch redirected `stdout' to binary mode (setting `_fmode'
1050              doesn't take effect until after `stdout' is already open). */
1051           if (!isatty (fileno (stdout)))
1052             setmode (fileno (stdout), O_BINARY);
1053 #endif /* DOS_NT */
1054         }
1055       else
1056         tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1057       if (tagf == NULL)
1058         pfatal (tagfile);
1059     }
1060
1061   /*
1062    * Loop through files finding functions.
1063    */
1064   for (i = 0; i < current_arg; ++i)
1065     {
1066       switch (argbuffer[i].arg_type)
1067         {
1068         case at_language:
1069           forced_lang = argbuffer[i].lang;
1070           break;
1071 #ifdef ETAGS_REGEXPS
1072         case at_regexp:
1073           analyse_regex (argbuffer[i].what, FALSE);
1074           break;
1075         case at_icregexp:
1076           analyse_regex (argbuffer[i].what, TRUE);
1077           break;
1078 #endif
1079         case at_filename:
1080 #ifdef VMS
1081           while ((this_file = gfnames (argbuffer[i].what, &got_err)) != NULL)
1082             {
1083               if (got_err)
1084                 {
1085                   error ("can't find file %s\n", this_file);
1086                   argc--, argv++;
1087                 }
1088               else
1089                 {
1090                   this_file = massage_name (this_file);
1091                 }
1092 #else
1093               this_file = argbuffer[i].what;
1094 #endif
1095               /* Input file named "-" means read file names from stdin
1096                  (one per line) and use them. */
1097               if (streq (this_file, "-"))
1098                 while (readline_internal (&filename_lb, stdin) > 0)
1099                   process_file (filename_lb.buffer);
1100               else
1101                 process_file (this_file);
1102 #ifdef VMS
1103             }
1104 #endif
1105           break;
1106         }
1107     }
1108
1109 #ifdef ETAGS_REGEXPS
1110   free_patterns ();
1111 #endif /* ETAGS_REGEXPS */
1112
1113   if (!CTAGS)
1114     {
1115       while (nincluded_files-- > 0)
1116         fprintf (tagf, "\f\n%s,include\n", *included_files++);
1117
1118       fclose (tagf);
1119       exit (GOOD);
1120     }
1121
1122   /* If CTAGS, we are here.  process_file did not write the tags yet,
1123      because we want them ordered.  Let's do it now. */
1124   if (cxref_style)
1125     {
1126       put_entries (head);
1127       free_tree (head);
1128       head = NULL;
1129       exit (GOOD);
1130     }
1131
1132   if (update)
1133     {
1134       char cmd[BUFSIZ];
1135       for (i = 0; i < current_arg; ++i)
1136         {
1137           if (argbuffer[i].arg_type != at_filename)
1138             continue;
1139           sprintf (cmd,
1140                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
1141                    tagfile, argbuffer[i].what, tagfile);
1142           if (system (cmd) != GOOD)
1143             fatal ("failed to execute shell command", (char *)NULL);
1144         }
1145       append_to_tagfile = TRUE;
1146     }
1147
1148   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
1149   if (tagf == NULL)
1150     pfatal (tagfile);
1151   put_entries (head);
1152   free_tree (head);
1153   head = NULL;
1154   fclose (tagf);
1155
1156   if (update)
1157     {
1158       char cmd[BUFSIZ];
1159       sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
1160       exit (system (cmd));
1161     }
1162   return GOOD;
1163 }
1164
1165
1166
1167 /*
1168  * Return a compressor given the file name.  If EXTPTR is non-zero,
1169  * return a pointer into FILE where the compressor-specific
1170  * extension begins.  If no compressor is found, NULL is returned
1171  * and EXTPTR is not significant.
1172  * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca>
1173  */
1174 compressor *
1175 get_compressor_from_suffix (file, extptr)
1176      char *file;
1177      char **extptr;
1178 {
1179   compressor *compr;
1180   char *slash, *suffix;
1181
1182   /* This relies on FN to be after canonicalize_filename,
1183      so we don't need to consider backslashes on DOS_NT.  */
1184   slash = etags_strrchr (file, '/');
1185   suffix = etags_strrchr (file, '.');
1186   if (suffix == NULL || suffix < slash)
1187     return NULL;
1188   if (extptr != NULL)
1189     *extptr = suffix;
1190   suffix += 1;
1191   /* Let those poor souls who live with DOS 8+3 file name limits get
1192      some solace by treating foo.cgz as if it were foo.c.gz, etc.
1193      Only the first do loop is run if not MSDOS */
1194   do
1195     {
1196       for (compr = compressors; compr->suffix != NULL; compr++)
1197         if (streq (compr->suffix, suffix))
1198           return compr;
1199 #ifndef MSDOS
1200       break;
1201 #endif
1202       if (extptr != NULL)
1203         *extptr = ++suffix;
1204     } while (*suffix != '\0');
1205   return NULL;
1206 }
1207
1208
1209
1210 /*
1211  * Return a language given the name.
1212  */
1213 language *
1214 get_language_from_name (name)
1215      char *name;
1216 {
1217   language *lang;
1218
1219   if (name == NULL)
1220     error ("empty language name", (char *)NULL);
1221   else
1222     {
1223       for (lang = lang_names; lang->name != NULL; lang++)
1224         if (streq (name, lang->name))
1225           return lang;
1226       error ("unknown language \"%s\"", name);
1227     }
1228
1229   return NULL;
1230 }
1231
1232
1233 /*
1234  * Return a language given the interpreter name.
1235  */
1236 language *
1237 get_language_from_interpreter (interpreter)
1238      char *interpreter;
1239 {
1240   language *lang;
1241   char **iname;
1242
1243   if (interpreter == NULL)
1244     return NULL;
1245   for (lang = lang_names; lang->name != NULL; lang++)
1246     if (lang->interpreters != NULL)
1247       for (iname = lang->interpreters; *iname != NULL; iname++)
1248         if (streq (*iname, interpreter))
1249             return lang;
1250
1251   return NULL;
1252 }
1253
1254
1255
1256 /*
1257  * Return a language given the file name.
1258  */
1259 language *
1260 get_language_from_suffix (file)
1261      char *file;
1262 {
1263   language *lang;
1264   char **ext, *suffix;
1265
1266   suffix = etags_strrchr (file, '.');
1267   if (suffix == NULL)
1268     return NULL;
1269   suffix += 1;
1270   for (lang = lang_names; lang->name != NULL; lang++)
1271     if (lang->suffixes != NULL)
1272       for (ext = lang->suffixes; *ext != NULL; ext++)
1273         if (streq (*ext, suffix))
1274           return lang;
1275   return NULL;
1276 }
1277
1278
1279
1280 /*
1281  * This routine is called on each file argument.
1282  */
1283 void
1284 process_file (file)
1285      char *file;
1286 {
1287   struct stat stat_buf;
1288   FILE *inf;
1289   compressor *compr;
1290   char *compressed_name, *uncompressed_name;
1291   char *ext, *real_name;
1292
1293
1294   canonicalize_filename (file);
1295   if (streq (file, tagfile) && !streq (tagfile, "-"))
1296     {
1297       error ("skipping inclusion of %s in self.", file);
1298       return;
1299     }
1300   if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
1301     {
1302       compressed_name = NULL;
1303       real_name = uncompressed_name = savestr (file);
1304     }
1305   else
1306     {
1307       real_name = compressed_name = savestr (file);
1308       uncompressed_name = savenstr (file, ext - file);
1309     }
1310
1311   /* If the canonicalised uncompressed name has already be dealt with,
1312      skip it silently, else add it to the list. */
1313   {
1314     typedef struct processed_file
1315     {
1316       char *filename;
1317       struct processed_file *next;
1318     } processed_file;
1319     static processed_file *pf_head = NULL;
1320     register processed_file *fnp;
1321
1322     for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
1323       if (streq (uncompressed_name, fnp->filename))
1324         goto exit;
1325     fnp = pf_head;
1326     pf_head = xnew (1, struct processed_file);
1327     pf_head->filename = savestr (uncompressed_name);
1328     pf_head->next = fnp;
1329   }
1330
1331   if (stat (real_name, &stat_buf) != 0)
1332     {
1333       /* Reset real_name and try with a different name. */
1334       real_name = NULL;
1335       if (compressed_name != NULL) /* try with the given suffix */
1336         {
1337           if (stat (uncompressed_name, &stat_buf) == 0)
1338             real_name = uncompressed_name;
1339         }
1340       else                      /* try all possible suffixes */
1341         {
1342           for (compr = compressors; compr->suffix != NULL; compr++)
1343             {
1344               compressed_name = concat (file, ".", compr->suffix);
1345               if (stat (compressed_name, &stat_buf) != 0)
1346                 {
1347 #ifdef MSDOS
1348                   char *suf = compressed_name + strlen (file);
1349                   size_t suflen = strlen (compr->suffix) + 1;
1350                   for ( ; suf[1]; suf++, suflen--)
1351                     {
1352                       memmove (suf, suf + 1, suflen);
1353                       if (stat (compressed_name, &stat_buf) == 0)
1354                         {
1355                           real_name = compressed_name;
1356                           break;
1357                         }
1358                     }
1359                   if (real_name != NULL)
1360                     break;
1361 #endif
1362                   free (compressed_name);
1363                   compressed_name = NULL;
1364                 }
1365               else
1366                 {
1367                   real_name = compressed_name;
1368                   break;
1369                 }
1370             }
1371         }
1372       if (real_name == NULL)
1373         {
1374           perror (file);
1375           goto exit;
1376         }
1377     } /* try with a different name */
1378
1379   if (!S_ISREG (stat_buf.st_mode))
1380     {
1381       error ("skipping %s: it is not a regular file.", real_name);
1382       goto exit;
1383     }
1384   if (real_name == compressed_name)
1385     {
1386       char *cmd = concat (compr->command, " ", real_name);
1387       inf = popen (cmd, "r");
1388       free (cmd);
1389     }
1390   else
1391     inf = fopen (real_name, "r");
1392   if (inf == NULL)
1393     {
1394       perror (real_name);
1395       goto exit;
1396     }
1397
1398   find_entries (uncompressed_name, inf);
1399
1400   if (real_name == compressed_name)
1401     pclose (inf);
1402   else
1403     fclose (inf);
1404
1405   if (!CTAGS)
1406     {
1407       char *filename;
1408
1409       if (filename_is_absolute (uncompressed_name))
1410         {
1411           /* file is an absolute file name.  Canonicalise it. */
1412           filename = absolute_filename (uncompressed_name, cwd);
1413         }
1414       else
1415         {
1416           /* file is a file name relative to cwd.  Make it relative
1417              to the directory of the tags file. */
1418           filename = relative_filename (uncompressed_name, tagfiledir);
1419         }
1420       fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
1421       free (filename);
1422       put_entries (head);
1423       free_tree (head);
1424       head = NULL;
1425     }
1426
1427  exit:
1428   if (compressed_name) free(compressed_name);
1429   if (uncompressed_name) free(uncompressed_name);
1430   return;
1431 }
1432
1433 /*
1434  * This routine sets up the boolean pseudo-functions which work
1435  * by setting boolean flags dependent upon the corresponding character.
1436  * Every char which is NOT in that string is not a white char.  Therefore,
1437  * all of the array "_wht" is set to FALSE, and then the elements
1438  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
1439  * of a char is TRUE if it is the string "white", else FALSE.
1440  */
1441 void
1442 init ()
1443 {
1444   register char *sp;
1445   register int i;
1446
1447   for (i = 0; i < CHARS; i++)
1448     iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
1449   for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
1450   for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
1451   for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
1452   for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
1453   for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
1454   iswhite('\0') = iswhite('\n');
1455   notinname('\0') = notinname('\n');
1456   begtoken('\0') = begtoken('\n');
1457   intoken('\0') = intoken('\n');
1458   endtoken('\0') = endtoken('\n');
1459 }
1460
1461 /*
1462  * This routine opens the specified file and calls the function
1463  * which finds the function and type definitions.
1464  */
1465 node *last_node = NULL;
1466
1467 void
1468 find_entries (file, inf)
1469      char *file;
1470      FILE *inf;
1471 {
1472   char *cp;
1473   language *lang;
1474   node *old_last_node;
1475
1476   /* Memory leakage here: the string pointed by curfile is
1477      never released, because curfile is copied into np->file
1478      for each node, to be used in CTAGS mode.  The amount of
1479      memory leaked here is the sum of the lengths of the
1480      file names. */
1481   curfile = savestr (file);
1482
1483   /* If user specified a language, use it. */
1484   lang = forced_lang;
1485   if (lang != NULL && lang->function != NULL)
1486     {
1487       curlang = lang;
1488       lang->function (inf);
1489       return;
1490     }
1491
1492   /* Try to guess the language given the file name. */
1493   lang = get_language_from_suffix (file);
1494   if (lang != NULL && lang->function != NULL)
1495     {
1496       curlang = lang;
1497       lang->function (inf);
1498       return;
1499     }
1500
1501   /* Look for sharp-bang as the first two characters. */
1502   if (readline_internal (&lb, inf) > 0
1503       && lb.len >= 2
1504       && lb.buffer[0] == '#'
1505       && lb.buffer[1] == '!')
1506     {
1507       char *lp;
1508
1509       /* Set lp to point at the first char after the last slash in the
1510          line or, if no slashes, at the first nonblank.  Then set cp to
1511          the first successive blank and terminate the string. */
1512       lp = etags_strrchr (lb.buffer+2, '/');
1513       if (lp != NULL)
1514         lp += 1;
1515       else
1516         lp = skip_spaces (lb.buffer + 2);
1517       cp = skip_non_spaces (lp);
1518       *cp = '\0';
1519
1520       if (strlen (lp) > 0)
1521         {
1522           lang = get_language_from_interpreter (lp);
1523           if (lang != NULL && lang->function != NULL)
1524             {
1525               curlang = lang;
1526               lang->function (inf);
1527               return;
1528             }
1529         }
1530     }
1531   /* We rewind here, even if inf may be a pipe.  We fail if the
1532      length of the first line is longer than the pipe block size,
1533      which is unlikely. */
1534   rewind (inf);
1535
1536   /* Try Fortran. */
1537   old_last_node = last_node;
1538   curlang = get_language_from_name ("fortran");
1539   Fortran_functions (inf);
1540
1541   /* No Fortran entries found.  Try C. */
1542   if (old_last_node == last_node)
1543     {
1544       /* We do not tag if rewind fails.
1545          Only the file name will be recorded in the tags file. */
1546       rewind (inf);
1547       curlang = get_language_from_name (cplusplus ? "c++" : "c");
1548       default_C_entries (inf);
1549     }
1550   return;
1551 }
1552 \f
1553 /* Record a tag. */
1554 void
1555 pfnote (name, is_func, linestart, linelen, lno, cno)
1556      char *name;                /* tag name, or NULL if unnamed */
1557      bool is_func;              /* tag is a function */
1558      char *linestart;           /* start of the line where tag is */
1559      int linelen;               /* length of the line where tag is */
1560      int lno;                   /* line number */
1561      long cno;                  /* character number */
1562 {
1563   register node *np;
1564
1565   if (CTAGS && name == NULL)
1566     return;
1567
1568   np = xnew (1, node);
1569
1570   /* If ctags mode, change name "main" to M<thisfilename>. */
1571   if (CTAGS && !cxref_style && streq (name, "main"))
1572     {
1573       register char *fp = etags_strrchr (curfile, '/');
1574       np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
1575       fp = etags_strrchr (np->name, '.');
1576       if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
1577         fp[0] = '\0';
1578     }
1579   else
1580     np->name = name;
1581   np->been_warned = FALSE;
1582   np->file = curfile;
1583   np->is_func = is_func;
1584   np->lno = lno;
1585   /* Our char numbers are 0-base, because of C language tradition?
1586      ctags compatibility?  old versions compatibility?   I don't know.
1587      Anyway, since emacs's are 1-base we expect etags.el to take care
1588      of the difference.  If we wanted to have 1-based numbers, we would
1589      uncomment the +1 below. */
1590   np->cno = cno /* + 1 */ ;
1591   np->left = np->right = NULL;
1592   if (CTAGS && !cxref_style)
1593     {
1594       if (strlen (linestart) < 50)
1595         np->pat = concat (linestart, "$", "");
1596       else
1597         np->pat = savenstr (linestart, 50);
1598     }
1599   else
1600     np->pat = savenstr (linestart, linelen);
1601
1602   add_node (np, &head);
1603 }
1604
1605 /* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997]
1606  * From: Sam Kendall <kendall@mv.mv.com>
1607  * Subject: Proposal for firming up the TAGS format specification
1608  * To: F.Potorti@cnuce.cnr.it
1609  *
1610  * pfnote should emit the optimized form [unnamed tag] only if:
1611  *  1. name does not contain any of the characters " \t\r\n(),;";
1612  *  2. linestart contains name as either a rightmost, or rightmost but
1613  *     one character, substring;
1614  *  3. the character, if any, immediately before name in linestart must
1615  *     be one of the characters " \t(),;";
1616  *  4. the character, if any, immediately after name in linestart must
1617  *     also be one of the characters " \t(),;".
1618  *
1619  * The real implementation uses the notinname() macro, which recognises
1620  * characters slightly different form " \t\r\n(),;".  See the variable
1621  * `nonam'.
1622  */
1623 #define traditional_tag_style TRUE
1624 void
1625 new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
1626      char *name;                /* tag name, or NULL if unnamed */
1627      int namelen;               /* tag length */
1628      bool is_func;              /* tag is a function */
1629      char *linestart;           /* start of the line where tag is */
1630      int linelen;               /* length of the line where tag is */
1631      int lno;                   /* line number */
1632      long cno;                  /* character number */
1633 {
1634   register char *cp;
1635   bool named;
1636
1637   named = TRUE;
1638   if (!CTAGS)
1639     {
1640       for (cp = name; !notinname (*cp); cp++)
1641         continue;
1642       if (*cp == '\0')                          /* rule #1 */
1643         {
1644           cp = linestart + linelen - namelen;
1645           if (notinname (linestart[linelen-1]))
1646             cp -= 1;                            /* rule #4 */
1647           if (cp >= linestart                   /* rule #2 */
1648               && (cp == linestart
1649                   || notinname (cp[-1]))        /* rule #3 */
1650               && strneq (name, cp, namelen))    /* rule #2 */
1651             named = FALSE;      /* use unnamed tag */
1652         }
1653     }
1654
1655   if (named)
1656     name = savenstr (name, namelen);
1657   else
1658     name = NULL;
1659   pfnote (name, is_func, linestart, linelen, lno, cno);
1660 }
1661
1662 /*
1663  * free_tree ()
1664  *      recurse on left children, iterate on right children.
1665  */
1666 void
1667 free_tree (np)
1668      register node *np;
1669 {
1670   while (np)
1671     {
1672       register node *node_right = np->right;
1673       free_tree (np->left);
1674       if (np->name != NULL)
1675         free (np->name);
1676       free (np->pat);
1677       free (np);
1678       np = node_right;
1679     }
1680 }
1681
1682 /*
1683  * add_node ()
1684  *      Adds a node to the tree of nodes.  In etags mode, we don't keep
1685  *      it sorted; we just keep a linear list.  In ctags mode, maintain
1686  *      an ordered tree, with no attempt at balancing.
1687  *
1688  *      add_node is the only function allowed to add nodes, so it can
1689  *      maintain state.
1690  */
1691 void
1692 add_node (np, cur_node_p)
1693      node *np, **cur_node_p;
1694 {
1695   register int dif;
1696   register node *cur_node = *cur_node_p;
1697
1698   if (cur_node == NULL)
1699     {
1700       *cur_node_p = np;
1701       last_node = np;
1702       return;
1703     }
1704
1705   if (!CTAGS)
1706     {
1707       /* Etags Mode */
1708       if (last_node == NULL)
1709         fatal ("internal error in add_node", (char *)NULL);
1710       last_node->right = np;
1711       last_node = np;
1712     }
1713   else
1714     {
1715       /* Ctags Mode */
1716       dif = strcmp (np->name, cur_node->name);
1717
1718       /*
1719        * If this tag name matches an existing one, then
1720        * do not add the node, but maybe print a warning.
1721        */
1722       if (!dif)
1723         {
1724           if (streq (np->file, cur_node->file))
1725             {
1726               if (!no_warnings)
1727                 {
1728                   fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
1729                            np->file, lineno, np->name);
1730                   fprintf (stderr, "Second entry ignored\n");
1731                 }
1732             }
1733           else if (!cur_node->been_warned && !no_warnings)
1734             {
1735               fprintf
1736                 (stderr,
1737                  "Duplicate entry in files %s and %s: %s (Warning only)\n",
1738                  np->file, cur_node->file, np->name);
1739               cur_node->been_warned = TRUE;
1740             }
1741           return;
1742         }
1743
1744       /* Actually add the node */
1745       add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
1746     }
1747 }
1748 \f
1749 void
1750 put_entries (np)
1751      register node *np;
1752 {
1753   register char *sp;
1754
1755   if (np == NULL)
1756     return;
1757
1758   /* Output subentries that precede this one */
1759   put_entries (np->left);
1760
1761   /* Output this entry */
1762
1763   if (!CTAGS)
1764     {
1765       if (np->name != NULL)
1766         fprintf (tagf, "%s\177%s\001%d,%ld\n",
1767                  np->pat, np->name, np->lno, np->cno);
1768       else
1769         fprintf (tagf, "%s\177%d,%ld\n",
1770                  np->pat, np->lno, np->cno);
1771     }
1772   else
1773     {
1774       if (np->name == NULL)
1775         error ("internal error: NULL name in ctags mode.", (char *)NULL);
1776
1777       if (cxref_style)
1778         {
1779           if (vgrind_style)
1780             fprintf (stdout, "%s %s %d\n",
1781                      np->name, np->file, (np->lno + 63) / 64);
1782           else
1783             fprintf (stdout, "%-16s %3d %-16s %s\n",
1784                      np->name, np->lno, np->file, np->pat);
1785         }
1786       else
1787         {
1788           fprintf (tagf, "%s\t%s\t", np->name, np->file);
1789
1790           if (np->is_func)
1791             {                   /* a function */
1792               putc (searchar, tagf);
1793               putc ('^', tagf);
1794
1795               for (sp = np->pat; *sp; sp++)
1796                 {
1797                   if (*sp == '\\' || *sp == searchar)
1798                     putc ('\\', tagf);
1799                   putc (*sp, tagf);
1800                 }
1801               putc (searchar, tagf);
1802             }
1803           else
1804             {                   /* a typedef; text pattern inadequate */
1805               fprintf (tagf, "%d", np->lno);
1806             }
1807           putc ('\n', tagf);
1808         }
1809     }
1810
1811   /* Output subentries that follow this one */
1812   put_entries (np->right);
1813 }
1814
/*
 * Length of a number's decimal representation, that is the number of
 * characters that printing NUM with "%ld" produces.  A negative NUM
 * counts one extra character for its minus sign.
 */
int
number_len (num)
     long num;
{
  unsigned long magnitude;
  int len = 1;

  if (num < 0)
    {
      len += 1;                 /* the minus sign */
      /* Negate in unsigned arithmetic, which is well defined even for
         the most negative value. */
      magnitude = 0UL - (unsigned long) num;
    }
  else
    magnitude = (unsigned long) num;

  while ((magnitude /= 10) > 0)
    len += 1;
  return len;
}
1825
1826 /*
1827  * Return total number of characters that put_entries will output for
1828  * the nodes in the subtree of the specified node.  Works only if
1829  * we are not ctags, but called only in that case.  This count
1830  * is irrelevant with the new tags.el, but is still supplied for
1831  * backward compatibility.
1832  */
1833 int
1834 total_size_of_entries (np)
1835      register node *np;
1836 {
1837   register int total;
1838
1839   if (np == NULL)
1840     return 0;
1841
1842   for (total = 0; np != NULL; np = np->right)
1843     {
1844       /* Count left subentries. */
1845       total += total_size_of_entries (np->left);
1846
1847       /* Count this entry */
1848       total += strlen (np->pat) + 1;
1849       total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
1850       if (np->name != NULL)
1851         total += 1 + strlen (np->name); /* \001name */
1852     }
1853
1854   return total;
1855 }
1856 \f
/*
 * The C symbol tables.
 *
 * Kinds of reserved word, as assigned to each keyword by the gperf
 * input table that follows.  The numeric values follow the order of
 * declaration, starting from zero.
 */
enum sym_type
{
  st_none = 0,          /* presumably "not a reserved word" -- TODO confirm */
  st_C_objprot,         /* @interface, @protocol */
  st_C_objimpl,         /* @implementation */
  st_C_objend,          /* @end */
  st_C_gnumacro,        /* NOTE(review): no keyword visible here maps to this */
  st_C_ignore,          /* if, for, while, switch, return, import, ... */
  st_C_javastruct,      /* extends, implements */
  st_C_operator,        /* operator */
  st_C_struct,          /* struct, union, class, namespace, interface, domain */
  st_C_extern,          /* extern */
  st_C_enum,            /* enum */
  st_C_define,          /* define */
  st_C_typedef,         /* typedef */
  st_C_typespec         /* int, char, void, static, const, ... */
};
1870
1871 /* Feed stuff between (but not including) %[ and %] lines to:
1872       gperf -c -k 1,3 -o -p -r -t
1873 %[
1874 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
1875 %%
1876 if,             0,      st_C_ignore
1877 for,            0,      st_C_ignore
1878 while,          0,      st_C_ignore
1879 switch,         0,      st_C_ignore
1880 return,         0,      st_C_ignore
1881 @interface,     0,      st_C_objprot
1882 @protocol,      0,      st_C_objprot
1883 @implementation,0,      st_C_objimpl
1884 @end,           0,      st_C_objend
1885 import,         C_JAVA, st_C_ignore
1886 package,        C_JAVA, st_C_ignore
1887 friend,         C_PLPL, st_C_ignore
1888 extends,        C_JAVA, st_C_javastruct
1889 implements,     C_JAVA, st_C_javastruct
1890 interface,      C_JAVA, st_C_struct
1891 class,          C_PLPL, st_C_struct
1892 namespace,      C_PLPL, st_C_struct
1893 domain,         C_STAR, st_C_struct
1894 union,          0,      st_C_struct
1895 struct,         0,      st_C_struct
1896 extern,         0,      st_C_extern
1897 enum,           0,      st_C_enum
1898 typedef,        0,      st_C_typedef
1899 define,         0,      st_C_define
1900 operator,       C_PLPL, st_C_operator
1901 bool,           C_PLPL, st_C_typespec
1902 long,           0,      st_C_typespec
1903 short,          0,      st_C_typespec
1904 int,            0,      st_C_typespec
1905 char,           0,      st_C_typespec
1906 float,          0,      st_C_typespec
1907 double,         0,      st_C_typespec
1908 signed,         0,      st_C_typespec
1909 unsigned,       0,      st_C_typespec
1910 auto,           0,      st_C_typespec
1911 void,           0,      st_C_typespec
1912 static,         0,      st_C_typespec
1913 const,          0,      st_C_typespec
1914 volatile,       0,      st_C_typespec
1915 explicit,       C_PLPL, st_C_typespec
1916 mutable,        C_PLPL, st_C_typespec
1917 typename,       C_PLPL, st_C_typespec
1918 # DEFUN used in emacs, the next three used in glibc (SYSCALL only for mach).
1919 DEFUN,          0,      st_C_gnumacro
1920 SYSCALL,        0,      st_C_gnumacro
1921 ENTRY,          0,      st_C_gnumacro
1922 PSEUDO,         0,      st_C_gnumacro
1923 # These are defined inside C functions, so currently they are not met.
1924 # EXFUN used in glibc, DEFVAR_* in emacs.
1925 #EXFUN,         0,      st_C_gnumacro
1926 #DEFVAR_,       0,      st_C_gnumacro
1927 %]
1928 and replace lines between %< and %> with its output. */
1929 /*%<*/
1930 /* C code produced by gperf version 2.7.1 (19981006 egcs) */
1931 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
     /* One keyword table entry: NAME is the keyword text, C_EXT a mask of
        the language extensions the keyword is restricted to (0 means no
        restriction, see C_symtype), TYPE its classification. */
1932 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
1933
1934 #define TOTAL_KEYWORDS 46
1935 #define MIN_WORD_LENGTH 2
1936 #define MAX_WORD_LENGTH 15
1937 #define MIN_HASH_VALUE 13
1938 #define MAX_HASH_VALUE 123
1939 /* maximum key range = 111, duplicates = 0 */
1940
1941 #ifdef __GNUC__
1942 __inline
1943 #endif
     /* Hash the LEN-character key at STR.  The value is LEN plus the
        association value of the first character; for every length other
        than 1 or 2 the association value of the third character (str[2])
        is added as well.  in_word_set only calls this with LEN between
        MIN_WORD_LENGTH and MAX_WORD_LENGTH. */
1944 static unsigned int
1945 hash (str, len)
1946      register const char *str;
1947      register unsigned int len;
1948 {
     /* Indexed by character code (256 entries).  Characters that appear
        in no selected key position hold 124, i.e. MAX_HASH_VALUE + 1. */
1949   static unsigned char asso_values[] =
1950     {
1951       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1952       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1953       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1954       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1955       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1956       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1957       124, 124, 124, 124,   3, 124, 124, 124,  43,   6,
1958        11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1959        11, 124, 124,  58,   7, 124, 124, 124, 124, 124,
1960       124, 124, 124, 124, 124, 124, 124,  57,   7,  42,
1961         4,  14,  52,   0, 124,  53, 124, 124,  29,  11,
1962         6,  35,  32, 124,  29,  34,  59,  58,  51,  24,
1963       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1964       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1965       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1966       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1967       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1968       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1969       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1970       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1971       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1972       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1973       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1974       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1975       124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
1976       124, 124, 124, 124, 124, 124
1977     };
1978   register int hval = len;
1979
1980   switch (hval)
1981     {
1982       default:
1983       case 3:
1984         hval += asso_values[(unsigned char)str[2]];
             /* FALLTHRU */
1985       case 2:
1986       case 1:
1987         hval += asso_values[(unsigned char)str[0]];
1988         break;
1989     }
1990   return hval;
1991 }
1992
1993 #ifdef __GNUC__
1994 __inline
1995 #endif
     /* Look up the LEN-character token at STR in the keyword table.
        Return a pointer to its entry, or 0 when LEN is outside
        [MIN_WORD_LENGTH, MAX_WORD_LENGTH], the hash value is out of
        range, or the token differs from the entry in its hash slot. */
1996 struct C_stab_entry *
1997 in_word_set (str, len)
1998      register const char *str;
1999      register unsigned int len;
2000 {
     /* Indexed by hash value; {""} marks an unused slot. */
2001   static struct C_stab_entry wordlist[] =
2002     {
2003       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2004       {""}, {""}, {""}, {""},
2005       {"@end",          0,      st_C_objend},
2006       {""}, {""}, {""}, {""},
2007       {"ENTRY",         0,      st_C_gnumacro},
2008       {"@interface",    0,      st_C_objprot},
2009       {""},
2010       {"domain",        C_STAR, st_C_struct},
2011       {""},
2012       {"PSEUDO",                0,      st_C_gnumacro},
2013       {""}, {""},
2014       {"namespace",     C_PLPL, st_C_struct},
2015       {""}, {""},
2016       {"@implementation",0,     st_C_objimpl},
2017       {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
2018       {"long",          0,      st_C_typespec},
2019       {"signed",        0,      st_C_typespec},
2020       {"@protocol",     0,      st_C_objprot},
2021       {""}, {""}, {""}, {""},
2022       {"bool",          C_PLPL, st_C_typespec},
2023       {""}, {""}, {""}, {""}, {""}, {""},
2024       {"const",         0,      st_C_typespec},
2025       {"explicit",      C_PLPL, st_C_typespec},
2026       {"if",            0,      st_C_ignore},
2027       {""},
2028       {"operator",      C_PLPL, st_C_operator},
2029       {""},
2030       {"DEFUN",         0,      st_C_gnumacro},
2031       {""}, {""},
2032       {"define",        0,      st_C_define},
2033       {""}, {""}, {""}, {""}, {""},
2034       {"double",        0,      st_C_typespec},
2035       {"struct",        0,      st_C_struct},
2036       {""}, {""}, {""}, {""},
2037       {"short",         0,      st_C_typespec},
2038       {""},
2039       {"enum",          0,      st_C_enum},
2040       {"mutable",       C_PLPL, st_C_typespec},
2041       {""},
2042       {"extern",        0,      st_C_extern},
2043       {"extends",       C_JAVA, st_C_javastruct},
2044       {"package",       C_JAVA, st_C_ignore},
2045       {"while",         0,      st_C_ignore},
2046       {""},
2047       {"for",           0,      st_C_ignore},
2048       {""}, {""}, {""},
2049       {"volatile",      0,      st_C_typespec},
2050       {""}, {""},
2051       {"import",                C_JAVA, st_C_ignore},
2052       {"float",         0,      st_C_typespec},
2053       {"switch",                0,      st_C_ignore},
2054       {"return",                0,      st_C_ignore},
2055       {"implements",    C_JAVA, st_C_javastruct},
2056       {""},
2057       {"static",        0,      st_C_typespec},
2058       {"typedef",       0,      st_C_typedef},
2059       {"typename",      C_PLPL, st_C_typespec},
2060       {"unsigned",      0,      st_C_typespec},
2061       {""}, {""},
2062       {"char",          C_PLPL, st_C_typespec},
2063       {"class",         C_PLPL, st_C_struct},
2064       {""}, {""}, {""},
2065       {"void",          0,      st_C_typespec},
2066       {""}, {""},
2067       {"friend",                C_PLPL, st_C_ignore},
2068       {""}, {""}, {""},
2069       {"int",           0,      st_C_typespec},
2070       {"union",         0,      st_C_struct},
2071       {""}, {""}, {""},
2072       {"auto",          0,      st_C_typespec},
2073       {"interface",     C_JAVA, st_C_struct},
2074       {""},
2075       {"SYSCALL",       0,      st_C_gnumacro}
2076     };
2077
2078   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
2079     {
2080       register int key = hash (str, len);
2081
2082       if (key <= MAX_HASH_VALUE && key >= 0)
2083         {
2084           register const char *s = wordlist[key].name;
2085
               /* Only the first LEN characters are compared (gperf -c uses
                  strncmp).  NOTE(review): nothing checks that the keyword
                  ends at LEN, so this relies on no token being a proper
                  prefix of the keyword stored in its own hash slot. */
2086           if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
2087             return &wordlist[key];
2088         }
2089     }
2090   return 0;
2091 }
2092 /*%>*/
2093
2094 enum sym_type
2095 C_symtype (str, len, c_ext)
2096      char *str;
2097      int len;
2098      int c_ext;
2099 {
2100   register struct C_stab_entry *se = in_word_set (str, len);
2101
2102   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
2103     return st_none;
2104   return se->type;
2105 }
2106 \f
2107  /*
2108   * C functions and variables are recognized using a simple
2109   * finite automaton.  fvdef is its state variable.
       * consider_token moves it to fvnone, foperator, fvnameseen, fignore
       * and vignore; it sets none of the three parameter-list states.
2110   */
2111 enum
2112 {
2113   fvnone,                       /* nothing seen */
2114   foperator,                    /* func: operator keyword seen (cplpl) */
2115   fvnameseen,                   /* function or variable name seen */
2116   fstartlist,                   /* func: just after open parenthesis */
2117   finlist,                      /* func: in parameter list */
2118   flistseen,                    /* func: after parameter list */
2119   fignore,                      /* func: before open brace */
2120   vignore                       /* var-like: ignore until ';' */
2121 } fvdef;
2122
2123 bool fvextern;                  /* func or var: extern keyword seen */
2124
2125  /*
2126   * typedefs are recognized using a simple finite automaton.
2127   * typdef is its state variable.
       * consider_token leaves tnone for tkeyseen only when the `typedefs'
       * option is set, and reports a token as taggable in state tend
       * unless it is a type specifier or a struct-like or enum keyword.
2128   */
2129 enum
2130 {
2131   tnone,                        /* nothing seen */
2132   tkeyseen,                     /* typedef keyword seen */
2133   ttypeseen,                    /* defined type seen */
2134   tinbody,                      /* inside typedef body */
2135   tend,                         /* just before typedef tag */
2136   tignore                       /* junk after typedef tag */
2137 } typdef;
2138
2139
2140  /*
2141   * struct-like structures (enum, struct and union) are recognized
2142   * using another simple finite automaton.  `structdef' is its state
2143   * variable.
2144   */
2145 enum
2146 {
2147   snone,                        /* nothing seen yet */
2148   skeyseen,                     /* struct-like keyword seen */
2149   stagseen,                     /* struct-like tag seen */
2150   scolonseen,                   /* colon seen after struct-like tag */
2151   sinbody                       /* in struct body: recognize member func defs*/
2152 } structdef;
2153
2154 /*
2155  * When structdef is stagseen, scolonseen, or sinbody, structtag is the
2156  * struct tag, and structtype is the type of the preceding struct-like
2157  * keyword.
      *
      * consider_token stores the result of savenstr for the tag only when
      * structtype is st_C_struct; for an enum it stores the literal string
      * "<enum>" instead of the tag.
2158  */
2159 char *structtag = "<uninited>";
2160 enum sym_type structtype;
2161
2162 /*
2163  * When objdef is different from onone, objtag is the name of the class.
      * consider_token sets it from savenstr on the token that follows
      * @interface, @protocol or @implementation.
2164  */
2165 char *objtag = "<uninited>";
2166
2167 /*
2168  * Yet another little state machine to deal with preprocessor lines.
2169  */
2170 enum
2171 {
2172   dnone,                        /* nothing seen */
2173   dsharpseen,                   /* '#' seen as first char on line */
2174   ddefineseen,                  /* '#' and 'define' seen */
2175   dignorerest                   /* ignore rest of line */
2176 } definedef;
2177
2178 /*
2179  * State machine for Objective C protocols and implementations.
2180  * Tom R.Hageman <tom@basil.icce.rug.nl>
2181  */
2182 enum
2183 {
2184   onone,                        /* nothing seen */
2185   oprotocol,                    /* @interface or @protocol seen */
2186   oimplementation,              /* @implementation seen */
2187   otagseen,                     /* class name seen */
2188   oparenseen,                   /* parenthesis before category seen */
2189   ocatseen,                     /* category name seen */
2190   oinbody,                      /* in @implementation body */
2191   omethodsign,                  /* in @implementation body, after +/- */
2192   omethodtag,                   /* after method name */
2193   omethodcolon,                 /* after method colon */
2194   omethodparm,                  /* after method parameter */
2195   oignore                       /* wait for @end */
2196 } objdef;
2197
2198
2199 /*
2200  * Use this structure to keep info about the token read, and how it
2201  * should be tagged.  Used by the make_C_tag function to build a tag.
      * make_C_tag passes buffer, linelen, lineno and linepos unchanged to
      * pfnote or new_pfnote.
2202  */
2203 typedef struct
2204 {
2205   bool valid;                   /* make_C_tag tags only when set, then clears it */
2206   char *str;                    /* NOTE(review): not used by make_C_tag */
2207   bool named;                   /* traditional style: name the tag (always if CTAGS) */
2208   int linelen;
2209   int lineno;
2210   long linepos;
2211   char *buffer;
2212 } token;
2213
2214 token tok;                      /* latest token read */
2215
2216 /*
2217  * Set this to TRUE, and the next token considered is called a function.
2218  * Used only for GNU emacs's function-defining macros.
      * consider_token sets it on an st_C_gnumacro keyword seen outside a
      * preprocessor line and clears it when it reports the following token.
2219  */
2220 bool next_token_is_func;
2221
2222 /*
2223  * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
2224  */
2225 bool yacc_rules;
2226
2227 /*
2228  * methodlen is the length of the method name stored in token_name.
      * consider_token sets it to the first selector part's length and adds
      * the length of each further part it appends.
2229  */
2230 int methodlen;
2231
2232 /*
2233  * consider_token ()
2234  *      checks to see if the current token is at the start of a
2235  *      function or variable, or corresponds to a typedef, or
2236  *      is a struct/union/enum tag, or #define, or an enum constant.
2237  *
2238  *      *IS_FUNC gets TRUE iff the token is a function or #define macro
2239  *      with args.  C_EXT is which language we are looking at.
2240  *
2241  * Globals
2242  *      fvdef                   IN OUT
2243  *      structdef               IN OUT
2244  *      definedef               IN OUT
2245  *      typdef                  IN OUT
2246  *      objdef                  IN OUT
2247  *      next_token_is_func      IN OUT
2248  */
2249
2250 bool
2251 consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
2252      register char *str;        /* IN: token pointer */
2253      register int len;          /* IN: token length */
2254      register char c;           /* IN: first char after the token */
2255      int c_ext;                 /* IN: C extensions mask */
2256      int cblev;                 /* IN: curly brace level */
2257      int parlev;                /* IN: parenthesis level */
2258      bool *is_func_or_var;      /* OUT: function or variable found */
2259 {
2260   enum sym_type toktype = C_symtype (str, len, c_ext);
2261
2262   /*
2263    * Advance the definedef state machine.
2264    */
2265   switch (definedef)
2266     {
2267     case dnone:
2268       /* We're not on a preprocessor line. */
2269       break;
2270     case dsharpseen:
2271       if (toktype == st_C_define)
2272         {
2273           definedef = ddefineseen;
2274         }
2275       else
2276         {
2277           definedef = dignorerest;
2278         }
2279       return FALSE;
2280     case ddefineseen:
2281       /*
2282        * Make a tag for any macro, unless it is a constant
2283        * and constantypedefs is FALSE.
2284        */
2285       definedef = dignorerest;
2286       *is_func_or_var = (c == '(');
2287       if (!*is_func_or_var && !constantypedefs)
2288         return FALSE;
2289       else
2290         return TRUE;
2291     case dignorerest:
2292       return FALSE;
2293     default:
2294       error ("internal error: definedef value.", (char *)NULL);
2295     }
2296
2297   /*
2298    * Now typedefs
2299    */
2300   switch (typdef)
2301     {
2302     case tnone:
2303       if (toktype == st_C_typedef)
2304         {
2305           if (typedefs)
2306             typdef = tkeyseen;
2307           fvextern = FALSE;
2308           fvdef = fvnone;
2309           return FALSE;
2310         }
2311       break;
2312     case tkeyseen:
2313       switch (toktype)
2314         {
2315         case st_none:
2316         case st_C_typespec:
2317         case st_C_struct:
2318         case st_C_enum:
2319           typdef = ttypeseen;
2320           break;
2321         }
2322       /* Do not return here, so the structdef stuff has a chance. */
2323       break;
2324     case tend:
2325       switch (toktype)
2326         {
2327         case st_C_typespec:
2328         case st_C_struct:
2329         case st_C_enum:
2330           return FALSE;
2331         }
2332       return TRUE;
2333     }
2334
2335   /*
2336    * This structdef business is currently only invoked when cblev==0.
2337    * It should be recursively invoked whatever the curly brace level,
2338    * and a stack of states kept, to allow for definitions of structs
2339    * within structs.
2340    *
2341    * This structdef business is NOT invoked when we are ctags and the
2342    * file is plain C.  This is because a struct tag may have the same
2343    * name as another tag, and this loses with ctags.
2344    */
2345   switch (toktype)
2346     {
2347     case st_C_javastruct:
2348       if (structdef == stagseen)
2349         structdef = scolonseen;
2350       return FALSE;
2351     case st_C_struct:
2352     case st_C_enum:
2353       if (typdef == tkeyseen
2354           || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
2355         {
2356           structdef = skeyseen;
2357           structtype = toktype;
2358         }
2359       return FALSE;
2360     }
2361
2362   if (structdef == skeyseen)
2363     {
2364       /* Save the tag for struct/union/class, for functions and variables
2365          that may be defined inside. */
2366       if (structtype == st_C_struct)
2367         structtag = savenstr (str, len);
2368       else
2369         structtag = "<enum>";
2370       structdef = stagseen;
2371       return TRUE;
2372     }
2373
2374   if (typdef != tnone)
2375     definedef = dnone;
2376
2377   /* Detect GNU macros.
2378
2379      Writers of emacs code are recommended to put the
2380      first two args of a DEFUN on the same line.
2381
2382       The DEFUN macro, used in emacs C source code, has a first arg
2383      that is a string (the lisp function name), and a second arg that
2384      is a C function name.  Since etags skips strings, the second arg
2385      is tagged.  This is unfortunate, as it would be better to tag the
2386      first arg.  The simplest way to deal with this problem would be
2387      to name the tag with a name built from the function name, by
2388      removing the initial 'F' character and substituting '-' for '_'.
2389      Anyway, this assumes that the conventions of naming lisp
2390      functions will never change.  Currently, this method is not
2391      implemented. */
2392   if (definedef == dnone && toktype == st_C_gnumacro)
2393     {
2394       next_token_is_func = TRUE;
2395       return FALSE;
2396     }
2397   if (next_token_is_func)
2398     {
2399       next_token_is_func = FALSE;
2400       fvdef = fignore;
2401       *is_func_or_var = TRUE;
2402       return TRUE;
2403     }
2404
2405   /* Detect Objective C constructs. */
2406   switch (objdef)
2407     {
2408     case onone:
2409       switch (toktype)
2410         {
2411         case st_C_objprot:
2412           objdef = oprotocol;
2413           return FALSE;
2414         case st_C_objimpl:
2415           objdef = oimplementation;
2416           return FALSE;
2417         }
2418       break;
2419     case oimplementation:
2420       /* Save the class tag for functions or variables defined inside. */
2421       objtag = savenstr (str, len);
2422       objdef = oinbody;
2423       return FALSE;
2424     case oprotocol:
2425       /* Save the class tag for categories. */
2426       objtag = savenstr (str, len);
2427       objdef = otagseen;
2428       *is_func_or_var = TRUE;
2429       return TRUE;
2430     case oparenseen:
2431       objdef = ocatseen;
2432       *is_func_or_var = TRUE;
2433       return TRUE;
2434     case oinbody:
2435       break;
2436     case omethodsign:
2437       if (parlev == 0)
2438         {
2439           objdef = omethodtag;
2440           methodlen = len;
2441           grow_linebuffer (&token_name, methodlen + 1);
2442           strncpy (token_name.buffer, str, len);
2443           token_name.buffer[methodlen] = '\0';
2444           token_name.len = methodlen;
2445           return TRUE;
2446         }
2447       return FALSE;
2448     case omethodcolon:
2449       if (parlev == 0)
2450         objdef = omethodparm;
2451       return FALSE;
2452     case omethodparm:
2453       if (parlev == 0)
2454         {
2455           objdef = omethodtag;
2456           methodlen += len;
2457           grow_linebuffer (&token_name, methodlen + 1);
2458           strncat (token_name.buffer, str, len);
2459           token_name.len = methodlen;
2460           return TRUE;
2461         }
2462       return FALSE;
2463     case oignore:
2464       if (toktype == st_C_objend)
2465         {
2466           /* Memory leakage here: the string pointed by objtag is
2467              never released, because many tests would be needed to
2468              avoid breaking on incorrect input code.  The amount of
2469              memory leaked here is the sum of the lengths of the
2470              class tags.
2471           free (objtag); */
2472           objdef = onone;
2473         }
2474       return FALSE;
2475     }
2476
2477   /* A function, variable or enum constant? */
2478   switch (toktype)
2479     {
2480     case st_C_extern:
2481       fvextern = TRUE;
2482       /* FALLTHRU */
2483     case st_C_typespec:
2484       if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
2485         fvdef = fvnone;         /* should be useless */
2486       return FALSE;
2487     case st_C_ignore:
2488       fvextern = FALSE;
2489       fvdef = vignore;
2490       return FALSE;
2491     case st_C_operator:
2492       fvdef = foperator;
2493       *is_func_or_var = TRUE;
2494       return TRUE;
2495     case st_none:
2496       if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
2497         {
2498           fvdef = foperator;
2499           *is_func_or_var = TRUE;
2500           return TRUE;
2501         }
2502       if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
2503         return TRUE;
2504       if (fvdef == fvnone)
2505         {
2506           fvdef = fvnameseen;   /* function or variable */
2507           *is_func_or_var = TRUE;
2508           return TRUE;
2509         }
2510       break;
2511     }
2512
2513   return FALSE;
2514 }
2515
2516 /*
2517  * C_entries ()
2518  *      This routine finds functions, variables, typedefs,
2519  *      #define's, enum constants and struct/union/enum definitions in
2520  *      C syntax and adds them to the list.
      *
      *      The macros below are helpers for it: they refer by name to
      *      its locals curndx, newndx, lp, quotednl and savetok and to
      *      its argument inf.
2521  */
     /* TRUE when the line buffer that received the latest line read is
        the current one. */
2522 #define current_lb_is_new (newndx == curndx)
     /* Make the other of the two line buffers current (curndx flips
        between 0 and 1). */
2523 #define switch_line_buffers() (curndx = 1 - curndx)
2524
     /* The line buffer (.lb) and the recorded position (.linepos) of the
        current, the other and the newest entry of lbs. */
2525 #define curlb (lbs[curndx].lb)
2526 #define othlb (lbs[1-curndx].lb)
2527 #define newlb (lbs[newndx].lb)
2528 #define curlinepos (lbs[curndx].linepos)
2529 #define othlinepos (lbs[1-curndx].linepos)
2530 #define newlinepos (lbs[newndx].linepos)
2531
     /* Read the next line of inf into the current line buffer and restart
        scanning at its beginning: record charno as the line's position,
        increment lineno, advance charno by what readline returns, clear
        quotednl and mark the current buffer as the newest.  definedef is
        left alone, so a preprocessor line continues across the newline. */
2532 #define CNL_SAVE_DEFINEDEF()                                            \
2533 do {                                                                    \
2534   curlinepos = charno;                                                  \
2535   lineno++;                                                             \
2536   linecharno = charno;                                                  \
2537   charno += readline (&curlb, inf);                                     \
2538   lp = curlb.buffer;                                                    \
2539   quotednl = FALSE;                                                     \
2540   newndx = curndx;                                                      \
2541 } while (0)
2542
     /* Like CNL_SAVE_DEFINEDEF, but the newline also ends any preprocessor
        line (definedef becomes dnone); if a token was put aside in
        savetok, it becomes the current token again. */
2543 #define CNL()                                                           \
2544 do {                                                                    \
2545   CNL_SAVE_DEFINEDEF();                                                 \
2546   if (savetok.valid)                                                    \
2547     {                                                                   \
2548       tok = savetok;                                                    \
2549       savetok.valid = FALSE;                                            \
2550     }                                                                   \
2551   definedef = dnone;                                                    \
2552 } while (0)
2553
2554
2555 void
2556 make_C_tag (isfun)
2557      bool isfun;
2558 {
2559   /* This function should never be called when tok.valid is FALSE, but
2560      we must protect against invalid input or internal errors. */
2561   if (tok.valid)
2562     {
2563       if (traditional_tag_style)
2564         {
2565           /* This was the original code.  Now we call new_pfnote instead,
2566              which uses the new method for naming tags (see new_pfnote). */
2567           char *name = NULL;
2568
2569           if (CTAGS || tok.named)
2570             name = savestr (token_name.buffer);
2571           pfnote (name, isfun,
2572                   tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2573         }
2574       else
2575         new_pfnote (token_name.buffer, token_name.len, isfun,
2576                     tok.buffer, tok.linelen, tok.lineno, tok.linepos);
2577       tok.valid = FALSE;
2578     }
2579   else if (DEBUG)
2580     abort ();
2581 }
2582
2583
2584 void
2585 C_entries (c_ext, inf)
2586      int c_ext;                 /* extension of C */
2587      FILE *inf;                 /* input file */
2588 {
2589   register char c;              /* latest char read; '\0' for end of line */
2590   register char *lp;            /* pointer one beyond the character `c' */
2591   int curndx, newndx;           /* indices for current and new lb */
2592   register int tokoff;          /* offset in line of start of current token */
2593   register int toklen;          /* length of current token */
2594   char *qualifier;              /* string used to qualify names */
2595   int qlen;                     /* length of qualifier */
2596   int cblev;                    /* current curly brace level */
2597   int parlev;                   /* current parenthesis level */
2598   bool incomm, inquote, inchar, quotednl, midtoken;
2599   bool purec, cplpl, cjava;
2600   token savetok;                /* token saved during preprocessor handling */
2601
2602
2603   tokoff = toklen = 0;          /* keep compiler quiet */
2604   curndx = newndx = 0;
2605   lineno = 0;
2606   charno = 0;
2607   lp = curlb.buffer;
2608   *lp = 0;
2609
2610   fvdef = fvnone; fvextern = FALSE; typdef = tnone;
2611   structdef = snone; definedef = dnone; objdef = onone;
2612   next_token_is_func = yacc_rules = FALSE;
2613   midtoken = inquote = inchar = incomm = quotednl = FALSE;
2614   tok.valid = savetok.valid = FALSE;
2615   cblev = 0;
2616   parlev = 0;
2617   purec = !(c_ext & ~YACC);     /* no extensions (apart from possibly yacc) */
2618   cplpl = (c_ext & C_PLPL) == C_PLPL;
2619   cjava = (c_ext & C_JAVA) == C_JAVA;
2620   if (cjava)
2621     { qualifier = "."; qlen = 1; }
2622   else
2623     { qualifier = "::"; qlen = 2; }
2624
2625   while (!feof (inf))
2626     {
2627       c = *lp++;
2628       if (c == '\\')
2629         {
2630           /* If we're at the end of the line, the next character is a
2631              '\0'; don't skip it, because it's the thing that tells us
2632              to read the next line.  */
2633           if (*lp == '\0')
2634             {
2635               quotednl = TRUE;
2636               continue;
2637             }
2638           lp++;
2639           c = ' ';
2640         }
2641       else if (incomm)
2642         {
2643           switch (c)
2644             {
2645             case '*':
2646               if (*lp == '/')
2647                 {
2648                   c = *lp++;
2649                   incomm = FALSE;
2650                 }
2651               break;
2652             case '\0':
2653               /* Newlines inside comments do not end macro definitions in
2654                  traditional cpp. */
2655               CNL_SAVE_DEFINEDEF ();
2656               break;
2657             }
2658           continue;
2659         }
2660       else if (inquote)
2661         {
2662           switch (c)
2663             {
2664             case '"':
2665               inquote = FALSE;
2666               break;
2667             case '\0':
2668               /* Newlines inside strings do not end macro definitions
2669                  in traditional cpp, even though compilers don't
2670                  usually accept them. */
2671               CNL_SAVE_DEFINEDEF ();
2672               break;
2673             }
2674           continue;
2675         }
2676       else if (inchar)
2677         {
2678           switch (c)
2679             {
2680             case '\0':
2681               /* Hmmm, something went wrong. */
2682               CNL ();
2683               /* FALLTHRU */
2684             case '\'':
2685               inchar = FALSE;
2686               break;
2687             }
2688           continue;
2689         }
2690       else
2691         switch (c)
2692           {
2693           case '"':
2694             inquote = TRUE;
2695             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2696               {
2697                 fvextern = FALSE;
2698                 fvdef = fvnone;
2699               }
2700             continue;
2701           case '\'':
2702             inchar = TRUE;
2703             if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
2704               {
2705                 fvextern = FALSE;
2706                 fvdef = fvnone;
2707               }
2708             continue;
2709           case '/':
2710             if (*lp == '*')
2711               {
2712                 lp++;
2713                 incomm = TRUE;
2714                 continue;
2715               }
2716             else if (/* cplpl && */ *lp == '/')
2717               {
2718                 c = '\0';
2719                 break;
2720               }
2721             else
2722               break;
2723           case '%':
2724             if ((c_ext & YACC) && *lp == '%')
2725               {
2726                 /* entering or exiting rules section in yacc file */
2727                 lp++;
2728                 definedef = dnone; fvdef = fvnone; fvextern = FALSE;
2729                 typdef = tnone; structdef = snone;
2730                 next_token_is_func = FALSE;
2731                 midtoken = inquote = inchar = incomm = quotednl = FALSE;
2732                 cblev = 0;
2733                 yacc_rules = !yacc_rules;
2734                 continue;
2735               }
2736             else
2737               break;
2738           case '#':
2739             if (definedef == dnone)
2740               {
2741                 char *cp;
2742                 bool cpptoken = TRUE;
2743
2744                 /* Look back on this line.  If all blanks, or nonblanks
2745                    followed by an end of comment, this is a preprocessor
2746                    token. */
2747                 for (cp = newlb.buffer; cp < lp-1; cp++)
2748                   if (!iswhite (*cp))
2749                     {
2750                       if (*cp == '*' && *(cp+1) == '/')
2751                         {
2752                           cp++;
2753                           cpptoken = TRUE;
2754                         }
2755                       else
2756                         cpptoken = FALSE;
2757                     }
2758                 if (cpptoken)
2759                   definedef = dsharpseen;
2760               } /* if (definedef == dnone) */
2761
2762             continue;
2763           } /* switch (c) */
2764
2765
2766       /* Consider token only if some complicated conditions are satisfied. */
2767       if ((definedef != dnone
2768            || (cblev == 0 && structdef != scolonseen)
2769            || (cblev == 1 && cplpl && structdef == sinbody)
2770            || (structdef == sinbody && purec))
2771           && typdef != tignore
2772           && definedef != dignorerest
2773           && fvdef != finlist)
2774         {
2775           if (midtoken)
2776             {
2777               if (endtoken (c))
2778                 {
2779                   bool funorvar = FALSE;
2780
2781                   if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
2782                     {
2783                       /*
2784                        * This handles :: in the middle, but not at the
2785                        * beginning of an identifier.  Also, space-separated
2786                        * :: is not recognised.
2787                        */
2788                       lp += 2;
2789                       toklen += 2;
2790                       c = lp[-1];
2791                       goto intok;
2792                     }
2793                   else
2794                     {
2795                       if (yacc_rules
2796                           || consider_token (newlb.buffer + tokoff, toklen, c,
2797                                              c_ext, cblev, parlev, &funorvar))
2798                         {
2799                           if (fvdef == foperator)
2800                             {
2801                               char *oldlp = lp;
2802                               lp = skip_spaces (lp-1);
2803                               if (*lp != '\0')
2804                                 lp += 1;
2805                               while (*lp != '\0'
2806                                      && !isspace (*lp) && *lp != '(')
2807                                 lp += 1;
2808                               c = *lp++;
2809                               toklen += lp - oldlp;
2810                             }
2811                           tok.named = FALSE;
2812                           if (!purec
2813                               && funorvar
2814                               && definedef == dnone
2815                               && structdef == sinbody)
2816                             /* function or var defined in C++ class body */
2817                             {
2818                               int len = strlen (structtag) + qlen + toklen;
2819                               grow_linebuffer (&token_name, len + 1);
2820                               strcpy (token_name.buffer, structtag);
2821                               strcat (token_name.buffer, qualifier);
2822                               strncat (token_name.buffer,
2823                                        newlb.buffer + tokoff, toklen);
2824                               token_name.len = len;
2825                               tok.named = TRUE;
2826                             }
2827                           else if (objdef == ocatseen)
2828                             /* Objective C category */
2829                             {
2830                               int len = strlen (objtag) + 2 + toklen;
2831                               grow_linebuffer (&token_name, len + 1);
2832                               strcpy (token_name.buffer, objtag);
2833                               strcat (token_name.buffer, "(");
2834                               strncat (token_name.buffer,
2835                                        newlb.buffer + tokoff, toklen);
2836                               strcat (token_name.buffer, ")");
2837                               token_name.len = len;
2838                               tok.named = TRUE;
2839                             }
2840                           else if (objdef == omethodtag
2841                                    || objdef == omethodparm)
2842                             /* Objective C method */
2843                             {
2844                               tok.named = TRUE;
2845                             }
2846                           else
2847                             {
2848                               grow_linebuffer (&token_name, toklen + 1);
2849                               strncpy (token_name.buffer,
2850                                        newlb.buffer + tokoff, toklen);
2851                               token_name.buffer[toklen] = '\0';
2852                               token_name.len = toklen;
2853                               /* Name macros and members. */
2854                               tok.named = (structdef == stagseen
2855                                            || typdef == ttypeseen
2856                                            || typdef == tend
2857                                            || (funorvar
2858                                                && definedef == dignorerest)
2859                                            || (funorvar
2860                                                && definedef == dnone
2861                                                && structdef == sinbody));
2862                             }
2863                           tok.lineno = lineno;
2864                           tok.linelen = tokoff + toklen + 1;
2865                           tok.buffer = newlb.buffer;
2866                           tok.linepos = newlinepos;
2867                           tok.valid = TRUE;
2868
2869                           if (definedef == dnone
2870                               && (fvdef == fvnameseen
2871                                   || fvdef == foperator
2872                                   || structdef == stagseen
2873                                   || typdef == tend
2874                                   || objdef != onone))
2875                             {
2876                               if (current_lb_is_new)
2877                                 switch_line_buffers ();
2878                             }
2879                           else
2880                             make_C_tag (funorvar);
2881                         }
2882                       midtoken = FALSE;
2883                     }
2884                 } /* if (endtoken (c)) */
2885               else if (intoken (c))
2886                 intok:
2887                 {
2888                   toklen++;
2889                   continue;
2890                 }
2891             } /* if (midtoken) */
2892           else if (begtoken (c))
2893             {
2894               switch (definedef)
2895                 {
2896                 case dnone:
2897                   switch (fvdef)
2898                     {
2899                     case fstartlist:
2900                       fvdef = finlist;
2901                       continue;
2902                     case flistseen:
2903                       make_C_tag (TRUE); /* a function */
2904                       fvdef = fignore;
2905                       break;
2906                     case fvnameseen:
2907                       fvdef = fvnone;
2908                       break;
2909                     }
2910                   if (structdef == stagseen && !cjava)
2911                     structdef = snone;
2912                   break;
2913                 case dsharpseen:
2914                   savetok = tok;
2915                 }
2916               if (!yacc_rules || lp == newlb.buffer + 1)
2917                 {
2918                   tokoff = lp - 1 - newlb.buffer;
2919                   toklen = 1;
2920                   midtoken = TRUE;
2921                 }
2922               continue;
2923             } /* if (begtoken) */
2924         } /* if must look at token */
2925
2926
2927       /* Detect end of line, colon, comma, semicolon and various braces
2928          after having handled a token.*/
2929       switch (c)
2930         {
2931         case ':':
2932           if (definedef != dnone)
2933             break;
2934           switch (objdef)
2935             {
2936             case  otagseen:
2937               objdef = oignore;
2938               make_C_tag (TRUE); /* an Objective C class */
2939               break;
2940             case omethodtag:
2941             case omethodparm:
2942               objdef = omethodcolon;
2943               methodlen += 1;
2944               grow_linebuffer (&token_name, methodlen + 1);
2945               strcat (token_name.buffer, ":");
2946               token_name.len = methodlen;
2947               break;
2948             }
2949           if (structdef == stagseen)
2950             structdef = scolonseen;
2951           else
2952             switch (fvdef)
2953               {
2954               case fvnameseen:
2955                 if (yacc_rules)
2956                   {
2957                     make_C_tag (FALSE); /* a yacc function */
2958                     fvdef = fignore;
2959                   }
2960                 break;
2961               case fstartlist:
2962                 fvextern = FALSE;
2963                 fvdef = fvnone;
2964                 break;
2965               }
2966           break;
2967         case ';':
2968           if (definedef != dnone)
2969             break;
2970           if (cblev == 0)
2971             switch (typdef)
2972               {
2973               case tend:
2974                 make_C_tag (FALSE); /* a typedef */
2975                 /* FALLTHRU */
2976               default:
2977                 typdef = tnone;
2978               }
2979           switch (fvdef)
2980             {
2981             case fignore:
2982               break;
2983             case fvnameseen:
2984               if ((members && cblev == 1)
2985                   || (globals && cblev == 0 && (!fvextern || declarations)))
2986                 make_C_tag (FALSE); /* a variable */
2987               fvextern = FALSE;
2988               fvdef = fvnone;
2989               tok.valid = FALSE;
2990               break;
2991             case flistseen:
2992               if (declarations && (cblev == 0 || cblev == 1))
2993                 make_C_tag (TRUE); /* a function declaration */
2994               /* FALLTHRU */
2995             default:
2996               fvextern = FALSE;
2997               fvdef = fvnone;
2998               /* The following instruction invalidates the token.
2999                  Probably the token should be invalidated in all
3000                  other cases  where some state machine is reset. */
3001               tok.valid = FALSE;
3002             }
3003           if (structdef == stagseen)
3004             structdef = snone;
3005           break;
3006         case ',':
3007           if (definedef != dnone)
3008             break;
3009           switch (objdef)
3010             {
3011             case omethodtag:
3012             case omethodparm:
3013               make_C_tag (TRUE); /* an Objective C method */
3014               objdef = oinbody;
3015               break;
3016             }
3017           switch (fvdef)
3018             {
3019             case foperator:
3020             case finlist:
3021             case fignore:
3022             case vignore:
3023               break;
3024             case fvnameseen:
3025               if ((members && cblev == 1)
3026                   || (globals && cblev == 0 && (!fvextern || declarations)))
3027                 make_C_tag (FALSE); /* a variable */
3028               break;
3029             default:
3030               fvdef = fvnone;
3031             }
3032           if (structdef == stagseen)
3033             structdef = snone;
3034           break;
3035         case '[':
3036           if (definedef != dnone)
3037             break;
3038           if (cblev == 0 && typdef == tend)
3039             {
3040               typdef = tignore;
3041               make_C_tag (FALSE);       /* a typedef */
3042               break;
3043             }
3044           switch (fvdef)
3045             {
3046             case foperator:
3047             case finlist:
3048             case fignore:
3049             case vignore:
3050               break;
3051             case fvnameseen:
3052               if ((members && cblev == 1)
3053                   || (globals && cblev == 0 && (!fvextern || declarations)))
3054                 make_C_tag (FALSE); /* a variable */
3055               /* FALLTHRU */
3056             default:
3057               fvdef = fvnone;
3058             }
3059           if (structdef == stagseen)
3060             structdef = snone;
3061           break;
3062         case '(':
3063           if (definedef != dnone)
3064             break;
3065           if (objdef == otagseen && parlev == 0)
3066             objdef = oparenseen;
3067           switch (fvdef)
3068             {
3069             case fvnameseen:
3070               if (typdef == ttypeseen
3071                   && tok.valid
3072                   && *lp != '*'
3073                   && structdef != sinbody)
3074                 {
3075                   /* This handles constructs like:
3076                      typedef void OperatorFun (int fun); */
3077                   make_C_tag (FALSE);
3078                   typdef = tignore;
3079                 }
3080               /* FALLTHRU */
3081             case foperator:
3082               fvdef = fstartlist;
3083               break;
3084             case flistseen:
3085               fvdef = finlist;
3086               break;
3087             }
3088           parlev++;
3089           break;
3090         case ')':
3091           if (definedef != dnone)
3092             break;
3093           if (objdef == ocatseen && parlev == 1)
3094             {
3095               make_C_tag (TRUE); /* an Objective C category */
3096               objdef = oignore;
3097             }
3098           if (--parlev == 0)
3099             {
3100               switch (fvdef)
3101                 {
3102                 case fstartlist:
3103                 case finlist:
3104                   fvdef = flistseen;
3105                   break;
3106                 }
3107               if (cblev == 0 && (typdef == tend))
3108                 {
3109                   typdef = tignore;
3110                   make_C_tag (FALSE); /* a typedef */
3111                 }
3112             }
3113           else if (parlev < 0)  /* can happen due to ill-conceived #if's. */
3114             parlev = 0;
3115           break;
3116         case '{':
3117           if (definedef != dnone)
3118             break;
3119           if (typdef == ttypeseen)
3120             typdef = tinbody;
3121           switch (structdef)
3122             {
3123             case skeyseen:      /* unnamed struct */
3124               structdef = sinbody;
3125               structtag = "_anonymous_";
3126               break;
3127             case stagseen:
3128             case scolonseen:    /* named struct */
3129               structdef = sinbody;
3130               make_C_tag (FALSE);       /* a struct */
3131               break;
3132             }
3133           switch (fvdef)
3134             {
3135             case flistseen:
3136               make_C_tag (TRUE); /* a function */
3137               /* FALLTHRU */
3138             case fignore:
3139               fvdef = fvnone;
3140               break;
3141             case fvnone:
3142               switch (objdef)
3143                 {
3144                 case otagseen:
3145                   make_C_tag (TRUE); /* an Objective C class */
3146                   objdef = oignore;
3147                   break;
3148                 case omethodtag:
3149                 case omethodparm:
3150                   make_C_tag (TRUE); /* an Objective C method */
3151                   objdef = oinbody;
3152                   break;
3153                 default:
3154                   /* Neutralize `extern "C" {' grot. */
3155                   if (cblev == 0 && structdef == snone && typdef == tnone)
3156                     cblev = -1;
3157                 }
3158             }
3159           cblev++;
3160           break;
3161         case '*':
3162           if (definedef != dnone)
3163             break;
3164           if (fvdef == fstartlist)
3165             fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
3166           break;
3167         case '}':
3168           if (definedef != dnone)
3169             break;
3170           if (!noindentypedefs && lp == newlb.buffer + 1)
3171             {
3172               cblev = 0;        /* reset curly brace level if first column */
3173               parlev = 0;       /* also reset paren level, just in case... */
3174             }
3175           else if (cblev > 0)
3176             cblev--;
3177           if (cblev == 0)
3178             {
3179               if (typdef == tinbody)
3180                 typdef = tend;
3181               /* Memory leakage here: the string pointed by structtag is
3182                  never released, because I fear to miss something and
3183                  break things while freeing the area.  The amount of
3184                  memory leaked here is the sum of the lengths of the
3185                  struct tags.
3186               if (structdef == sinbody)
3187                 free (structtag); */
3188
3189               structdef = snone;
3190               structtag = "<error>";
3191             }
3192           break;
3193         case '=':
3194           if (definedef != dnone)
3195             break;
3196           switch (fvdef)
3197             {
3198             case foperator:
3199             case finlist:
3200             case fignore:
3201             case vignore:
3202               break;
3203             case fvnameseen:
3204               if ((members && cblev == 1)
3205                   || (globals && cblev == 0 && (!fvextern || declarations)))
3206                 make_C_tag (FALSE); /* a variable */
3207               /* FALLTHRU */
3208             default:
3209               fvdef = vignore;
3210             }
3211           break;
3212         case '+':
3213         case '-':
3214           if (objdef == oinbody && cblev == 0)
3215             {
3216               objdef = omethodsign;
3217               break;
3218             }
3219           /* FALLTHRU */
3220         case '#': case '~': case '&': case '%': case '/': case '|':
3221         case '^': case '!': case '<': case '>': case '.': case '?': case ']':
3222           if (definedef != dnone)
3223             break;
3224           /* These surely cannot follow a function tag in C. */
3225           switch (fvdef)
3226             {
3227             case foperator:
3228             case finlist:
3229             case fignore:
3230             case vignore:
3231               break;
3232             default:
3233               fvdef = fvnone;
3234             }
3235           break;
3236         case '\0':
3237           if (objdef == otagseen)
3238             {
3239               make_C_tag (TRUE); /* an Objective C class */
3240               objdef = oignore;
3241             }
3242           /* If a macro spans multiple lines don't reset its state. */
3243           if (quotednl)
3244             CNL_SAVE_DEFINEDEF ();
3245           else
3246             CNL ();
3247           break;
3248         } /* switch (c) */
3249
3250     } /* while not eof */
3251 }
3252
3253 /*
3254  * Process either a C++ file or a C file depending on the setting
3255  * of a global flag.
3256  */
3257 void
3258 default_C_entries (inf)
3259      FILE *inf;
3260 {
3261   C_entries (cplusplus ? C_PLPL : 0, inf);
3262 }
3263
/* Tag INF as plain ANSI C: C_entries is called with no language flags. */
void
plain_C_entries (inf)
     FILE *inf;
{
  int c_ext = 0;

  C_entries (c_ext, inf);
}
3271
3272 /* Always do C++. */
3273 void
3274 Cplusplus_entries (inf)
3275      FILE *inf;
3276 {
3277   C_entries (C_PLPL, inf);
3278 }
3279
3280 /* Always do Java. */
3281 void
3282 Cjava_entries (inf)
3283      FILE *inf;
3284 {
3285   C_entries (C_JAVA, inf);
3286 }
3287
3288 /* Always do C*. */
3289 void
3290 Cstar_entries (inf)
3291      FILE *inf;
3292 {
3293   C_entries (C_STAR, inf);
3294 }
3295
3296 /* Always do Yacc. */
3297 void
3298 Yacc_entries (inf)
3299      FILE *inf;
3300 {
3301   C_entries (YACC, inf);
3302 }
3303 \f
/*
 * Loop header that walks through the lines of FILE_POINTER.  Write it
 * where a `for (...)' would go and follow it with the loop body.
 *
 * The globals `lineno' and `charno' are reset to 0 when the loop
 * starts.  Before every iteration, provided feof does not report end
 * of file, the header
 *   - increments `lineno',
 *   - saves the current `charno' in `linecharno',
 *   - reads a line into LINE_BUFFER with readline and adds readline's
 *     return value to `charno',
 *   - points CHAR_POINTER at the text just read into LINE_BUFFER.
 *
 * CHAR_POINTER is taken from LINE_BUFFER itself, not from the global
 * `lb', so the macro also works when another buffer is passed.
 */
#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)    \
  for (lineno = charno = 0;     /* loop initialization */               \
       !feof (file_pointer)     /* loop test */                         \
       && (lineno++,            /* instructions at start of loop */     \
           linecharno = charno,                                         \
           charno += readline (&(line_buffer), file_pointer),           \
           char_pointer = (line_buffer).buffer,                         \
           TRUE);                                                       \
      )
3314
3315
3316 /*
3317  * Read a file, but do no processing.  This is used to do regexp
3318  * matching on files that have no language defined.
3319  */
3320 void
3321 just_read_file (inf)
3322      FILE *inf;
3323 {
3324   register char *dummy;
3325
3326   LOOP_ON_INPUT_LINES (inf, lb, dummy)
3327     continue;
3328 }
3329 \f
3330 /* Fortran parsing */
3331
3332 bool
3333 tail (cp)
3334      char *cp;
3335 {
3336   register int len = 0;
3337
3338   while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
3339     cp++, len++;
3340   if (*cp == '\0' && !intoken (dbp[len]))
3341     {
3342       dbp += len;
3343       return TRUE;
3344     }
3345   return FALSE;
3346 }
3347
3348 void
3349 takeprec ()
3350 {
3351   dbp = skip_spaces (dbp);
3352   if (*dbp != '*')
3353     return;
3354   dbp++;
3355   dbp = skip_spaces (dbp);
3356   if (strneq (dbp, "(*)", 3))
3357     {
3358       dbp += 3;
3359       return;
3360     }
3361   if (!isdigit (*dbp))
3362     {
3363       --dbp;                    /* force failure */
3364       return;
3365     }
3366   do
3367     dbp++;
3368   while (isdigit (*dbp));
3369 }
3370
3371 void
3372 getit (inf)
3373      FILE *inf;
3374 {
3375   register char *cp;
3376
3377   dbp = skip_spaces (dbp);
3378   if (*dbp == '\0')
3379     {
3380       lineno++;
3381       linecharno = charno;
3382       charno += readline (&lb, inf);
3383       dbp = lb.buffer;
3384       if (dbp[5] != '&')
3385         return;
3386       dbp += 6;
3387       dbp = skip_spaces (dbp);
3388     }
3389   if (!isalpha (*dbp) && *dbp != '_' && *dbp != '$')
3390     return;
3391   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
3392     continue;
3393   pfnote (savenstr (dbp, cp-dbp), TRUE,
3394           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3395 }
3396
3397
3398 void
3399 Fortran_functions (inf)
3400      FILE *inf;
3401 {
3402   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3403     {
3404       if (*dbp == '%')
3405         dbp++;                  /* Ratfor escape to fortran */
3406       dbp = skip_spaces (dbp);
3407       if (*dbp == '\0')
3408         continue;
3409       switch (lowcase (*dbp))
3410         {
3411         case 'i':
3412           if (tail ("integer"))
3413             takeprec ();
3414           break;
3415         case 'r':
3416           if (tail ("real"))
3417             takeprec ();
3418           break;
3419         case 'l':
3420           if (tail ("logical"))
3421             takeprec ();
3422           break;
3423         case 'c':
3424           if (tail ("complex") || tail ("character"))
3425             takeprec ();
3426           break;
3427         case 'd':
3428           if (tail ("double"))
3429             {
3430               dbp = skip_spaces (dbp);
3431               if (*dbp == '\0')
3432                 continue;
3433               if (tail ("precision"))
3434                 break;
3435               continue;
3436             }
3437           break;
3438         }
3439       dbp = skip_spaces (dbp);
3440       if (*dbp == '\0')
3441         continue;
3442       switch (lowcase (*dbp))
3443         {
3444         case 'f':
3445           if (tail ("function"))
3446             getit (inf);
3447           continue;
3448         case 's':
3449           if (tail ("subroutine"))
3450             getit (inf);
3451           continue;
3452         case 'e':
3453           if (tail ("entry"))
3454             getit (inf);
3455           continue;
3456         case 'b':
3457           if (tail ("blockdata") || tail ("block data"))
3458             {
3459               dbp = skip_spaces (dbp);
3460               if (*dbp == '\0') /* assume un-named */
3461                 pfnote (savestr ("blockdata"), TRUE,
3462                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
3463               else
3464                 getit (inf);    /* look for name */
3465             }
3466           continue;
3467         }
3468     }
3469 }
3470 \f
3471 /*
3472  * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be>, 1998-04-24
3473  * Ada parsing
3474  */
3475 /* Once we are positioned after an "interesting" keyword, let's get
3476    the real tag value necessary. */
3477 void
3478 adagetit (inf, name_qualifier)
3479      FILE *inf;
3480      char *name_qualifier;
3481 {
3482   register char *cp;
3483   char *name;
3484   char c;
3485
3486   while (!feof (inf))
3487     {
3488       dbp = skip_spaces (dbp);
3489       if (*dbp == '\0'
3490           || (dbp[0] == '-' && dbp[1] == '-'))
3491         {
3492           lineno++;
3493           linecharno = charno;
3494           charno += readline (&lb, inf);
3495           dbp = lb.buffer;
3496         }
3497       switch (*dbp)
3498         {
3499         case 'b':
3500         case 'B':
3501           if (tail ("body"))
3502             {
3503               /* Skipping body of   procedure body   or   package body or ....
3504                  resetting qualifier to body instead of spec. */
3505               name_qualifier = "/b";
3506               continue;
3507             }
3508           break;
3509         case 't':
3510         case 'T':
3511           /* Skipping type of   task type   or   protected type ... */
3512           if (tail ("type"))
3513             continue;
3514           break;
3515         }
3516       if (*dbp == '"')
3517         {
3518           dbp += 1;
3519           for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
3520             continue;
3521         }
3522       else
3523         {
3524           dbp = skip_spaces (dbp);
3525           for (cp = dbp;
3526                (*cp != '\0'
3527                 && (isalpha (*cp) || isdigit (*cp) || *cp == '_' || *cp == '.'));
3528                cp++)
3529             continue;
3530           if (cp == dbp)
3531             return;
3532         }
3533       c = *cp;
3534       *cp = '\0';
3535       name = concat (dbp, name_qualifier, "");
3536       *cp = c;
3537       pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3538       if (c == '"')
3539         dbp = cp + 1;
3540       return;
3541     }
3542 }
3543
3544 void
3545 Ada_funcs (inf)
3546      FILE *inf;
3547 {
3548   bool inquote = FALSE;
3549
3550   LOOP_ON_INPUT_LINES (inf, lb, dbp)
3551     {
3552       while (*dbp != '\0')
3553         {
3554           /* Skip a string i.e. "abcd". */
3555           if (inquote || (*dbp == '"'))
3556             {
3557               dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
3558               if (dbp != NULL)
3559                 {
3560                   inquote = FALSE;
3561                   dbp += 1;
3562                   continue;     /* advance char */
3563                 }
3564               else
3565                 {
3566                   inquote = TRUE;
3567                   break;        /* advance line */
3568                 }
3569             }
3570
3571           /* Skip comments. */
3572           if (dbp[0] == '-' && dbp[1] == '-')
3573             break;              /* advance line */
3574
3575           /* Skip character enclosed in single quote i.e. 'a'
3576              and skip single quote starting an attribute i.e. 'Image. */
3577           if (*dbp == '\'')
3578             {
3579               dbp++ ;
3580               if (*dbp != '\0')
3581                 dbp++;
3582               continue;
3583             }
3584
3585           /* Search for beginning of a token.  */
3586           if (!begtoken (*dbp))
3587             {
3588               dbp++;
3589               continue;         /* advance char */
3590             }
3591
3592           /* We are at the beginning of a token. */
3593           switch (*dbp)
3594             {
3595             case 'f':
3596             case 'F':
3597               if (!packages_only && tail ("function"))
3598                 adagetit (inf, "/f");
3599               else
3600                 break;          /* from switch */
3601               continue;         /* advance char */
3602             case 'p':
3603             case 'P':
3604               if (!packages_only && tail ("procedure"))
3605                 adagetit (inf, "/p");
3606               else if (tail ("package"))
3607                 adagetit (inf, "/s");
3608               else if (tail ("protected")) /* protected type */
3609                 adagetit (inf, "/t");
3610               else
3611                 break;          /* from switch */
3612               continue;         /* advance char */
3613             case 't':
3614             case 'T':
3615               if (!packages_only && tail ("task"))
3616                 adagetit (inf, "/k");
3617               else if (typedefs && !packages_only && tail ("type"))
3618                 {
3619                   adagetit (inf, "/t");
3620                   while (*dbp != '\0')
3621                     dbp += 1;
3622                 }
3623               else
3624                 break;          /* from switch */
3625               continue;         /* advance char */
3626             }
3627
3628           /* Look for the end of the token. */
3629           while (!endtoken (*dbp))
3630             dbp++;
3631
3632         } /* advance char */
3633     } /* advance line */
3634 }
3635 \f
3636 /*
3637  * Bob Weiner, Motorola Inc., 4/3/94
3638  * Unix and microcontroller assembly tag handling
3639  * look for '^[a-zA-Z_.$][a-zA_Z0-9_.$]*[: ^I^J]'
3640  */
3641 void
3642 Asm_labels (inf)
3643      FILE *inf;
3644 {
3645   register char *cp;
3646
3647   LOOP_ON_INPUT_LINES (inf, lb, cp)
3648     {
3649       /* If first char is alphabetic or one of [_.$], test for colon
3650          following identifier. */
3651       if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3652         {
3653           /* Read past label. */
3654           cp++;
3655           while (isalnum (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
3656             cp++;
3657           if (*cp == ':' || isspace (*cp))
3658             {
3659               /* Found end of label, so copy it and add it to the table. */
3660               pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
3661                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3662             }
3663         }
3664     }
3665 }
3666 \f
3667 /*
3668  * Perl support by Bart Robinson <lomew@cs.utah.edu>
3669  *              enhanced by Michael Ernst <mernst@alum.mit.edu>
3670  * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
3671  * Perl variable names: /^(my|local).../
3672  */
3673 void
3674 Perl_functions (inf)
3675      FILE *inf;
3676 {
3677   register char *cp;
3678
3679   LOOP_ON_INPUT_LINES (inf, lb, cp)
3680     {
3681       if (*cp++ == 's'
3682           && *cp++ == 'u'
3683           && *cp++ == 'b' && isspace (*cp++))
3684         {
3685           cp = skip_spaces (cp);
3686           if (*cp != '\0')
3687             {
3688               char *sp = cp;
3689               while (*cp != '\0'
3690                      && !isspace (*cp) && *cp != '{' && *cp != '(')
3691                 cp++;
3692               pfnote (savenstr (sp, cp-sp), TRUE,
3693                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3694             }
3695         }
3696        else if (globals         /* only if tagging global vars is enabled */
3697                 && ((cp = lb.buffer,
3698                      *cp++ == 'm'
3699                      && *cp++ == 'y')
3700                     || (cp = lb.buffer,
3701                         *cp++ == 'l'
3702                         && *cp++ == 'o'
3703                         && *cp++ == 'c'
3704                         && *cp++ == 'a'
3705                         && *cp++ == 'l'))
3706                 && (*cp == '(' || isspace (*cp)))
3707         {
3708           /* After "my" or "local", but before any following paren or space. */
3709           char *varname = NULL;
3710
3711           cp = skip_spaces (cp);
3712           if (*cp == '$' || *cp == '@' || *cp == '%')
3713             {
3714               char* varstart = ++cp;
3715               while (isalnum (*cp) || *cp == '_')
3716                 cp++;
3717               varname = savenstr (varstart, cp-varstart);
3718             }
3719           else
3720             {
3721               /* Should be examining a variable list at this point;
3722                  could insist on seeing an open parenthesis. */
3723               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
3724                 cp++;
3725             }
3726
3727           /* Perhaps I should back cp up one character, so the TAGS table
3728              doesn't mention (and so depend upon) the following char. */
3729           pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
3730                   FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3731         }
3732     }
3733 }
3734 \f
3735 /*
3736  * Python support by Eric S. Raymond <esr@thyrsus.com>
3737  * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
3738  */
3739 void
3740 Python_functions (inf)
3741      FILE *inf;
3742 {
3743   register char *cp;
3744
3745   LOOP_ON_INPUT_LINES (inf, lb, cp)
3746     {
3747       if (*cp++ == 'd'
3748           && *cp++ == 'e'
3749           && *cp++ == 'f' && isspace (*cp++))
3750         {
3751           cp = skip_spaces (cp);
3752           while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':')
3753             cp++;
3754           pfnote (NULL, TRUE,
3755                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3756         }
3757
3758       cp = lb.buffer;
3759       if (*cp++ == 'c'
3760           && *cp++ == 'l'
3761           && *cp++ == 'a'
3762           && *cp++ == 's'
3763           && *cp++ == 's' && isspace (*cp++))
3764         {
3765           cp = skip_spaces (cp);
3766           while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':')
3767             cp++;
3768           pfnote (NULL, TRUE,
3769                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
3770         }
3771     }
3772 }
3773 \f
3774 /* Idea by Corny de Souza
3775  * Cobol tag functions
3776  * We could look for anything that could be a paragraph name.
3777  * i.e. anything that starts in column 8 is one word and ends in a full stop.
3778  */
3779 void
3780 Cobol_paragraphs (inf)
3781      FILE *inf;
3782 {
3783   register char *bp, *ep;
3784
3785   LOOP_ON_INPUT_LINES (inf, lb, bp)
3786     {
3787       if (lb.len < 9)
3788         continue;
3789       bp += 8;
3790
3791       /* If eoln, compiler option or comment ignore whole line. */
3792       if (bp[-1] != ' ' || !isalnum (bp[0]))
3793         continue;
3794
3795       for (ep = bp; isalnum (*ep) || *ep == '-'; ep++)
3796         continue;
3797       if (*ep++ == '.')
3798         pfnote (savenstr (bp, ep-bp), TRUE,
3799                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
3800     }
3801 }
3802 \f
3803 /* Added by Mosur Mohan, 4/22/88 */
3804 /* Pascal parsing                */
3805
3806 /*
3807  *  Locates tags for procedures & functions.  Doesn't do any type- or
3808  *  var-definitions.  It does look for the keyword "extern" or
3809  *  "forward" immediately following the procedure statement; if found,
3810  *  the tag is skipped.
3811  */
3812 void
3813 Pascal_functions (inf)
3814      FILE *inf;
3815 {
3816   linebuffer tline;             /* mostly copied from C_entries */
3817   long save_lcno;
3818   int save_lineno, save_len;
3819   char c, *cp, *namebuf;
3820
3821   bool                          /* each of these flags is TRUE iff: */
3822     incomment,                  /* point is inside a comment */
3823     inquote,                    /* point is inside '..' string */
3824     get_tagname,                /* point is after PROCEDURE/FUNCTION
3825                                    keyword, so next item = potential tag */
3826     found_tag,                  /* point is after a potential tag */
3827     inparms,                    /* point is within parameter-list */
3828     verify_tag;                 /* point has passed the parm-list, so the
3829                                    next token will determine whether this
3830                                    is a FORWARD/EXTERN to be ignored, or
3831                                    whether it is a real tag */
3832
3833   save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
3834   namebuf = NULL;               /* keep compiler quiet */
3835   lineno = 0;
3836   charno = 0;
3837   dbp = lb.buffer;
3838   *dbp = '\0';
3839   initbuffer (&tline);
3840
3841   incomment = inquote = FALSE;
3842   found_tag = FALSE;            /* have a proc name; check if extern */
3843   get_tagname = FALSE;          /* have found "procedure" keyword    */
3844   inparms = FALSE;              /* found '(' after "proc"            */
3845   verify_tag = FALSE;           /* check if "extern" is ahead        */
3846
3847
3848   while (!feof (inf))           /* long main loop to get next char */
3849     {
3850       c = *dbp++;
3851       if (c == '\0')            /* if end of line */
3852         {
3853           lineno++;
3854           linecharno = charno;
3855           charno += readline (&lb, inf);
3856           dbp = lb.buffer;
3857           if (*dbp == '\0')
3858             continue;
3859           if (!((found_tag && verify_tag)
3860                 || get_tagname))
3861             c = *dbp++;         /* only if don't need *dbp pointing
3862                                    to the beginning of the name of
3863                                    the procedure or function */
3864         }
3865       if (incomment)
3866         {
3867           if (c == '}')         /* within { } comments */
3868             incomment = FALSE;
3869           else if (c == '*' && *dbp == ')') /* within (* *) comments */
3870             {
3871               dbp++;
3872               incomment = FALSE;
3873             }
3874           continue;
3875         }
3876       else if (inquote)
3877         {
3878           if (c == '\'')
3879             inquote = FALSE;
3880           continue;
3881         }
3882       else
3883         switch (c)
3884           {
3885           case '\'':
3886             inquote = TRUE;     /* found first quote */
3887             continue;
3888           case '{':             /* found open { comment */
3889             incomment = TRUE;
3890             continue;
3891           case '(':
3892             if (*dbp == '*')    /* found open (* comment */
3893               {
3894                 incomment = TRUE;
3895                 dbp++;
3896               }
3897             else if (found_tag) /* found '(' after tag, i.e., parm-list */
3898               inparms = TRUE;
3899             continue;
3900           case ')':             /* end of parms list */
3901             if (inparms)
3902               inparms = FALSE;
3903             continue;
3904           case ';':
3905             if (found_tag && !inparms) /* end of proc or fn stmt */
3906               {
3907                 verify_tag = TRUE;
3908                 break;
3909               }
3910             continue;
3911           }
3912       if (found_tag && verify_tag && (*dbp != ' '))
3913         {
3914           /* check if this is an "extern" declaration */
3915           if (*dbp == '\0')
3916             continue;
3917           if (lowcase (*dbp == 'e'))
3918             {
3919               if (tail ("extern"))      /* superfluous, really! */
3920                 {
3921                   found_tag = FALSE;
3922                   verify_tag = FALSE;
3923                 }
3924             }
3925           else if (lowcase (*dbp) == 'f')
3926             {
3927               if (tail ("forward"))     /*  check for forward reference */
3928                 {
3929                   found_tag = FALSE;
3930                   verify_tag = FALSE;
3931                 }
3932             }
3933           if (found_tag && verify_tag) /* not external proc, so make tag */
3934             {
3935               found_tag = FALSE;
3936               verify_tag = FALSE;
3937               pfnote (namebuf, TRUE,
3938                       tline.buffer, save_len, save_lineno, save_lcno);
3939               continue;
3940             }
3941         }
3942       if (get_tagname)          /* grab name of proc or fn */
3943         {
3944           if (*dbp == '\0')
3945             continue;
3946
3947           /* save all values for later tagging */
3948           grow_linebuffer (&tline, lb.len + 1);
3949           strcpy (tline.buffer, lb.buffer);
3950           save_lineno = lineno;
3951           save_lcno = linecharno;
3952
3953           /* grab block name */
3954           for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
3955             continue;
3956           namebuf = savenstr (dbp, cp-dbp);
3957           dbp = cp;             /* set dbp to e-o-token */
3958           save_len = dbp - lb.buffer + 1;
3959           get_tagname = FALSE;
3960           found_tag = TRUE;
3961           continue;
3962
3963           /* and proceed to check for "extern" */
3964         }
3965       else if (!incomment && !inquote && !found_tag)
3966         {
3967           /* check for proc/fn keywords */
3968           switch (lowcase (c))
3969             {
3970             case 'p':
3971               if (tail ("rocedure"))    /* c = 'p', dbp has advanced */
3972                 get_tagname = TRUE;
3973               continue;
3974             case 'f':
3975               if (tail ("unction"))
3976                 get_tagname = TRUE;
3977               continue;
3978             }
3979         }
3980     }                           /* while not eof */
3981
3982   free (tline.buffer);
3983 }
3984 \f
3985 /*
3986  * lisp tag functions
3987  *  look for (def or (DEF, quote or QUOTE
3988  */
/* Return 1 if the three characters that follow STRP[0] spell "def",
   compared without regard to case, and 0 otherwise.  STRP[0] itself
   (normally the open parenthesis) is not examined. */
int
L_isdef (strp)
     register char *strp;
{
  static char lower[] = "def";
  static char upper[] = "DEF";
  int i;

  /* Stop at the first mismatch, so nothing past the end of a short
     string is ever read. */
  for (i = 0; i < 3; i++)
    if (strp[i + 1] != lower[i] && strp[i + 1] != upper[i])
      return 0;
  return 1;
}
3997
/* Return nonzero if the characters that follow STRP[0] spell "quote",
   compared without regard to case, and are followed by a white space
   character.  STRP[0] itself (normally the open parenthesis) is not
   examined. */
int
L_isquote (strp)
     register char *strp;
{
  /* Each test advances STRP by one; evaluation stops at the first
     mismatch.  The cast keeps isspace well defined for bytes with the
     high bit set. */
  return ((*++strp == 'q' || *strp == 'Q')
          && (*++strp == 'u' || *strp == 'U')
          && (*++strp == 'o' || *strp == 'O')
          && (*++strp == 't' || *strp == 'T')
          && (*++strp == 'e' || *strp == 'E')
          && isspace ((unsigned char) *++strp));
}
4009
4010 void
4011 L_getit ()
4012 {
4013   register char *cp;
4014
4015   if (*dbp == '\'')             /* Skip prefix quote */
4016     dbp++;
4017   else if (*dbp == '(')
4018   {
4019     if (L_isquote (dbp))
4020       dbp += 7;                 /* Skip "(quote " */
4021     else
4022       dbp += 1;                 /* Skip "(" before name in (defstruct (foo)) */
4023     dbp = skip_spaces (dbp);
4024   }
4025
4026   for (cp = dbp /*+1*/;
4027        *cp != '\0' && *cp != '(' && !isspace(*cp) && *cp != ')';
4028        cp++)
4029     continue;
4030   if (cp == dbp)
4031     return;
4032
4033   pfnote (savenstr (dbp, cp-dbp), TRUE,
4034           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4035 }
4036
4037 void
4038 Lisp_functions (inf)
4039      FILE *inf;
4040 {
4041   LOOP_ON_INPUT_LINES (inf, lb, dbp)
4042     {
4043       if (dbp[0] == '(')
4044         {
4045           if (L_isdef (dbp))
4046             {
4047               dbp = skip_non_spaces (dbp);
4048               dbp = skip_spaces (dbp);
4049               L_getit ();
4050             }
4051           else
4052             {
4053               /* Check for (foo::defmumble name-defined ... */
4054               do
4055                 dbp++;
4056               while (*dbp != '\0' && !isspace (*dbp)
4057                      && *dbp != ':' && *dbp != '(' && *dbp != ')');
4058               if (*dbp == ':')
4059                 {
4060                   do
4061                     dbp++;
4062                   while (*dbp == ':');
4063
4064                   if (L_isdef (dbp - 1))
4065                     {
4066                       dbp = skip_non_spaces (dbp);
4067                       dbp = skip_spaces (dbp);
4068                       L_getit ();
4069                     }
4070                 }
4071             }
4072         }
4073     }
4074 }
4075 \f
4076 /*
4077  * Postscript tag functions
4078  * Just look for lines where the first character is '/'
4079  * Richard Mlynarik <mly@adoc.xerox.com>
4080  * Also look at "defineps" for PSWrap
4081  * suggested by Masatake YAMATO <masata-y@is.aist-nara.ac.jp>
4082  */
4083 void
4084 Postscript_functions (inf)
4085      FILE *inf;
4086 {
4087   register char *bp, *ep;
4088
4089   LOOP_ON_INPUT_LINES (inf, lb, bp)
4090     {
4091       if (bp[0] == '/')
4092         {
4093           for (ep = bp+1;
4094                *ep != '\0' && *ep != ' ' && *ep != '{';
4095                ep++)
4096             continue;
4097           pfnote (savenstr (bp, ep-bp), TRUE,
4098                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
4099         }
4100       else if (strneq (bp, "defineps", 8))
4101         {
4102           bp = skip_non_spaces (bp);
4103           bp = skip_spaces (bp);
4104           get_tag (bp);
4105         }
4106     }
4107 }
4108
4109 \f
4110 /*
4111  * Scheme tag functions
4112  * look for (def... xyzzy
4113  * look for (def... (xyzzy
4114  * look for (def ... ((...(xyzzy ....
4115  * look for (set! xyzzy
4116  */
4117
4118 void
4119 Scheme_functions (inf)
4120      FILE *inf;
4121 {
4122   register char *bp;
4123
4124   LOOP_ON_INPUT_LINES (inf, lb, bp)
4125     {
4126       if (bp[0] == '('
4127           && (bp[1] == 'D' || bp[1] == 'd')
4128           && (bp[2] == 'E' || bp[2] == 'e')
4129           && (bp[3] == 'F' || bp[3] == 'f'))
4130         {
4131           bp = skip_non_spaces (bp);
4132           /* Skip over open parens and white space */
4133           while (isspace (*bp) || *bp == '(')
4134             bp++;
4135           get_tag (bp);
4136         }
4137       if (bp[0] == '('
4138           && (bp[1] == 'S' || bp[1] == 's')
4139           && (bp[2] == 'E' || bp[2] == 'e')
4140           && (bp[3] == 'T' || bp[3] == 't')
4141           && (bp[4] == '!' || bp[4] == '!')
4142           && (isspace (bp[5])))
4143         {
4144           bp = skip_non_spaces (bp);
4145           bp = skip_spaces (bp);
4146           get_tag (bp);
4147         }
4148     }
4149 }
4150 \f
4151 /* Find tags in TeX and LaTeX input files.  */
4152
/* TEX_toktab is a table of TeX control sequences that define tags.
   Each TEX_tabent records one such control sequence.  The table is
   built by TEX_decode_env; TEX_Token stops scanning it at the first
   entry whose len is not positive.
   CONVERT THIS TO USE THE Stab TYPE!! */
struct TEX_tabent
{
  char *name;                   /* control sequence name, as it appears
                                   right after the escape character */
  int len;                      /* length of name */
};

struct TEX_tabent *TEX_toktab = NULL;   /* Table with tag tokens; filled
                                           in by TeX_functions the first
                                           time it runs */

/* Default set of control sequences to put into TEX_toktab.
   The value of environment var TEXTAGS is prepended to this.
   Names are separated by colons.  */

char *TEX_defenv = "\
:chapter:section:subsection:subsubsection:eqno:label:ref:cite:bibitem\
:part:appendix:entry:index";

/* Forward declarations of the TeX helpers defined below.  */
void TEX_mode ();
struct TEX_tabent *TEX_decode_env ();
int TEX_Token ();

/* Escape and grouping characters for the file being scanned; the
   values below are the initial ones, TEX_mode sets them per file.  */
char TEX_esc = '\\';
char TEX_opgrp = '{';
char TEX_clgrp = '}';
4178
4179 /*
4180  * TeX/LaTeX scanning loop.
4181  */
4182 void
4183 TeX_functions (inf)
4184      FILE *inf;
4185 {
4186   char *cp, *lasthit;
4187   register int i;
4188
4189   /* Select either \ or ! as escape character.  */
4190   TEX_mode (inf);
4191
4192   /* Initialize token table once from environment. */
4193   if (!TEX_toktab)
4194     TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
4195
4196   LOOP_ON_INPUT_LINES (inf, lb, cp)
4197     {
4198       lasthit = cp;
4199       /* Look at each esc in line. */
4200       while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
4201         {
4202           if (*++cp == '\0')
4203             break;
4204           linecharno += cp - lasthit;
4205           lasthit = cp;
4206           i = TEX_Token (lasthit);
4207           if (i >= 0)
4208             {
4209               /* We seem to include the TeX command in the tag name.
4210               register char *p;
4211               for (p = lasthit + TEX_toktab[i].len;
4212                    *p != '\0' && *p != TEX_clgrp;
4213                    p++)
4214                 continue; */
4215               pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
4216                       lb.buffer, lb.len, lineno, linecharno);
4217               break;            /* We only tag a line once */
4218             }
4219         }
4220     }
4221 }
4222
4223 #define TEX_LESC '\\'
4224 #define TEX_SESC '!'
4225 #define TEX_cmt  '%'
4226
4227 /* Figure out whether TeX's escapechar is '\\' or '!' and set grouping
4228    chars accordingly. */
4229 void
4230 TEX_mode (inf)
4231      FILE *inf;
4232 {
4233   int c;
4234
4235   while ((c = getc (inf)) != EOF)
4236     {
4237       /* Skip to next line if we hit the TeX comment char. */
4238       if (c == TEX_cmt)
4239         while (c != '\n')
4240           c = getc (inf);
4241       else if (c == TEX_LESC || c == TEX_SESC )
4242         break;
4243     }
4244
4245   if (c == TEX_LESC)
4246     {
4247       TEX_esc = TEX_LESC;
4248       TEX_opgrp = '{';
4249       TEX_clgrp = '}';
4250     }
4251   else
4252     {
4253       TEX_esc = TEX_SESC;
4254       TEX_opgrp = '<';
4255       TEX_clgrp = '>';
4256     }
4257   /* If the input file is compressed, inf is a pipe, and rewind may fail.
4258      No attempt is made to correct the situation. */
4259   rewind (inf);
4260 }
4261
4262 /* Read environment and prepend it to the default string.
4263    Build token table. */
4264 struct TEX_tabent *
4265 TEX_decode_env (evarname, defenv)
4266      char *evarname;
4267      char *defenv;
4268 {
4269   register char *env, *p;
4270
4271   struct TEX_tabent *tab;
4272   int size, i;
4273
4274   /* Append default string to environment. */
4275   env = getenv (evarname);
4276   if (!env)
4277     env = defenv;
4278   else
4279     {
4280       char *oldenv = env;
4281       env = concat (oldenv, defenv, "");
4282     }
4283
4284   /* Allocate a token table */
4285   for (size = 1, p = env; p;)
4286     if ((p = etags_strchr (p, ':')) && *++p != '\0')
4287       size++;
4288   /* Add 1 to leave room for null terminator.  */
4289   tab = xnew (size + 1, struct TEX_tabent);
4290
4291   /* Unpack environment string into token table. Be careful about */
4292   /* zero-length strings (leading ':', "::" and trailing ':') */
4293   for (i = 0; *env;)
4294     {
4295       p = etags_strchr (env, ':');
4296       if (!p)                   /* End of environment string. */
4297         p = env + strlen (env);
4298       if (p - env > 0)
4299         {                       /* Only non-zero strings. */
4300           tab[i].name = savenstr (env, p - env);
4301           tab[i].len = strlen (tab[i].name);
4302           i++;
4303         }
4304       if (*p)
4305         env = p + 1;
4306       else
4307         {
4308           tab[i].name = NULL;   /* Mark end of table. */
4309           tab[i].len = 0;
4310           break;
4311         }
4312     }
4313   return tab;
4314 }
4315
4316 /* If the text at CP matches one of the tag-defining TeX command names,
4317    return the pointer to the first occurrence of that command in TEX_toktab.
4318    Otherwise return -1.
4319    Keep the capital `T' in `token' for dumb truncating compilers
4320    (this distinguishes it from `TEX_toktab' */
4321 int
4322 TEX_Token (cp)
4323      char *cp;
4324 {
4325   int i;
4326
4327   for (i = 0; TEX_toktab[i].len > 0; i++)
4328     if (strneq (TEX_toktab[i].name, cp, TEX_toktab[i].len))
4329       return i;
4330   return -1;
4331 }
4332 \f
4333 /*
4334  * Prolog support (rewritten) by Anders Lindgren, Mar. 96
4335  *
4336  * Assumes that the predicate starts at column 0.
4337  * Only the first clause of a predicate is added.
4338  */
4339 int prolog_pred ();
4340 void prolog_skip_comment ();
4341 int prolog_atom ();
4342
4343 void
4344 Prolog_functions (inf)
4345      FILE *inf;
4346 {
4347   char *cp, *last;
4348   int len;
4349   int allocated;
4350
4351   allocated = 0;
4352   len = 0;
4353   last = NULL;
4354
4355   LOOP_ON_INPUT_LINES (inf, lb, cp)
4356     {
4357       if (cp[0] == '\0')        /* Empty line */
4358         continue;
4359       else if (isspace (cp[0])) /* Not a predicate */
4360         continue;
4361       else if (cp[0] == '/' && cp[1] == '*')    /* comment. */
4362         prolog_skip_comment (&lb, inf);
4363       else if ((len = prolog_pred (cp, last)) > 0)
4364         {
4365           /* Predicate.  Store the function name so that we only
4366              generate a tag for the first clause.  */
4367           if (last == NULL)
4368             last = xnew(len + 1, char);
4369           else if (len + 1 > allocated)
4370             last = xrnew (last, len + 1, char);
4371           allocated = len + 1;
4372           strncpy (last, cp, len);
4373           last[len] = '\0';
4374         }
4375     }
4376 }
4377
4378
4379 void
4380 prolog_skip_comment (plb, inf)
4381      linebuffer *plb;
4382      FILE *inf;
4383 {
4384   char *cp;
4385
4386   do
4387     {
4388       for (cp = plb->buffer; *cp != '\0'; cp++)
4389         if (cp[0] == '*' && cp[1] == '/')
4390           return;
4391       lineno++;
4392       linecharno += readline (plb, inf);
4393     }
4394   while (!feof(inf));
4395 }
4396
4397 /*
4398  * A predicate definition is added if it matches:
4399  *     <beginning of line><Prolog Atom><whitespace>(
4400  *
4401  * It is added to the tags database if it doesn't match the
4402  * name of the previous clause header.
4403  *
4404  * Return the size of the name of the predicate, or 0 if no header
4405  * was found.
4406  */
4407 int
4408 prolog_pred (s, last)
4409      char *s;
4410      char *last;                /* Name of last clause. */
4411 {
4412   int pos;
4413   int len;
4414
4415   pos = prolog_atom (s, 0);
4416   if (pos < 1)
4417     return 0;
4418
4419   len = pos;
4420   pos = skip_spaces (s + pos) - s;
4421
4422   if ((s[pos] == '(') || (s[pos] == '.'))
4423     {
4424       if (s[pos] == '(')
4425         pos++;
4426
4427       /* Save only the first clause. */
4428       if (last == NULL
4429           || len != (int)strlen (last)
4430           || !strneq (s, last, len))
4431         {
4432           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4433           return len;
4434         }
4435     }
4436   return 0;
4437 }
4438
/*
 * Consume a Prolog atom that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * A prolog atom, in this context, could be one of:
 * - An alphanumeric sequence (underscores included), starting with a
 *   lower case letter or an underscore.
 * - A quoted arbitrary string. Single quotes can escape themselves.
 *   Backslash quotes everything.  The count includes both quotes.
 */
int
prolog_atom (s, pos)
     char *s;
     int pos;
{
  int origpos;

  origpos = pos;

  /* The casts to unsigned char keep the <ctype.h> calls well defined
     for bytes with the high bit set. */
  if (islower ((unsigned char) s[pos]) || (s[pos] == '_'))
    {
      /* The atom is unquoted. */
      pos++;
      while (isalnum ((unsigned char) s[pos]) || (s[pos] == '_'))
        {
          pos++;
        }
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      while (1)
        {
          if (s[pos] == '\'')
            {
              pos++;
              if (s[pos] != '\'')
                break;          /* That was the closing quote */
              pos++;            /* A double quote */
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
          else if (s[pos] == '\\')
            {
              /* A backslash must have something to quote. */
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return -1;
}
4497 \f
4498 /*
4499  * Support for Erlang  --  Anders Lindgren, Feb 1996.
4500  *
4501  * Generates tags for functions, defines, and records.
4502  *
4503  * Assumes that Erlang functions start at column 0.
4504  */
4505 int erlang_func ();
4506 void erlang_attribute ();
4507 int erlang_atom ();
4508
4509 void
4510 Erlang_functions (inf)
4511      FILE *inf;
4512 {
4513   char *cp, *last;
4514   int len;
4515   int allocated;
4516
4517   allocated = 0;
4518   len = 0;
4519   last = NULL;
4520
4521   LOOP_ON_INPUT_LINES (inf, lb, cp)
4522     {
4523       if (cp[0] == '\0')        /* Empty line */
4524         continue;
4525       else if (isspace (cp[0])) /* Not function nor attribute */
4526         continue;
4527       else if (cp[0] == '%')    /* comment */
4528         continue;
4529       else if (cp[0] == '"')    /* Sometimes, strings start in column one */
4530         continue;
4531       else if (cp[0] == '-')    /* attribute, e.g. "-define" */
4532         {
4533           erlang_attribute (cp);
4534           last = NULL;
4535         }
4536       else if ((len = erlang_func (cp, last)) > 0)
4537         {
4538           /*
4539            * Function.  Store the function name so that we only
4540            * generates a tag for the first clause.
4541            */
4542           if (last == NULL)
4543             last = xnew (len + 1, char);
4544           else if (len + 1 > allocated)
4545             last = xrnew (last, len + 1, char);
4546           allocated = len + 1;
4547           strncpy (last, cp, len);
4548           last[len] = '\0';
4549         }
4550     }
4551 }
4552
4553
4554 /*
4555  * A function definition is added if it matches:
4556  *     <beginning of line><Erlang Atom><whitespace>(
4557  *
4558  * It is added to the tags database if it doesn't match the
4559  * name of the previous clause header.
4560  *
4561  * Return the size of the name of the function, or 0 if no function
4562  * was found.
4563  */
4564 int
4565 erlang_func (s, last)
4566      char *s;
4567      char *last;                /* Name of last clause. */
4568 {
4569   int pos;
4570   int len;
4571
4572   pos = erlang_atom (s, 0);
4573   if (pos < 1)
4574     return 0;
4575
4576   len = pos;
4577   pos = skip_spaces (s + pos) - s;
4578
4579   /* Save only the first clause. */
4580   if (s[pos++] == '('
4581       && (last == NULL
4582           || len != (int)strlen (last)
4583           || !strneq (s, last, len)))
4584         {
4585           pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
4586           return len;
4587         }
4588
4589   return 0;
4590 }
4591
4592
4593 /*
4594  * Handle attributes.  Currently, tags are generated for defines
4595  * and records.
4596  *
4597  * They are on the form:
4598  * -define(foo, bar).
4599  * -define(Foo(M, N), M+N).
4600  * -record(graph, {vtab = notable, cyclic = true}).
4601  */
4602 void
4603 erlang_attribute (s)
4604      char *s;
4605 {
4606   int pos;
4607   int len;
4608
4609   if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
4610     {
4611       pos = skip_spaces (s + 7) - s;
4612       if (s[pos++] == '(')
4613         {
4614           pos = skip_spaces (s + pos) - s;
4615           len = erlang_atom (s, pos);
4616           if (len != 0)
4617             pfnote (savenstr (& s[pos], len), TRUE,
4618                     s, pos + len, lineno, linecharno);
4619         }
4620     }
4621   return;
4622 }
4623
4624
/*
 * Consume an Erlang atom (or variable) that starts at S[POS].
 * Return the number of bytes consumed, or -1 if there was an error.
 *
 * An unquoted name starts with a letter or an underscore and goes on
 * with letters, digits and underscores.  A quoted name is enclosed in
 * single quotes, with backslash quoting the character that follows
 * it; the count includes both quotes.
 */
int
erlang_atom (s, pos)
     char *s;
     int pos;
{
  int origpos;

  origpos = pos;

  /* The casts to unsigned char keep the <ctype.h> calls well defined
     for bytes with the high bit set. */
  if (isalpha ((unsigned char) s[pos]) || s[pos] == '_')
    {
      /* The atom is unquoted. */
      pos++;
      while (isalnum ((unsigned char) s[pos]) || s[pos] == '_')
        pos++;
      return pos - origpos;
    }
  else if (s[pos] == '\'')
    {
      pos++;

      while (1)
        {
          if (s[pos] == '\'')
            {
              pos++;            /* Include the closing quote */
              break;
            }
          else if (s[pos] == '\0')
            /* Multiline quoted atoms are ignored. */
            return -1;
          else if (s[pos] == '\\')
            {
              /* A backslash must have something to quote. */
              if (s[pos+1] == '\0')
                return -1;
              pos += 2;
            }
          else
            pos++;
        }
      return pos - origpos;
    }
  else
    return -1;
}
4674 \f
4675 #ifdef ETAGS_REGEXPS
4676
/* Take a string like "/blah/" and turn it into "blah".  The first
   character of NAME is the separator; the text after it is copied
   down to the start of NAME until an unquoted separator or the end
   of the string is met, and the copy is null terminated.

   A backslash quotes the character that follows it: "\t" becomes a
   Tab character, a quoted separator becomes the separator itself, and
   for anything else both the backslash and the character are kept.
   A backslash that is the last character of the string is dropped.

   Returns a pointer to the place where scanning stopped, i.e. to the
   terminating separator or to the end of the original string.  Works
   in place. */
char *
scan_separators (name)
     char *name;
{
  char sep = name[0];
  char *dst = name;             /* where the next character is stored */
  char *src;                    /* the character being examined */

  for (src = name + 1; *src != '\0'; src++)
    {
      if (*src == '\\')
        {
          /* Look at the quoted character right away. */
          if (*++src == '\0')
            break;
          if (*src == 't')
            *dst++ = '\t';
          else if (*src == sep)
            *dst++ = sep;
          else
            {
              /* Something else is quoted, so preserve the quote. */
              *dst++ = '\\';
              *dst++ = *src;
            }
        }
      else if (*src == sep)
        break;
      else
        *dst++ = *src;
    }

  /* Terminate copied string. */
  *dst = '\0';
  return src;
}
4719
4720 /* Look at the argument of --regex or --no-regex and do the right
4721    thing.  Same for each line of a regexp file. */
4722 void
4723 analyse_regex (regex_arg, ignore_case)
4724      char *regex_arg;
4725      bool ignore_case;
4726 {
4727   if (regex_arg == NULL)
4728     free_patterns ();           /* --no-regex: remove existing regexps */
4729
4730   /* A real --regexp option or a line in a regexp file. */
4731   switch (regex_arg[0])
4732     {
4733       /* Comments in regexp file or null arg to --regex. */
4734     case '\0':
4735     case ' ':
4736     case '\t':
4737       break;
4738
4739       /* Read a regex file.  This is recursive and may result in a
4740          loop, which will stop when the file descriptors are exhausted. */
4741     case '@':
4742       {
4743         FILE *regexfp;
4744         linebuffer regexbuf;
4745         char *regexfile = regex_arg + 1;
4746
4747         /* regexfile is a file containing regexps, one per line. */
4748         regexfp = fopen (regexfile, "r");
4749         if (regexfp == NULL)
4750           {
4751             pfatal (regexfile);
4752             return;
4753           }
4754         initbuffer (&regexbuf);
4755         while (readline_internal (&regexbuf, regexfp) > 0)
4756           analyse_regex (regexbuf.buffer, ignore_case);
4757         free (regexbuf.buffer);
4758         fclose (regexfp);
4759       }
4760       break;
4761
4762       /* Regexp to be used for a specific language only. */
4763     case '{':
4764       {
4765         language *lang;
4766         char *lang_name = regex_arg + 1;
4767         char *cp;
4768
4769         for (cp = lang_name; *cp != '}'; cp++)
4770           if (*cp == '\0')
4771             {
4772               error ("unterminated language name in regex: %s", regex_arg);
4773               return;
4774             }
4775         *cp = '\0';
4776         lang = get_language_from_name (lang_name);
4777         if (lang == NULL)
4778           return;
4779         add_regex (cp + 1, ignore_case, lang);
4780       }
4781       break;
4782
4783       /* Regexp to be used for any language. */
4784     default:
4785       add_regex (regex_arg, ignore_case, NULL);
4786       break;
4787     }
4788 }
4789
4790 /* Turn a name, which is an ed-style (but Emacs syntax) regular
4791    expression, into a real regular expression by compiling it. */
4792 void
4793 add_regex (regexp_pattern, ignore_case, lang)
4794      char *regexp_pattern;
4795      bool ignore_case;
4796      language *lang;
4797 {
4798   char *name;
4799   const char *err;
4800   struct re_pattern_buffer *patbuf;
4801   pattern *pp;
4802
4803
4804   if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
4805     {
4806       error ("%s: unterminated regexp", regexp_pattern);
4807       return;
4808     }
4809   name = scan_separators (regexp_pattern);
4810   if (regexp_pattern[0] == '\0')
4811     {
4812       error ("null regexp", (char *)NULL);
4813       return;
4814     }
4815   (void) scan_separators (name);
4816
4817   patbuf = xnew (1, struct re_pattern_buffer);
4818   /* Translation table to fold case if appropriate. */
4819   patbuf->translate = (ignore_case) ? lc_trans : NULL;
4820   patbuf->fastmap = NULL;
4821   patbuf->buffer = NULL;
4822   patbuf->allocated = 0;
4823
4824   err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
4825   if (err != NULL)
4826     {
4827       error ("%s while compiling pattern", err);
4828       return;
4829     }
4830
4831   pp = p_head;
4832   p_head = xnew (1, pattern);
4833   p_head->regex = savestr (regexp_pattern);
4834   p_head->p_next = pp;
4835   p_head->language = lang;
4836   p_head->pattern = patbuf;
4837   p_head->name_pattern = savestr (name);
4838   p_head->error_signaled = FALSE;
4839 }
4840
4841 /*
4842  * Do the substitutions indicated by the regular expression and
4843  * arguments.
4844  */
4845 char *
4846 substitute (in, out, regs)
4847      char *in, *out;
4848      struct re_registers *regs;
4849 {
4850   char *result, *t;
4851   int size, dig, diglen;
4852
4853   result = NULL;
4854   size = strlen (out);
4855
4856   /* Pass 1: figure out how much to allocate by finding all \N strings. */
4857   if (out[size - 1] == '\\')
4858     fatal ("pattern error in \"%s\"", out);
4859   for (t = etags_strchr (out, '\\');
4860        t != NULL;
4861        t = etags_strchr (t + 2, '\\'))
4862     if (isdigit (t[1]))
4863       {
4864         dig = t[1] - '0';
4865         diglen = regs->end[dig] - regs->start[dig];
4866         size += diglen - 2;
4867       }
4868     else
4869       size -= 1;
4870
4871   /* Allocate space and do the substitutions. */
4872   result = xnew (size + 1, char);
4873
4874   for (t = result; *out != '\0'; out++)
4875     if (*out == '\\' && isdigit (*++out))
4876       {
4877         /* Using "dig2" satisfies my debugger.  Bleah. */
4878         dig = *out - '0';
4879         diglen = regs->end[dig] - regs->start[dig];
4880         strncpy (t, in + regs->start[dig], diglen);
4881         t += diglen;
4882       }
4883     else
4884       *t++ = *out;
4885   *t = '\0';
4886
4887   if (DEBUG && (t > result + size || t - result != (int)strlen (result)))
4888     abort ();
4889
4890   return result;
4891 }
4892
4893 /* Deallocate all patterns. */
4894 void
4895 free_patterns ()
4896 {
4897   pattern *pp;
4898   while (p_head != NULL)
4899     {
4900       pp = p_head->p_next;
4901       free (p_head->regex);
4902       free (p_head->name_pattern);
4903       free (p_head);
4904       p_head = pp;
4905     }
4906   return;
4907 }
4908 \f
4909 void
4910 get_tag (bp)
4911      register char *bp;
4912 {
4913   register char *cp;
4914
4915   if (*bp == '\0')
4916     return;
4917   /* Go till you get to white space or a syntactic break */
4918   for (cp = bp + 1;
4919        *cp != '\0' && *cp != '(' && *cp != ')' && !isspace (*cp);
4920        cp++)
4921     continue;
4922   pfnote (savenstr (bp, cp-bp), TRUE,
4923           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
4924 }
4925
4926 #endif /* ETAGS_REGEXPS */
4927 /* Initialize a linebuffer for use */
4928 void
4929 initbuffer (lbp)
4930      linebuffer *lbp;
4931 {
4932   lbp->size = 200;
4933   lbp->buffer = xnew (200, char);
4934 }
4935
4936 /*
4937  * Read a line of text from `stream' into `lbp', excluding the
4938  * newline or CR-NL, if any.  Return the number of characters read from
4939  * `stream', which is the length of the line including the newline.
4940  *
4941  * On DOS or Windows we do not count the CR character, if any, before the
4942  * NL, in the returned length; this mirrors the behavior of emacs on those
4943  * platforms (for text files, it translates CR-NL to NL as it reads in the
4944  * file).
4945  */
4946 long
4947 readline_internal (lbp, stream)
4948      linebuffer *lbp;
4949      register FILE *stream;
4950 {
4951   char *buffer = lbp->buffer;
4952   register char *p = lbp->buffer;
4953   register char *pend;
4954   int chars_deleted;
4955
4956   pend = p + lbp->size;         /* Separate to avoid 386/IX compiler bug.  */
4957
4958   while (1)
4959     {
4960       register int c = getc (stream);
4961       if (p == pend)
4962         {
4963           /* We're at the end of linebuffer: expand it. */
4964           lbp->size *= 2;
4965           buffer = xrnew (buffer, lbp->size, char);
4966           p += buffer - lbp->buffer;
4967           pend = buffer + lbp->size;
4968           lbp->buffer = buffer;
4969         }
4970       if (c == EOF)
4971         {
4972           *p = '\0';
4973           chars_deleted = 0;
4974           break;
4975         }
4976       if (c == '\n')
4977         {
4978           if (p > buffer && p[-1] == '\r')
4979             {
4980               p -= 1;
4981 #ifdef DOS_NT
4982              /* Assume CRLF->LF translation will be performed by Emacs
4983                 when loading this file, so CRs won't appear in the buffer.
4984                 It would be cleaner to compensate within Emacs;
4985                 however, Emacs does not know how many CRs were deleted
4986                 before any given point in the file.  */
4987               chars_deleted = 1;
4988 #else
4989               chars_deleted = 2;
4990 #endif
4991             }
4992           else
4993             {
4994               chars_deleted = 1;
4995             }
4996           *p = '\0';
4997           break;
4998         }
4999       *p++ = c;
5000     }
5001   lbp->len = p - buffer;
5002
5003   return lbp->len + chars_deleted;
5004 }
5005
5006 /*
5007  * Like readline_internal, above, but in addition try to match the
5008  * input line against relevant regular expressions.
5009  */
5010 long
5011 readline (lbp, stream)
5012      linebuffer *lbp;
5013      FILE *stream;
5014 {
5015   /* Read new line. */
5016   long result = readline_internal (lbp, stream);
5017 #ifdef ETAGS_REGEXPS
5018   int match;
5019   pattern *pp;
5020
5021   /* Match against relevant patterns. */
5022   if (lbp->len > 0)
5023     for (pp = p_head; pp != NULL; pp = pp->p_next)
5024       {
5025         /* Only use generic regexps or those for the current language. */
5026         if (pp->language != NULL && pp->language != curlang)
5027           continue;
5028
5029         match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
5030         switch (match)
5031           {
5032           case -2:
5033             /* Some error. */
5034             if (!pp->error_signaled)
5035               {
5036                 error ("error while matching \"%s\"", pp->regex);
5037                 pp->error_signaled = TRUE;
5038               }
5039             break;
5040           case -1:
5041             /* No match. */
5042             break;
5043           default:
5044             /* Match occurred.  Construct a tag. */
5045             if (pp->name_pattern[0] != '\0')
5046               {
5047                 /* Make a named tag. */
5048                 char *name = substitute (lbp->buffer,
5049                                          pp->name_pattern, &pp->regs);
5050                 if (name != NULL)
5051                   pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
5052               }
5053             else
5054               {
5055                 /* Make an unnamed tag. */
5056                 pfnote ((char *)NULL, TRUE,
5057                         lbp->buffer, match, lineno, linecharno);
5058               }
5059             break;
5060           }
5061       }
5062 #endif /* ETAGS_REGEXPS */
5063
5064   return result;
5065 }
5066 \f
/*
 * Return a newly allocated copy of the whole string CP.  The copy
 * is made by savenstr, which is asked for strlen(cp) characters.
 */
char *
savestr (cp)
     char *cp;
{
  int len = strlen (cp);

  return savenstr (cp, len);
}
5077
5078 /*
5079  * Return a pointer to a space of size LEN+1 allocated with xnew where
5080  * the string CP has been copied for at most the first LEN characters.
5081  */
5082 char *
5083 savenstr (cp, len)
5084      char *cp;
5085      int len;
5086 {
5087   register char *dp;
5088
5089   dp = xnew (len + 1, char);
5090   strncpy (dp, cp, len);
5091   dp[len] = '\0';
5092   return dp;
5093 }
5094
/*
 * Return a pointer to the last occurrence of the character c in
 * the string sp, or NULL if there is none.  The terminating null
 * counts as part of the string, so it is found when c is '\0'.
 *
 * Identical to System V strrchr, included for portability.
 */
char *
etags_strrchr (sp, c)
     register char *sp, c;
{
  register char *last = NULL;

  for (;; sp++)
    {
      if (*sp == c)
        last = sp;
      if (*sp == '\0')
        break;
    }
  return last;
}
5115
5116
/*
 * Return a pointer to the first occurrence of the character c in
 * the string sp, or NULL if there is none.  The terminating null
 * counts as part of the string, so it is found when c is '\0'.
 *
 * Identical to System V strchr, included for portability.
 */
char *
etags_strchr (sp, c)
     register char *sp, c;
{
  for (;; sp++)
    {
      if (*sp == c)
        return sp;
      if (*sp == '\0')
        return NULL;
    }
}
5134
/* Return a pointer to the first character of CP that isspace does
   not accept.  This may be the terminating null. */
char *
skip_spaces (cp)
     char *cp;
{
  for (; isspace (*cp); cp++)   /* isspace('\0')==FALSE */
    continue;
  return cp;
}
5144
/* Return a pointer to the first character of CP that iswhite
   accepts.  This may be the terminating null. */
char *
skip_non_spaces (cp)
     char *cp;
{
  for (; !iswhite (*cp); cp++)  /* iswhite('\0')==TRUE */
    continue;
  return cp;
}
5154
5155 /* Print error message and exit.  */
5156 void
5157 fatal (s1, s2)
5158      char *s1, *s2;
5159 {
5160   error (s1, s2);
5161   exit (BAD);
5162 }
5163
5164 void
5165 pfatal (s1)
5166      char *s1;
5167 {
5168   perror (s1);
5169   exit (BAD);
5170 }
5171
5172 void
5173 suggest_asking_for_help ()
5174 {
5175   fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
5176            progname,
5177 #ifdef LONG_OPTIONS
5178            "--help"
5179 #else
5180            "-h"
5181 #endif
5182            );
5183   exit (BAD);
5184 }
5185
5186 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
5187 void
5188 error (s1, s2)
5189      char *s1, *s2;
5190 {
5191   fprintf (stderr, "%s: ", progname);
5192   fprintf (stderr, s1, s2);
5193   fprintf (stderr, "\n");
5194 }
5195
5196 /* Return a newly-allocated string whose contents
5197    concatenate those of s1, s2, s3.  */
5198 char *
5199 concat (s1, s2, s3)
5200      char *s1, *s2, *s3;
5201 {
5202   int len1 = strlen (s1), len2 = strlen (s2), len3 = strlen (s3);
5203   char *result = xnew (len1 + len2 + len3 + 1, char);
5204
5205   strcpy (result, s1);
5206   strcpy (result + len1, s2);
5207   strcpy (result + len1 + len2, s3);
5208   result[len1 + len2 + len3] = '\0';
5209
5210   return result;
5211 }
5212 \f
5213 /* Does the same work as the system V getcwd, but does not need to
5214    guess the buffer size in advance. */
5215 char *
5216 etags_getcwd ()
5217 {
5218 #ifdef HAVE_GETCWD
5219   int bufsize = 200;
5220   char *path = xnew (bufsize, char);
5221
5222   while (getcwd (path, bufsize) == NULL)
5223     {
5224       if (errno != ERANGE)
5225         pfatal ("getcwd");
5226       bufsize *= 2;
5227       free (path);
5228       path = xnew (bufsize, char);
5229     }
5230
5231   canonicalize_filename (path);
5232   return path;
5233
5234 #else /* not HAVE_GETCWD */
5235 #ifdef MSDOS
5236   char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
5237
5238   getwd (path);
5239
5240   for (p = path; *p != '\0'; p++)
5241     if (*p == '\\')
5242       *p = '/';
5243     else
5244       *p = lowcase (*p);
5245
5246   return strdup (path);
5247 #else /* not MSDOS */
5248   linebuffer path;
5249   FILE *pipe;
5250
5251   initbuffer (&path);
5252   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
5253   if (pipe == NULL || readline_internal (&path, pipe) == 0)
5254     pfatal ("pwd");
5255   pclose (pipe);
5256
5257   return path.buffer;
5258 #endif /* not MSDOS */
5259 #endif /* not HAVE_GETCWD */
5260 }
5261
5262 /* Return a newly allocated string containing the file name of FILE
5263    relative to the absolute directory DIR (which should end with a slash). */
5264 char *
5265 relative_filename (file, dir)
5266      char *file, *dir;
5267 {
5268   char *fp, *dp, *afn, *res;
5269   int i;
5270
5271   /* Find the common root of file and dir (with a trailing slash). */
5272   afn = absolute_filename (file, cwd);
5273   fp = afn;
5274   dp = dir;
5275   while (*fp++ == *dp++)
5276     continue;
5277   fp--, dp--;                   /* back to the first differing char */
5278 #ifdef DOS_NT
5279   if (fp == afn && afn[0] != '/') /* cannot build a relative name */
5280     return afn;
5281 #endif
5282   do                            /* look at the equal chars until '/' */
5283     fp--, dp--;
5284   while (*fp != '/');
5285
5286   /* Build a sequence of "../" strings for the resulting relative file name. */
5287   i = 0;
5288   while ((dp = etags_strchr (dp + 1, '/')) != NULL)
5289     i += 1;
5290   res = xnew (3*i + strlen (fp + 1) + 1, char);
5291   res[0] = '\0';
5292   while (i-- > 0)
5293     strcat (res, "../");
5294
5295   /* Add the file name relative to the common root of file and dir. */
5296   strcat (res, fp + 1);
5297   free (afn);
5298
5299   return res;
5300 }
5301
/* Copy the string SRC, terminating null included, to DST.  The two
   areas may overlap provided that DST comes before SRC, which is a
   case where the result of strcpy is not defined. */
static void
etags_shift_down (dst, src)
     char *dst, *src;
{
  while ((*dst++ = *src++) != '\0')
    continue;
}

/* Return a newly allocated string containing the absolute file name
   of FILE given DIR (which should end with a slash).
   An absolute FILE is simply copied, otherwise it is appended to DIR.
   The "/dirname/.." and "/." substrings are then deleted from the
   result; if nothing is left, "/" is returned. */
char *
absolute_filename (file, dir)
     char *file, *dir;
{
  char *slashp, *cp, *res;

  if (filename_is_absolute (file))
    res = savestr (file);
#ifdef DOS_NT
  /* We don't support non-absolute file names with a drive
     letter, like `d:NAME' (it's too much hassle).  */
  else if (file[1] == ':')
    fatal ("%s: relative file names with drive letters not supported", file);
#endif
  else
    res = concat (dir, file, "");

  /* Delete the "/dirname/.." and "/." substrings. */
  slashp = etags_strchr (res, '/');
  while (slashp != NULL && slashp[0] != '\0')
    {
      if (slashp[1] == '.')
        {
          if (slashp[2] == '.'
              && (slashp[3] == '/' || slashp[3] == '\0'))
            {
              /* Go back to where the previous component starts. */
              cp = slashp;
              do
                cp--;
              while (cp >= res && !filename_is_absolute (cp));
              if (cp < res)
                cp = slashp;    /* the absolute name begins with "/.." */
#ifdef DOS_NT
              /* Under MSDOS and NT we get `d:/NAME' as absolute
                 file name, so the luser could say `d:/../NAME'.
                 We silently treat this as `d:/NAME'.  */
              else if (cp[0] != '/')
                cp = slashp;
#endif
              /* Source and destination lie in the same string. */
              etags_shift_down (cp, slashp + 3);
              slashp = cp;
              continue;
            }
          else if (slashp[2] == '/' || slashp[2] == '\0')
            {
              etags_shift_down (slashp, slashp + 2);
              continue;
            }
        }

      slashp = etags_strchr (slashp + 1, '/');
    }

  if (res[0] == '\0')
    {
      /* Everything was deleted: the empty string is not returned,
         so release it. */
      free (res);
      return savestr ("/");
    }
  else
    return res;
}
5362
/* Return a newly allocated string containing the absolute file name
   of the directory where FILE resides, given DIR (which should end
   with a slash).  FILE is canonicalized in place.  If it contains no
   slash the result is a copy of DIR. */
char *
absolute_dirname (file, dir)
     char *file, *dir;
{
  char *last_slash, *res;
  char saved;

  canonicalize_filename (file);
  last_slash = etags_strrchr (file, '/');
  if (last_slash != NULL)
    {
      /* Cut FILE right after its last slash for the time needed to
         make the directory part absolute, then restore it. */
      saved = last_slash[1];
      last_slash[1] = '\0';
      res = absolute_filename (file, dir);
      last_slash[1] = saved;
    }
  else
    res = savestr (dir);

  return res;
}
5384
/* Tell whether the string FN is an absolute file name, that is one
   starting with a slash or, when DOS_NT is defined, with a letter
   followed by ":/".  The argument string must have been
   canonicalized with canonicalize_filename. */
bool
filename_is_absolute (fn)
     char *fn;
{
#ifdef DOS_NT
  /* A name starting with a letter cannot start with a slash, so in
     this case the answer depends on the drive prefix alone. */
  if (isalpha(fn[0]) && fn[1] == ':')
    return fn[2] == '/';
#endif
  return fn[0] == '/';
}
5397
/* Canonicalize the file name FN.  Works in place.
   When DOS_NT is defined, a lower case drive letter (a letter that
   is followed by a colon) is made upper case and backslashes are
   translated into slashes.  Otherwise FN is left alone. */
void
canonicalize_filename (fn)
     register char *fn;
{
#ifdef DOS_NT
  /* Canonicalize drive letter case.  The first character is a drive
     letter only if a colon follows it; anything else is the start of
     a plain name and must keep its case. */
  if (islower (fn[0]) && fn[1] == ':')
    fn[0] = toupper (fn[0]);
  /* Convert backslashes to slashes.  */
  for (; *fn != '\0'; fn++)
    if (*fn == '\\')
      *fn = '/';
#else
  /* No action. */
  fn = NULL;                    /* shut up the compiler */
#endif
}
5416
5417 /* Increase the size of a linebuffer. */
5418 void
5419 grow_linebuffer (lbp, toksize)
5420      linebuffer *lbp;
5421      int toksize;
5422 {
5423   while (lbp->size < toksize)
5424     lbp->size *= 2;
5425   lbp->buffer = xrnew (lbp->buffer, lbp->size, char);
5426 }
5427
/* Allocate SIZE bytes with malloc and return them.  A null result
   from malloc is reported through `fatal' as exhausted memory. */
long *
xmalloc (size)
     unsigned int size;
{
  long *block = (long *) malloc (size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}
5438
/* Resize the block PTR to SIZE bytes with realloc and return the
   result.  A null result from realloc is reported through `fatal'
   as exhausted memory. */
long *
xrealloc (ptr, size)
     char *ptr;
     unsigned int size;
{
  long *block = (long *) realloc (ptr, size);

  if (block != NULL)
    return block;

  fatal ("virtual memory exhausted", (char *)NULL);
  return block;
}