*** empty log message ***

[bpt/emacs.git] / lib-src / etags.c
diff --git a/lib-src/etags.c b/lib-src/etags.c

index 6b4b379..829fc97 100644 (file)
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -2,21 +2,21 @@
     Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
     Free Software Foundation, Inc. and Ken Arnold
  
-This file is not considered part of GNU Emacs.
+ This file is not considered part of GNU Emacs.
  
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
  
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
  
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  
  /*
   * Authors:
@@ -27,14 +27,15 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
   * 1989        Sam Kendall added C++.
   * 1992 Joseph B. Wells improved C and C++ parsing.
   * 1993        Francesco Potortì reorganised C and C++.
- * 1994        Regexp tags by Tom Tromey.
+ * 1994        Line-by-line regexp tags by Tom Tromey.
   * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
   * 2002 #line directives by Francesco Potortì.
   *
- *     Francesco Potortì <pot@gnu.org> has maintained it since 1993.
+ * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
+ *
   */
  
-char pot_etags_version[] = "@(#) pot revision number is 16.10";
+char pot_etags_version[] = "@(#) pot revision number is 16.56";
  
  #define        TRUE    1
  #define        FALSE   0
@@ -176,25 +177,18 @@ If you want regular expression support, you should delete this notice and
  # define CTAGS FALSE
  #endif
  
-/* Exit codes for success and failure.  */
-#ifdef VMS
-# define       GOOD    1
-# define       BAD     0
-#else
-# define       GOOD    0
-# define       BAD     1
-#endif
-
  #define streq(s,t)     (assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
+#define strcaseeq(s,t) (assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
  #define strneq(s,t,n)  (assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
+#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
  
  #define CHARS 256              /* 2^sizeof(char) */
  #define CHAR(x)                ((unsigned int)(x) & (CHARS - 1))
-#define        iswhite(c)      (_wht[CHAR(c)]) /* c is white */
-#define notinname(c)   (_nin[CHAR(c)]) /* c is not in a name */
-#define        begtoken(c)     (_btk[CHAR(c)]) /* c can start token */
-#define        intoken(c)      (_itk[CHAR(c)]) /* c can be in token */
-#define        endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens */
+#define        iswhite(c)      (_wht[CHAR(c)]) /* c is white (see white) */
+#define notinname(c)   (_nin[CHAR(c)]) /* c is not in a name (see nonam) */
+#define        begtoken(c)     (_btk[CHAR(c)]) /* c can start token (see begtk) */
+#define        intoken(c)      (_itk[CHAR(c)]) /* c can be in token (see midtk) */
+#define        endtoken(c)     (_etk[CHAR(c)]) /* c ends tokens (see endtk) */
  
  #define ISALNUM(c)     isalnum (CHAR(c))
  #define ISALPHA(c)     isalpha (CHAR(c))
@@ -236,11 +230,12 @@ typedef struct
  typedef struct
  {
    char *name;                  /* language name */
-  bool metasource;             /* source used to generate other sources */
+  char *help;                   /* detailed help for the language */
    Lang_function *function;     /* parse function */
-  char **filenames;            /* names of this language's files */
    char **suffixes;             /* name suffixes of this language's files */
+  char **filenames;            /* names of this language's files */
    char **interpreters;         /* interpreters for this language */
+  bool metasource;             /* source used to generate other sources */
  } language;
  
  typedef struct fdesc
@@ -253,6 +248,7 @@ typedef struct fdesc
    language *lang;              /* language of file */
    char *prop;                  /* file properties to write in tagfile */
    bool usecharno;              /* etags tags shall contain char number */
+  bool written;                        /* entry written in the tags file */
  } fdesc;
  
  typedef struct node_st
@@ -260,9 +256,9 @@ typedef struct node_st
    struct node_st *left, *right;        /* left and right sons */
    fdesc *fdp;                  /* description of file to whom tag belongs */
    char *name;                  /* tag name */
-  char *pat;                   /* search pattern */
+  char *regex;                 /* search regexp */
    bool valid;                  /* write this tag on the tag file */
-  bool is_func;                        /* function tag: use pattern in CTAGS mode */
+  bool is_func;                        /* function tag: use regexp in CTAGS mode */
    bool been_warned;            /* warning already given for duplicated tag */
    int lno;                     /* line number tag is on */
    long cno;                    /* character number line starts on */
@@ -288,9 +284,9 @@ typedef struct
    enum {
      at_language,               /* a language specification */
      at_regexp,                 /* a regular expression */
-    at_icregexp,               /* same, but with case ignored */
      at_filename,               /* a file name */
-    at_stdin                   /* read from stdin here */
+    at_stdin,                  /* read from stdin here */
+    at_end                     /* stop parsing the list */
    } arg_type;                  /* argument type */
    language *lang;              /* language associated with the argument */
    char *what;                  /* the argument itself */
@@ -298,17 +294,19 @@ typedef struct
  
  #ifdef ETAGS_REGEXPS
  /* Structure defining a regular expression. */
-typedef struct pattern
+typedef struct regexp
  {
-  struct pattern *p_next;
-  language *lang;
-  char *regex;
-  struct re_pattern_buffer *pat;
-  struct re_registers regs;
-  char *name_pattern;
-  bool error_signaled;
-  bool ignore_case;
-} pattern;
+  struct regexp *p_next;       /* pointer to next in list */
+  language *lang;              /* if set, use only for this language */
+  char *pattern;               /* the regexp pattern */
+  char *name;                  /* tag name */
+  struct re_pattern_buffer *pat; /* the compiled pattern */
+  struct re_registers regs;    /* re registers */
+  bool error_signaled;         /* already signaled for this regexp */
+  bool force_explicit_name;    /* do not allow implicit tag name */
+  bool ignore_case;            /* ignore case when matching */
+  bool multi_line;             /* do a multi-line match on the whole file */
+} regexp;
  #endif /* ETAGS_REGEXPS */
  
  
@@ -326,23 +324,24 @@ static void Cplusplus_entries __P((FILE *));
  static void Cstar_entries __P((FILE *));
  static void Erlang_functions __P((FILE *));
  static void Fortran_functions __P((FILE *));
-static void Yacc_entries __P((FILE *));
+static void HTML_labels __P((FILE *));
  static void Lisp_functions __P((FILE *));
  static void Makefile_targets __P((FILE *));
  static void Pascal_functions __P((FILE *));
  static void Perl_functions __P((FILE *));
  static void PHP_functions __P((FILE *));
-static void Postscript_functions __P((FILE *));
+static void PS_functions __P((FILE *));
  static void Prolog_functions __P((FILE *));
  static void Python_functions __P((FILE *));
  static void Scheme_functions __P((FILE *));
  static void TeX_commands __P((FILE *));
  static void Texinfo_nodes __P((FILE *));
+static void Yacc_entries __P((FILE *));
  static void just_read_file __P((FILE *));
  
  static void print_language_names __P((void));
  static void print_version __P((void));
-static void print_help __P((void));
+static void print_help __P((argument *));
  int main __P((int, char **));
  
  static compressor *get_compressor_from_suffix __P((char *, char **));
@@ -352,12 +351,12 @@ static language *get_language_from_filename __P((char *, bool));
  static void readline __P((linebuffer *, FILE *));
  static long readline_internal __P((linebuffer *, FILE *));
  static bool nocase_tail __P((char *));
-static char *get_tag __P((char *));
+static void get_tag __P((char *, char **));
  
  #ifdef ETAGS_REGEXPS
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
-static void free_patterns __P((void));
+static void analyse_regex __P((char *));
+static void free_regexps __P((void));
+static void regex_tag_multiline __P((void));
  #endif /* ETAGS_REGEXPS */
  static void error __P((const char *, const char *));
  static void suggest_asking_for_help __P((void));
@@ -366,14 +365,13 @@ static void pfatal __P((char *));
  static void add_node __P((node *, node **));
  
  static void init __P((void));
-static void initbuffer __P((linebuffer *));
  static void process_file_name __P((char *, language *));
  static void process_file __P((FILE *, char *, language *));
  static void find_entries __P((FILE *));
  static void free_tree __P((node *));
  static void free_fdesc __P((fdesc *));
  static void pfnote __P((char *, bool, char *, int, int, long));
-static void new_pfnote __P((char *, int, bool, char *, int, int, long));
+static void make_tag __P((char *, int, bool, char *, int, int, long));
  static void invalidate_nodes __P((fdesc *, node **));
  static void put_entries __P((node *));
  
@@ -384,13 +382,15 @@ static char *savenstr __P((char *, int));
  static char *savestr __P((char *));
  static char *etags_strchr __P((const char *, int));
  static char *etags_strrchr __P((const char *, int));
-static bool strcaseeq __P((const char *, const char *));
+static int etags_strcasecmp __P((const char *, const char *));
+static int etags_strncasecmp __P((const char *, const char *, int));
  static char *etags_getcwd __P((void));
  static char *relative_filename __P((char *, char *));
  static char *absolute_filename __P((char *, char *));
  static char *absolute_dirname __P((char *, char *));
  static bool filename_is_absolute __P((char *f));
  static void canonicalize_filename __P((char *));
+static void linebuffer_init __P((linebuffer *));
  static void linebuffer_setlen __P((linebuffer *, int));
  static PTR xmalloc __P((unsigned int));
  static PTR xrealloc __P((char *, unsigned int));
@@ -417,6 +417,8 @@ static node *nodehead;              /* the head of the binary tree of tags */
  static node *last_node;                /* the last node created */
  
  static linebuffer lb;          /* the current line */
+static linebuffer filebuf;     /* a buffer containing the whole file */
+static linebuffer token_name;  /* a buffer containing a tag name */
  
  /* boolean "functions" (see init)      */
  static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
@@ -424,7 +426,7 @@ static char
    /* white chars */
    *white = " \f\t\n\r\v",
    /* not in a name */
-  *nonam = " \f\t\n\r()=,;",
+  *nonam = " \f\t\n\r()=,;",   /* look at make_tag before modifying! */
    /* token ending chars */
    *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
    /* token starting chars */
@@ -450,20 +452,17 @@ static bool vgrind_style; /* -v: create vgrind style index output */
  static bool no_warnings;       /* -w: suppress warnings */
  static bool cxref_style;       /* -x: create cxref style output */
  static bool cplusplus;         /* .[hc] means C++, not C */
-static bool noindentypedefs;   /* -I: ignore indentation in C */
+static bool ignoreindent;      /* -I: ignore indentation in C */
  static bool packages_only;     /* --packages-only: in Ada, only tag packages*/
  
  #define STDIN 0x1001           /* returned by getopt_long on --parse-stdin */
  static bool parsing_stdin;     /* --parse-stdin used */
  
  #ifdef ETAGS_REGEXPS
-/* List of all regexps. */
-static pattern *p_head;
-
-/* How many characters in the character set.  (From regex.c.)  */
-#define CHAR_SET_SIZE 256
-/* Translation table for case-insensitive matching. */
-static char lc_trans[CHAR_SET_SIZE];
+static regexp *p_head;         /* list of all regexps */
+static bool need_filebuf;      /* some regexes are multi-line */
+#else
+# define need_filebuf FALSE
  #endif /* ETAGS_REGEXPS */
  
  #ifdef LONG_OPTIONS
@@ -526,6 +525,22 @@ static compressor compressors[] =
  /* Ada code */
  static char *Ada_suffixes [] =
    { "ads", "adb", "ada", NULL };
+static char Ada_help [] =
+"In Ada code, functions, procedures, packages, tasks and types are\n\
+tags.  Use the `--packages-only' option to create tags for\n\
+packages only.\n\
+Ada tag names have suffixes indicating the type of entity:\n\
+       Entity type:    Qualifier:\n\
+       ------------    ----------\n\
+       function        /f\n\
+       procedure       /p\n\
+       package spec    /s\n\
+       package body    /b\n\
+       type            /t\n\
+       task            /k\n\
+Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
+body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
+will just search for any tag `bidule'.";
  
  /* Assembly code */
  static char *Asm_suffixes [] =
@@ -539,79 +554,187 @@ static char *Asm_suffixes [] =
      "src", /* BSO/Tasking C compiler output */
      NULL
    };
+static char Asm_help [] =
+"In assembler code, labels appearing at the beginning of a line,\n\
+followed by a colon, are tags.";
+
  
  /* Note that .c and .h can be considered C++, if the --c++ flag was
-   given, or if the `class' keyowrd is met inside the file.
+   given, or if the `class' or `template' keywords are met inside the file.
     That is why default_C_entries is called for these. */
  static char *default_C_suffixes [] =
    { "c", "h", NULL };
+static char default_C_help [] =
+"In C code, any C function or typedef is a tag, and so are\n\
+definitions of `struct', `union' and `enum'.  `#define' macro\n\
+definitions and `enum' constants are tags unless you specify\n\
+`--no-defines'.  Global variables are tags unless you specify\n\
+`--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
+can make the tags table file much smaller.\n\
+You can tag function declarations and external variables by\n\
+using `--declarations', and struct members by using `--members'.";
  
  static char *Cplusplus_suffixes [] =
    { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
      "M",                       /* Objective C++ */
      "pdb",                     /* Postscript with C syntax */
      NULL };
+static char Cplusplus_help [] =
+"In C++ code, all the tag constructs of C code are tagged.  (Use\n\
+--help --lang=c --lang=c++ for full help.)\n\
+In addition to C tags, member functions are also recognized, and\n\
+optionally member variables if you use the `--members' option.\n\
+Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
+and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
+`operator+'.";
  
  static char *Cjava_suffixes [] =
    { "java", NULL };
+static char Cjava_help [] =
+"In Java code, all the tags constructs of C and C++ code are\n\
+tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
+
  
  static char *Cobol_suffixes [] =
    { "COB", "cob", NULL };
+static char Cobol_help [] =
+"In Cobol code, tags are paragraph names; that is, any word\n\
+starting in column 8 and followed by a period.";
  
  static char *Cstar_suffixes [] =
    { "cs", "hs", NULL };
  
  static char *Erlang_suffixes [] =
    { "erl", "hrl", NULL };
+static char Erlang_help [] =
+"In Erlang code, the tags are the functions, records and macros\n\
+defined in the file.";
  
  static char *Fortran_suffixes [] =
    { "F", "f", "f90", "for", NULL };
+static char Fortran_help [] =
+"In Fortran code, functions, subroutines and block data are tags.";
+
+static char *HTML_suffixes [] =
+  { "htm", "html", "shtml", NULL };
+static char HTML_help [] =
+"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
+`h3' headers.  Also, tags are `name=' in anchors and all\n\
+occurrences of `id='.";
  
  static char *Lisp_suffixes [] =
    { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
+static char Lisp_help [] =
+"In Lisp code, any function defined with `defun', any variable\n\
+defined with `defvar' or `defconst', and in general the first\n\
+argument of any expression that starts with `(def' in column zero\n\
+is a tag.";
  
  static char *Makefile_filenames [] =
    { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
+static char Makefile_help [] =
+"In makefiles, targets are tags; additionally, variables are tags\n\
+unless you specify `--no-globals'.";
+
+static char *Objc_suffixes [] =
+  { "lm",                      /* Objective lex file */
+    "m",                       /* Objective C file */
+     NULL };
+static char Objc_help [] =
+"In Objective C code, tags include Objective C definitions for classes,\n\
+class categories, methods and protocols.  Tags for variables and\n\
+functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
  
  static char *Pascal_suffixes [] =
    { "p", "pas", NULL };
+static char Pascal_help [] =
+"In Pascal code, the tags are the functions and procedures defined\n\
+in the file.";
  
  static char *Perl_suffixes [] =
    { "pl", "pm", NULL };
-
  static char *Perl_interpreters [] =
    { "perl", "@PERL@", NULL };
+static char Perl_help [] =
+"In Perl code, the tags are the packages, subroutines and variables\n\
+defined by the `package', `sub', `my' and `local' keywords.  Use\n\
+`--globals' if you want to tag global variables.  Tags for\n\
+subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
+defined in the default package is `main::SUB'.";
  
  static char *PHP_suffixes [] =
    { "php", "php3", "php4", NULL };
+static char PHP_help [] =
+"In PHP code, tags are functions, classes and defines.  When using\n\
+the `--members' option, vars are tags too.";
  
  static char *plain_C_suffixes [] =
-  { "lm",                      /* Objective lex file */
-    "m",                       /* Objective C file */
-    "pc",                      /* Pro*C file */
+  { "pc",                      /* Pro*C file */
       NULL };
  
-static char *Postscript_suffixes [] =
+static char *PS_suffixes [] =
    { "ps", "psw", NULL };       /* .psw is for PSWrap */
+static char PS_help [] =
+"In PostScript code, the tags are the functions.";
  
  static char *Prolog_suffixes [] =
    { "prolog", NULL };
+static char Prolog_help [] =
+"In Prolog code, tags are predicates and rules at the beginning of\n\
+line.";
  
  static char *Python_suffixes [] =
    { "py", NULL };
+static char Python_help [] =
+"In Python code, `def' or `class' at the beginning of a line\n\
+generate a tag.";
  
  /* Can't do the `SCM' or `scm' prefix with a version number. */
  static char *Scheme_suffixes [] =
    { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
+static char Scheme_help [] =
+"In Scheme code, tags include anything defined with `def' or with a\n\
+construct whose name starts with `def'.  They also include\n\
+variables set with `set!' at top level in the file.";
  
  static char *TeX_suffixes [] =
    { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
+static char TeX_help [] =
+"In LaTeX text, the argument of any of the commands `\\chapter',\n\
+`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
+`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
+`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
+`\\newenvironment' or `\\renewenvironment' is a tag.\n\
+\n\
+Other commands can be specified by setting the environment variable\n\
+`TEXTAGS' to a colon-separated list like, for example,\n\
+     TEXTAGS=\"mycommand:myothercommand\".";
+
  
  static char *Texinfo_suffixes [] =
    { "texi", "texinfo", "txi", NULL };
+static char Texinfo_help [] =
+"for texinfo files, lines starting with @node are tagged.";
  
  static char *Yacc_suffixes [] =
    { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
+static char Yacc_help [] =
+"In Bison or Yacc input files, each rule defines as a tag the\n\
+nonterminal it constructs.  The portions of the file that contain\n\
+C code are parsed as C code (use --help --lang=c --lang=yacc\n\
+for full help).";
+
+static char auto_help [] =
+"`auto' is not a real language, it indicates to use\n\
+a default language for files base on file name suffix and file contents.";
+
+static char none_help [] =
+"`none' is not a real language, it indicates to only do\n\
+regexp processing on files.";
+
+static char no_lang_help [] =
+"No detailed help available for this language.";
+
  
  /*
   * Table of languages.
@@ -622,31 +745,33 @@ static char *Yacc_suffixes [] =
  
  static language lang_names [] =
  {
-  { "ada",      FALSE, Ada_funcs,            NULL, Ada_suffixes,        NULL },
-  { "asm",      FALSE, Asm_labels,           NULL, Asm_suffixes,        NULL },
-  { "c",        FALSE, default_C_entries,    NULL, default_C_suffixes,  NULL },
-  { "c++",      FALSE, Cplusplus_entries,    NULL, Cplusplus_suffixes,  NULL },
-  { "c*",       FALSE, Cstar_entries,        NULL, Cstar_suffixes,      NULL },
-  { "cobol",    FALSE, Cobol_paragraphs,     NULL, Cobol_suffixes,      NULL },
-  { "erlang",   FALSE, Erlang_functions,     NULL, Erlang_suffixes,     NULL },
-  { "fortran",  FALSE, Fortran_functions,    NULL, Fortran_suffixes,    NULL },
-  { "java",     FALSE, Cjava_entries,        NULL, Cjava_suffixes,      NULL },
-  { "lisp",     FALSE, Lisp_functions,       NULL, Lisp_suffixes,       NULL },
-  { "makefile", FALSE, Makefile_targets,     Makefile_filenames, NULL,  NULL },
-  { "pascal",   FALSE, Pascal_functions,     NULL, Pascal_suffixes,     NULL },
-  { "perl",     FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
-  { "php",      FALSE, PHP_functions,        NULL, PHP_suffixes,        NULL },
-  { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
-  { "proc",     FALSE, plain_C_entries,      NULL, plain_C_suffixes,    NULL },
-  { "prolog",   FALSE, Prolog_functions,     NULL, Prolog_suffixes,     NULL },
-  { "python",   FALSE, Python_functions,     NULL, Python_suffixes,     NULL },
-  { "scheme",   FALSE, Scheme_functions,     NULL, Scheme_suffixes,     NULL },
-  { "tex",      FALSE, TeX_commands,         NULL, TeX_suffixes,        NULL },
-  { "texinfo",  FALSE, Texinfo_nodes,        NULL, Texinfo_suffixes,    NULL },
-  { "yacc",      TRUE, Yacc_entries,         NULL, Yacc_suffixes,       NULL },
-  { "auto", FALSE, NULL },             /* default guessing scheme */
-  { "none", FALSE, just_read_file },   /* regexp matching only */
-  { NULL, FALSE, NULL }                /* end of list */
+  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
+  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
+  { "c",         default_C_help, default_C_entries, default_C_suffixes },
+  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
+  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
+  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
+  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
+  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
+  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
+  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
+  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
+  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
+  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
+  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
+  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
+  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
+  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
+  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
+  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
+  { "python",    Python_help,    Python_functions,  Python_suffixes    },
+  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
+  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
+  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
+  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
+  { "auto",      auto_help },                      /* default guessing scheme */
+  { "none",      none_help,      just_read_file }, /* regexp matching only */
+  { NULL }                /* end of list */
  };
  
  \f
@@ -669,21 +794,25 @@ default file names and dot suffixes:");
           printf (" .%s", *ext);
        puts ("");
      }
-  puts ("Where `auto' means use default language for files based on file\n\
+  puts ("where `auto' means use default language for files based on file\n\
  name suffix, and `none' means only do regexp processing on files.\n\
  If no language is specified and no matching suffix is found,\n\
  the first line of the file is read for a sharp-bang (#!) sequence\n\
  followed by the name of an interpreter.  If no such sequence is found,\n\
  Fortran is tried first; if no tags are found, C is tried next.\n\
-When parsing any C file, a \"class\" keyword switches to C++.\n\
-Compressed files are supported using gzip and bzip2.");
+When parsing any C file, a \"class\" or \"template\" keyword\n\
+switches to C++.");
+  puts ("Compressed files are supported using gzip and bzip2.\n\
+\n\
+For detailed help on a given language use, for example,\n\
+etags --help --lang=ada.");
  }
  
  #ifndef EMACS_NAME
-# define EMACS_NAME "GNU Emacs"
+# define EMACS_NAME "standalone"
  #endif
  #ifndef VERSION
-# define VERSION "21"
+# define VERSION "version"
  #endif
  static void
  print_version ()
@@ -692,12 +821,27 @@ print_version ()
    puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
    puts ("This program is distributed under the same terms as Emacs");
  
-  exit (GOOD);
+  exit (EXIT_SUCCESS);
  }
  
  static void
-print_help ()
+print_help (argbuffer)
+     argument *argbuffer;
  {
+  bool help_for_lang = FALSE;
+
+  for (; argbuffer->arg_type != at_end; argbuffer++)
+    if (argbuffer->arg_type == at_language)
+      {
+       if (help_for_lang)
+         puts ("");
+       puts (argbuffer->lang->help);
+       help_for_lang = TRUE;
+      }
+
+  if (help_for_lang)
+    exit (EXIT_SUCCESS);
+
    printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
  \n\
  These are the options accepted by %s.\n", progname, progname);
@@ -726,7 +870,7 @@ Relative ones are stored relative to the output file's directory.\n");
    /* This option is mostly obsolete, because etags can now automatically
       detect C++.  Retained for backward compatibility and for debugging and
       experimentation.  In principle, we could want to tag as C++ even
-     before any "class" keyword.
+     before any "class" or "template" keyword.
    puts ("-C, --c++\n\
          Treat files whose name suffix defaults to C language as C++ files.");
    */
@@ -765,27 +909,27 @@ Relative ones are stored relative to the output file's directory.\n");
         Do not create tag entries for global variables in some\n\
         languages.  This makes the tags file smaller.");
    puts ("--members\n\
-       Create tag entries for member variables in C and derived languages.");
+       Create tag entries for members of structures in some languages.");
  
  #ifdef ETAGS_REGEXPS
-  puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
-        Make a tag for each line matching pattern REGEXP in the following\n\
-       files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
-       regexfile is a file containing one REGEXP per line.\n\
-       REGEXP is anchored (as if preceded by ^).\n\
-       The form /REGEXP/NAME/ creates a named tag.\n\
+  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
+        Make a tag for each line matching a regular expression pattern\n\
+       in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
+       files only.  REGEXFILE is a file containing one REGEXP per line.\n\
+       REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
+       optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
+  puts ("      If TAGNAME/ is present, the tags created are named.\n\
         For example Tcl named tags can be created with:\n\
-       --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
-  puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
-        Like -r, --regex but ignore case when matching expressions.");
+         --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
+       MODS are optional one-letter modifiers: `i' means to ignore case,\n\
+       `m' means to allow multi-line matches, `s' implies `m' and\n\
+       causes dot to match any character, including newline.");
    puts ("-R, --no-regex\n\
          Don't create tags from regexps for the following files.");
  #endif /* ETAGS_REGEXPS */
    puts ("-I, --ignore-indentation\n\
-        Don't rely on indentation quite as much as normal.  Currently,\n\
-        this means not to assume that a closing brace in the first\n\
-        column is the final brace of a function or structure\n\
-        definition in C and C++.");
+        In C and C++ do not assume that a closing brace in the first\n\
+        column is the final brace of a function or structure definition.");
    puts ("-o FILE, --output=FILE\n\
          Write the tags to FILE.");
    puts ("--parse-stdin=NAME\n\
@@ -828,14 +972,16 @@ Relative ones are stored relative to the output file's directory.\n");
    puts ("-V, --version\n\
          Print the version of the program.\n\
  -h, --help\n\
-        Print this help message.");
+        Print this help message.\n\
+        Followed by one or more `--language' options prints detailed\n\
+        help about tag generation for the specified languages.");
  
    print_language_names ();
  
    puts ("");
    puts ("Report bugs to bug-gnu-emacs@gnu.org");
  
-  exit (GOOD);
+  exit (EXIT_SUCCESS);
  }
  
  \f
@@ -975,6 +1121,7 @@ main (argc, argv)
    argument *argbuffer;
    int current_arg, file_count;
    linebuffer filename_lb;
+  bool help_asked = FALSE;
  #ifdef VMS
    bool got_err;
  #endif
@@ -996,14 +1143,6 @@ main (argc, argv)
       is small. */
    argbuffer = xnew (argc, argument);
  
-#ifdef ETAGS_REGEXPS
-  /* Set syntax for regular expression routines. */
-  re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
-  /* Translation table for case-insensitive search. */
-  for (i = 0; i < CHAR_SET_SIZE; i++)
-    lc_trans[i] = lowcase (i);
-#endif /* ETAGS_REGEXPS */
-
    /*
     * If etags, always find typedefs and structure tags.  Why not?
     * Also default to find macro constants, enum constants and
@@ -1061,12 +1200,13 @@ main (argc, argv)
           {
             error ("-o option may only be given once.", (char *)NULL);
             suggest_asking_for_help ();
+           /* NOTREACHED */
           }
         tagfile = optarg;
         break;
        case 'I':
        case 'S':                /* for backward compatibility */
-       noindentypedefs = TRUE;
+       ignoreindent = TRUE;
         break;
        case 'l':
         {
@@ -1079,6 +1219,10 @@ main (argc, argv)
             }
         }
         break;
+      case 'c':
+       /* Backward compatibility: support obsolete --ignore-case-regexp. */
+       optarg = concat (optarg, "i", ""); /* memory leak here */
+       /* FALLTHRU */
        case 'r':
         argbuffer[current_arg].arg_type = at_regexp;
         argbuffer[current_arg].what = optarg;
@@ -1089,17 +1233,12 @@ main (argc, argv)
         argbuffer[current_arg].what = NULL;
         ++current_arg;
         break;
-      case 'c':
-       argbuffer[current_arg].arg_type = at_icregexp;
-       argbuffer[current_arg].what = optarg;
-       ++current_arg;
-       break;
        case 'V':
         print_version ();
         break;
        case 'h':
        case 'H':
-       print_help ();
+       help_asked = TRUE;
         break;
  
         /* Etags options */
@@ -1118,9 +1257,10 @@ main (argc, argv)
        case 'w': no_warnings = TRUE;                            break;
        default:
         suggest_asking_for_help ();
+       /* NOTREACHED */
        }
  
-  for (; optind < argc; ++optind)
+  for (; optind < argc; optind++)
      {
        argbuffer[current_arg].arg_type = at_filename;
        argbuffer[current_arg].what = argv[optind];
@@ -1128,10 +1268,17 @@ main (argc, argv)
        ++file_count;
      }
  
+  argbuffer[current_arg].arg_type = at_end;
+
+  if (help_asked)
+    print_help (argbuffer);
+    /* NOTREACHED */
+
    if (nincluded_files == 0 && file_count == 0)
      {
        error ("no input files specified.", (char *)NULL);
        suggest_asking_for_help ();
+      /* NOTREACHED */
      }
  
    if (tagfile == NULL)
@@ -1150,8 +1297,10 @@ main (argc, argv)
  
    init ();                     /* set up boolean "functions" */
  
-  initbuffer (&lb);
-  initbuffer (&filename_lb);
+  linebuffer_init (&lb);
+  linebuffer_init (&filename_lb);
+  linebuffer_init (&filebuf);
+  linebuffer_init (&token_name);
  
    if (!CTAGS)
      {
@@ -1174,7 +1323,7 @@ main (argc, argv)
    /*
     * Loop through files finding functions.
     */
-  for (i = 0; i < current_arg; ++i)
+  for (i = 0; i < current_arg; i++)
      {
        static language *lang;   /* non-NULL if language is forced */
        char *this_file;
@@ -1186,10 +1335,7 @@ main (argc, argv)
           break;
  #ifdef ETAGS_REGEXPS
         case at_regexp:
-         analyse_regex (argbuffer[i].what, FALSE);
-         break;
-       case at_icregexp:
-         analyse_regex (argbuffer[i].what, TRUE);
+         analyse_regex (argbuffer[i].what);
           break;
  #endif
         case at_filename:
@@ -1232,21 +1378,33 @@ main (argc, argv)
      }
  
  #ifdef ETAGS_REGEXPS
-  free_patterns ();
+  free_regexps ();
  #endif /* ETAGS_REGEXPS */
+  free (lb.buffer);
+  free (filebuf.buffer);
+  free (token_name.buffer);
  
    if (!CTAGS || cxref_style)
      {
-      put_entries (nodehead);
+      put_entries (nodehead);  /* write the remaining tags (ETAGS) */
        free_tree (nodehead);
        nodehead = NULL;
        if (!CTAGS)
-       while (nincluded_files-- > 0)
-         fprintf (tagf, "\f\n%s,include\n", *included_files++);
+       {
+         fdesc *fdp;
+
+         /* Output file entries that have no tags. */
+         for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
+           if (!fdp->written)
+             fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
+
+         while (nincluded_files-- > 0)
+           fprintf (tagf, "\f\n%s,include\n", *included_files++);
+       }
  
        if (fclose (tagf) == EOF)
         pfatal (tagfile);
-      exit (GOOD);
+      exit (EXIT_SUCCESS);
      }
  
    if (update)
@@ -1265,7 +1423,7 @@ main (argc, argv)
           sprintf (cmd,
                    "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
                    tagfile, argbuffer[i].what, tagfile);
-         if (system (cmd) != GOOD)
+         if (system (cmd) != EXIT_SUCCESS)
             fatal ("failed to execute shell command", (char *)NULL);
         }
        append_to_tagfile = TRUE;
@@ -1274,7 +1432,7 @@ main (argc, argv)
    tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
    if (tagf == NULL)
      pfatal (tagfile);
-  put_entries (nodehead);
+  put_entries (nodehead);      /* write all the tags (CTAGS) */
    free_tree (nodehead);
    nodehead = NULL;
    if (fclose (tagf) == EOF)
@@ -1286,7 +1444,7 @@ main (argc, argv)
        sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
        exit (system (cmd));
      }
-  return GOOD;
+  return EXIT_SUCCESS;
  }
  
  
@@ -1571,6 +1729,7 @@ process_file (fh, fn, lang)
      }
    fdp->usecharno = TRUE;       /* use char position when making tags */
    fdp->prop = NULL;
+  fdp->written = FALSE;                /* not written on tags file yet */
  
    fdhead = fdp;
    curfdp = fdhead;             /* the current file description */
@@ -1648,7 +1807,6 @@ find_entries (inf)
       FILE *inf;
  {
    char *cp;
-  node *old_last_node;
    language *lang = curfdp->lang;
    Lang_function *parser = NULL;
  
@@ -1703,7 +1861,7 @@ find_entries (inf)
    /* We rewind here, even if inf may be a pipe.  We fail if the
       length of the first line is longer than the pipe block size,
       which is unlikely. */
-    rewind (inf);
+  rewind (inf);
  
    /* Else try to guess the language given the case insensitive file name. */
    if (parser == NULL)
@@ -1716,6 +1874,26 @@ find_entries (inf)
         }
      }
  
+  /* Else try Fortran or C. */
+  if (parser == NULL)
+    {
+      node *old_last_node = last_node;
+
+      curfdp->lang = get_language_from_langname ("fortran");
+      find_entries (inf);
+
+      if (old_last_node == last_node)
+       /* No Fortran entries found.  Try C. */
+       {
+         /* We do not tag if rewind fails.
+            Only the file name will be recorded in the tags file. */
+         rewind (inf);
+         curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
+         find_entries (inf);
+       }
+      return;
+    }
+
    if (!no_line_directive
        && curfdp->lang != NULL && curfdp->lang->metasource)
      /* It may be that this is a bingo.y file, and we already parsed a bingo.c
@@ -1733,12 +1911,8 @@ find_entries (inf)
           {
             fdesc *badfdp = *fdpp;
  
-           if (DEBUG)
-             fprintf (stderr,
-                      "Removing references to \"%s\" obtained from \"%s\"\n",
-                      badfdp->taggedfname, badfdp->infname);
-
-           /* Delete the tags referring to badfdp. */
+           /* Delete the tags referring to badfdp->taggedfname
+              that were obtained from badfdp->infname. */
             invalidate_nodes (badfdp, &nodehead);
  
             *fdpp = badfdp->next; /* remove the bad description from the list */
@@ -1748,35 +1922,89 @@ find_entries (inf)
           fdpp = &(*fdpp)->next; /* advance the list pointer */
      }
  
-  if (parser != NULL)
-    {
-      /* Generic initialisations before reading from file. */
-      lineno = 0;              /* reset global line number */
-      charno = 0;              /* reset global char number */
-      linecharno = 0;          /* reset global char number of line start */
+  assert (parser != NULL);
  
-      parser (inf);
-      return;
-    }
+  /* Generic initialisations before reading from file. */
+  linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
  
-  /* Else try Fortran. */
-  old_last_node = last_node;
-  curfdp->lang = get_language_from_langname ("fortran");
-  find_entries (inf);
+  /* Generic initialisations before parsing file with readline. */
+  lineno = 0;                 /* reset global line number */
+  charno = 0;                 /* reset global char number */
+  linecharno = 0;             /* reset global char number of line start */
+
+  parser (inf);
+
+#ifdef ETAGS_REGEXPS
+  regex_tag_multiline ();
+#endif /* ETAGS_REGEXPS */
+}
+
+\f
+/*
+ * Check whether an implicitly named tag should be created,
+ * then call `pfnote'.
+ * NAME is a string that is internally copied by this function.
+ *
+ * TAGS format specification
+ * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
+ * The following is explained in some more detail in etc/ETAGS.EBNF.
+ *
+ * make_tag creates tags with "implicit tag names" (unnamed tags)
+ * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
+ *  1. NAME does not contain any of the characters in NONAM;
+ *  2. LINESTART contains name as either a rightmost, or rightmost but
+ *     one character, substring;
+ *  3. the character, if any, immediately before NAME in LINESTART must
+ *     be a character in NONAM;
+ *  4. the character, if any, immediately after NAME in LINESTART must
+ *     also be a character in NONAM.
+ *
+ * The implementation uses the notinname() macro, which recognises the
+ * characters stored in the string `nonam'.
+ * etags.el needs to use the same characters that are in NONAM.
+ */
+static void
+make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
+     char *name;               /* tag name, or NULL if unnamed */
+     int namelen;              /* tag length */
+     bool is_func;             /* tag is a function */
+     char *linestart;          /* start of the line where tag is */
+     int linelen;              /* length of the line where tag is */
+     int lno;                  /* line number */
+     long cno;                 /* character number */
+{
+  bool named = (name != NULL && namelen > 0);
  
-  if (old_last_node == last_node)
-    /* No Fortran entries found.  Try C. */
+  if (!CTAGS && named)         /* maybe set named to false */
+    /* Let's try to make an implicit tag name, that is, create an unnamed tag
+       such that etags.el can guess a name from it. */
      {
-      /* We do not tag if rewind fails.
-        Only the file name will be recorded in the tags file. */
-      rewind (inf);
-      curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
-      find_entries (inf);
+      int i;
+      register char *cp = name;
+
+      for (i = 0; i < namelen; i++)
+       if (notinname (*cp++))
+         break;
+      if (i == namelen)                                /* rule #1 */
+       {
+         cp = linestart + linelen - namelen;
+         if (notinname (linestart[linelen-1]))
+           cp -= 1;                            /* rule #4 */
+         if (cp >= linestart                   /* rule #2 */
+             && (cp == linestart
+                 || notinname (cp[-1]))        /* rule #3 */
+             && strneq (name, cp, namelen))    /* rule #2 */
+           named = FALSE;      /* use implicit tag name */
+       }
      }
-  return;
+
+  if (named)
+    name = savenstr (name, namelen);
+  else
+    name = NULL;
+  pfnote (name, is_func, linestart, linelen, lno, cno);
  }
  
-\f
  /* Record a tag. */
  static void
  pfnote (name, is_func, linestart, linelen, lno, cno)
@@ -1789,6 +2017,7 @@ pfnote (name, is_func, linestart, linelen, lno, cno)
  {
    register node *np;
  
+  assert (name == NULL || name[0] != '\0');
    if (CTAGS && name == NULL)
      return;
  
@@ -1823,72 +2052,16 @@ pfnote (name, is_func, linestart, linelen, lno, cno)
    if (CTAGS && !cxref_style)
      {
        if (strlen (linestart) < 50)
-       np->pat = concat (linestart, "$", "");
+       np->regex = concat (linestart, "$", "");
        else
-       np->pat = savenstr (linestart, 50);
+       np->regex = savenstr (linestart, 50);
      }
    else
-    np->pat = savenstr (linestart, linelen);
+    np->regex = savenstr (linestart, linelen);
  
    add_node (np, &nodehead);
  }
  
-/*
- * TAGS format specification
- * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
- *
- * pfnote should emit the optimized form [unnamed tag] only if:
- *  1. name does not contain any of the characters " \t\r\n(),;";
- *  2. linestart contains name as either a rightmost, or rightmost but
- *     one character, substring;
- *  3. the character, if any, immediately before name in linestart must
- *     be one of the characters " \t(),;";
- *  4. the character, if any, immediately after name in linestart must
- *     also be one of the characters " \t(),;".
- *
- * The real implementation uses the notinname() macro, which recognises
- * characters slightly different from " \t\r\n(),;".  See the variable
- * `nonam'.
- */
-#define traditional_tag_style TRUE
-static void
-new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
-     char *name;               /* tag name, or NULL if unnamed */
-     int namelen;              /* tag length */
-     bool is_func;             /* tag is a function */
-     char *linestart;          /* start of the line where tag is */
-     int linelen;              /* length of the line where tag is */
-     int lno;                  /* line number */
-     long cno;                 /* character number */
-{
-  register char *cp;
-  bool named;
-
-  named = TRUE;
-  if (!CTAGS)
-    {
-      for (cp = name; !notinname (*cp); cp++)
-       continue;
-      if (*cp == '\0')                         /* rule #1 */
-       {
-         cp = linestart + linelen - namelen;
-         if (notinname (linestart[linelen-1]))
-           cp -= 1;                            /* rule #4 */
-         if (cp >= linestart                   /* rule #2 */
-             && (cp == linestart
-                 || notinname (cp[-1]))        /* rule #3 */
-             && strneq (name, cp, namelen))    /* rule #2 */
-           named = FALSE;      /* use unnamed tag */
-       }
-    }
-
-  if (named)
-    name = savenstr (name, namelen);
-  else
-    name = NULL;
-  pfnote (name, is_func, linestart, linelen, lno, cno);
-}
-
  /*
   * free_tree ()
   *     recurse on left children, iterate on right children.
@@ -1903,7 +2076,7 @@ free_tree (np)
        free_tree (np->left);
        if (np->name != NULL)
         free (np->name);
-      free (np->pat);
+      free (np->regex);
        free (np);
        np = node_right;
      }
@@ -2030,20 +2203,22 @@ invalidate_nodes (badfdp, npp)
        if (np->left != NULL)
         invalidate_nodes (badfdp, &np->left);
        if (np->fdp == badfdp)
-       np-> valid = FALSE;
+       np->valid = FALSE;
        if (np->right != NULL)
         invalidate_nodes (badfdp, &np->right);
      }
    else
      {
-      node **next = &np->left;
+      assert (np->fdp != NULL);
        if (np->fdp == badfdp)
         {
-         *npp = *next;         /* detach the sublist from the list */
+         *npp = np->left;      /* detach the sublist from the list */
           np->left = NULL;      /* isolate it */
           free_tree (np);       /* free it */
+         invalidate_nodes (badfdp, npp);
         }
-      invalidate_nodes (badfdp, next);
+      else
+       invalidate_nodes (badfdp, &np->left);
      }
  }
  
@@ -2075,15 +2250,16 @@ total_size_of_entries (np)
    register int total = 0;
  
    for (; np != NULL; np = np->right)
-    {
-      total += strlen (np->pat) + 1;           /* pat\177 */
-      if (np->name != NULL)
-       total += strlen (np->name) + 1;         /* name\001 */
-      total += number_len ((long) np->lno) + 1;        /* lno, */
-      if (np->cno != invalidcharno)            /* cno */
-       total += number_len (np->cno);
-      total += 1;                              /* newline */
-    }
+    if (np->valid)
+      {
+       total += strlen (np->regex) + 1;                /* pat\177 */
+       if (np->name != NULL)
+         total += strlen (np->name) + 1;               /* name\001 */
+       total += number_len ((long) np->lno) + 1;       /* lno, */
+       if (np->cno != invalidcharno)                   /* cno */
+         total += number_len (np->cno);
+       total += 1;                                     /* newline */
+      }
  
    return total;
  }
@@ -2113,8 +2289,9 @@ put_entries (np)
               fdp = np->fdp;
               fprintf (tagf, "\f\n%s,%d\n",
                        fdp->taggedfname, total_size_of_entries (np));
+             fdp->written = TRUE;
             }
-         fputs (np->pat, tagf);
+         fputs (np->regex, tagf);
           fputc ('\177', tagf);
           if (np->name != NULL)
             {
@@ -2139,7 +2316,7 @@ put_entries (np)
                          np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
               else
                 fprintf (stdout, "%-16s %3d %-16s %s\n",
-                        np->name, np->lno, np->fdp->taggedfname, np->pat);
+                        np->name, np->lno, np->fdp->taggedfname, np->regex);
             }
           else
             {
@@ -2150,7 +2327,7 @@ put_entries (np)
                   putc (searchar, tagf);
                   putc ('^', tagf);
  
-                 for (sp = np->pat; *sp; sp++)
+                 for (sp = np->regex; *sp; sp++)
                     {
                       if (*sp == '\\' || *sp == searchar)
                         putc ('\\', tagf);
@@ -2534,15 +2711,22 @@ static enum
   */
  static struct tok
  {
-  bool valid;
-  bool named;
-  int offset;
-  int length;
-  int lineno;
-  long linepos;
-  char *line;
+  char *line;                  /* string containing the token */
+  int offset;                  /* where the token starts in LINE */
+  int length;                  /* token length */
+  /*
+    The previous members can be used to pass strings around for generic
+    purposes.  The following ones specifically refer to creating tags.  In this
+    case the token contained here is the pattern that will be used to create a
+    tag.
+  */
+  bool valid;                  /* if FALSE, do not create a tag; the token
+                                  should be invalidated whenever a state
+                                  machine is reset prematurely */
+  bool named;                  /* create a named tag */
+  int lineno;                  /* source line number of tag */
+  long linepos;                        /* source char number of tag */
  } token;                       /* latest token read */
-static linebuffer token_name;  /* its name */
  
  /*
   * Variables and functions for dealing with nested structures.
@@ -2560,7 +2744,7 @@ static struct {
  } cstack;                      /* stack for nested declaration tags */
  /* Current struct nesting depth (namespace, class, struct, union, enum). */
  #define nestlev                (cstack.nl)
-/* After struct keyword or in struct body, not inside an nested function. */
+/* After struct keyword or in struct body, not inside a nested function. */
  #define instruct       (structdef == snone && nestlev > 0                      \
                          && cblev == cstack.cblev[nestlev-1] + 1)
  
@@ -2778,8 +2962,8 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
        return FALSE;
      case st_C_template:
      case st_C_class:
-      if (cblev == 0
-         && (*c_extp & C_AUTO) /* automatic detection of C++ language */
+      if ((*c_extp & C_AUTO)   /* automatic detection of C++ language */
+         && cblev == 0
           && definedef == dnone && structdef == snone
           && typdef == tnone && fvdef == fvnone)
         *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
@@ -2884,8 +3068,16 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
        fvextern = TRUE;
        /* FALLTHRU */
      case st_C_typespec:
-      if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
-       fvdef = fvnone;         /* should be useless */
+      switch  (fvdef)
+       {
+       case finlist:
+       case flistseen:
+       case fignore:
+       case vignore:
+         break;
+       default:
+         fvdef = fvnone;
+       }
        return FALSE;
      case st_C_ignore:
        fvextern = FALSE;
@@ -2915,8 +3107,10 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
               fvdef = vignore;
               return FALSE;
             }
-         if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
+         if (len >= 10 && strneq (str+len-10, "::operator", 10))
             {
+             if (*c_extp & C_AUTO) /* automatic detection of C++ */
+               *c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
               fvdef = foperator;
               *is_func_or_var = TRUE;
               return TRUE;
@@ -2953,6 +3147,10 @@ static struct
  #define curlinepos (lbs[curndx].linepos)
  #define newlinepos (lbs[newndx].linepos)
  
+#define plainc ((c_ext & C_EXT) == C_PLAIN)
+#define cplpl (c_ext & C_PLPL)
+#define cjava ((c_ext & C_JAVA) == C_JAVA)
+
  #define CNL_SAVE_DEFINEDEF()                                           \
  do {                                                                   \
    curlinepos = charno;                                                 \
@@ -2980,31 +3178,18 @@ make_C_tag (isfun)
  {
    /* This function should never be called when token.valid is FALSE, but
       we must protect against invalid input or internal errors. */
-  if (DEBUG || token.valid)
-    {
-      if (traditional_tag_style)
-       {
-         /* This was the original code.  Now we call new_pfnote instead,
-            which uses the new method for naming tags (see new_pfnote). */
-         char *name = NULL;
+  if (!DEBUG && !token.valid)
+    return;
  
-         if (CTAGS || token.named)
-           name = savestr (token_name.buffer);
-         if (DEBUG && !token.valid)
-           {
-             if (token.named)
-               name = concat (name, "##invalid##", "");
-             else
-               name = savestr ("##invalid##");
-           }
-         pfnote (name, isfun, token.line,
-                 token.offset+token.length+1, token.lineno, token.linepos);
-       }
-      else
-       new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
-                   token.offset+token.length+1, token.lineno, token.linepos);
-      token.valid = FALSE;
-    }
+  if (token.valid)
+    make_tag (token_name.buffer, token_name.len, isfun, token.line,
+             token.offset+token.length+1, token.lineno, token.linepos);
+  else                         /* this case is optimised away if !DEBUG */
+    make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
+             token_name.len + 17, isfun, token.line,
+             token.offset+token.length+1, token.lineno, token.linepos);
+
+  token.valid = FALSE;
  }
  
  
@@ -3030,14 +3215,12 @@ C_entries (c_ext, inf)
    int parlev;                  /* current parenthesis level */
    int typdefcblev;             /* cblev where a typedef struct body begun */
    bool incomm, inquote, inchar, quotednl, midtoken;
-  bool cplpl, cjava;
    bool yacc_rules;             /* in the rules part of a yacc file */
    struct tok savetoken;                /* token saved during preprocessor handling */
  
  
-  initbuffer (&token_name);
-  initbuffer (&lbs[0].lb);
-  initbuffer (&lbs[1].lb);
+  linebuffer_init (&lbs[0].lb);
+  linebuffer_init (&lbs[1].lb);
    if (cstack.size == 0)
      {
        cstack.size = (DEBUG) ? 1 : 4;
@@ -3058,8 +3241,6 @@ C_entries (c_ext, inf)
    token.valid = savetoken.valid = FALSE;
    cblev = 0;
    parlev = 0;
-  cplpl = (c_ext & C_PLPL) == C_PLPL;
-  cjava = (c_ext & C_JAVA) == C_JAVA;
    if (cjava)
      { qualifier = "."; qlen = 1; }
    else
@@ -3225,13 +3406,13 @@ C_entries (c_ext, inf)
             {
               if (endtoken (c))
                 {
-                 if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
+                 if (c == ':' && *lp == ':' && begtoken (lp[1]))
+                   /* This handles :: in the middle,
+                      but not at the beginning of an identifier.
+                      Also, space-separated :: is not recognised. */
                     {
-                     /*
-                      * This handles :: in the middle, but not at the
-                      * beginning of an identifier.  Also, space-separated
-                      * :: is not recognised.
-                      */
+                     if (c_ext & C_AUTO) /* automatic detection of C++ */
+                       c_ext = (c_ext | C_PLPL) & ~C_AUTO;
                       lp += 2;
                       toklen += 2;
                       c = lp[-1];
@@ -3258,7 +3439,7 @@ C_entries (c_ext, inf)
                               toklen += lp - oldlp;
                             }
                           token.named = FALSE;
-                         if ((c_ext & C_EXT)   /* not pure C */
+                         if (!plainc
                               && nestlev > 0 && definedef == dnone)
                             /* in struct body */
                             {
@@ -3374,8 +3555,11 @@ C_entries (c_ext, inf)
                       fvdef = finlist;
                       continue;
                     case flistseen:
-                     make_C_tag (TRUE); /* a function */
-                     fvdef = fignore;
+                     if (plainc || declarations)
+                       {
+                         make_C_tag (TRUE); /* a function */
+                         fvdef = fignore;
+                       }
                       break;
                     case fvnameseen:
                       fvdef = fvnone;
@@ -3428,7 +3612,17 @@ C_entries (c_ext, inf)
               break;
             }
           if (structdef == stagseen)
-           structdef = scolonseen;
+           {
+             structdef = scolonseen;
+             break;
+           }
+         /* Should be useless, but may work as a safety net. */
+         if (cplpl && fvdef == flistseen)
+           {
+             make_C_tag (TRUE); /* a function */
+             fvdef = fignore;
+             break;
+           }
           break;
         case ';':
           if (definedef != dnone)
@@ -3447,7 +3641,7 @@ C_entries (c_ext, inf)
               switch (fvdef)
                 {
                 case fignore:
-                 if (typdef == tignore)
+                 if (typdef == tignore || cplpl)
                     fvdef = fvnone;
                   break;
                 case fvnameseen:
@@ -3459,20 +3653,17 @@ C_entries (c_ext, inf)
                   token.valid = FALSE;
                   break;
                 case flistseen:
-                 if ((declarations && typdef == tnone && !instruct)
-                     || (members && typdef != tignore && instruct))
+                 if (declarations
+                     && (typdef == tnone || (typdef != tignore && instruct)))
                     make_C_tag (TRUE);  /* a function declaration */
                   /* FALLTHRU */
                 default:
                   fvextern = FALSE;
                   fvdef = fvnone;
                   if (declarations
-                     && structdef == stagseen && (c_ext & C_PLPL))
+                      && cplpl && structdef == stagseen)
                     make_C_tag (FALSE); /* forward declaration */
                   else
-                   /* The following instruction invalidates the token.
-                      Probably the token should be invalidated in all other
-                      cases where some state machine is reset prematurely. */
                     token.valid = FALSE;
                 } /* switch (fvdef) */
               /* FALLTHRU */
@@ -3675,18 +3866,25 @@ C_entries (c_ext, inf)
           if (definedef != dnone)
             break;
           if (fvdef == fstartlist)
-           fvdef = fvnone;     /* avoid tagging `foo' in `foo (*bar()) ()' */
+           {
+             fvdef = fvnone;   /* avoid tagging `foo' in `foo (*bar()) ()' */
+             token.valid = FALSE;
+           }
           break;
         case '}':
           if (definedef != dnone)
             break;
-         if (!noindentypedefs && lp == newlb.buffer + 1)
+         if (!ignoreindent && lp == newlb.buffer + 1)
             {
+             if (cblev != 0)
+               token.valid = FALSE;
               cblev = 0;        /* reset curly brace level if first column */
               parlev = 0;       /* also reset paren level, just in case... */
             }
           else if (cblev > 0)
             cblev--;
+         else
+           token.valid = FALSE; /* something gone amiss, token unreliable */
           popclass_above (cblev);
           structdef = snone;
           /* Only if typdef == tinbody is typdefcblev significant. */
@@ -3770,7 +3968,6 @@ C_entries (c_ext, inf)
  
      } /* while not eof */
  
-  free (token_name.buffer);
    free (lbs[0].lb.buffer);
    free (lbs[1].lb.buffer);
  }
@@ -3831,8 +4028,9 @@ Yacc_entries (inf)
  #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)   \
    for (;                       /* loop initialization */               \
         !feof (file_pointer)    /* loop test */                         \
-       && (char_pointer = lb.buffer, /* instructions at start of loop */ \
-          readline (&line_buffer, file_pointer),                       \
+       &&                      /* instructions at start of loop */     \
+         (readline (&line_buffer, file_pointer),                       \
+           char_pointer = line_buffer.buffer,                          \
            TRUE);                                                       \
        )
  #define LOOKING_AT(cp, keyword)        /* keyword is a constant string */      \
@@ -3903,8 +4101,8 @@ F_getit (inf)
      return;
    for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
      continue;
-  pfnote (savenstr (dbp, cp-dbp), TRUE,
-         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+  make_tag (dbp, cp-dbp, TRUE,
+           lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  }
  
  
@@ -3971,8 +4169,8 @@ Fortran_functions (inf)
             {
               dbp = skip_spaces (dbp);
               if (*dbp == '\0') /* assume un-named */
-               pfnote (savestr ("blockdata"), TRUE,
-                       lb.buffer, dbp - lb.buffer, lineno, linecharno);
+               make_tag ("blockdata", 9, TRUE,
+                         lb.buffer, dbp - lb.buffer, lineno, linecharno);
               else
                 F_getit (inf);  /* look for name */
             }
@@ -3985,7 +4183,7 @@ Fortran_functions (inf)
  /*
   * Ada parsing
   * Original code by
- * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
+ * Philippe Waroquiers (1998)
   */
  
  static void Ada_getit __P((FILE *, char *));
@@ -4048,7 +4246,9 @@ Ada_getit (inf, name_qualifier)
        *cp = '\0';
        name = concat (dbp, name_qualifier, "");
        *cp = c;
-      pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+      make_tag (name, strlen (name), TRUE,
+               lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+      free (name);
        if (c == '"')
         dbp = cp + 1;
        return;
@@ -4060,6 +4260,7 @@ Ada_funcs (inf)
       FILE *inf;
  {
    bool inquote = FALSE;
+  bool skip_till_semicolumn = FALSE;
  
    LOOP_ON_INPUT_LINES (inf, lb, dbp)
      {
@@ -4096,6 +4297,14 @@ Ada_funcs (inf)
               continue;
             }
  
+         if (skip_till_semicolumn)
+           {
+             if (*dbp == ';')
+               skip_till_semicolumn = FALSE;
+             dbp++;
+             continue;         /* advance char */
+           }
+
           /* Search for beginning of a token.  */
           if (!begtoken (*dbp))
             {
@@ -4122,6 +4331,16 @@ Ada_funcs (inf)
               else
                 break;          /* from switch */
               continue;         /* advance char */
+
+           case 'u':
+             if (typedefs && !packages_only && nocase_tail ("use"))
+               {
+                 /* when tagging types, avoid tagging  use type Pack.Typename;
+                    for this, we will skip everything till a ; */
+                 skip_till_semicolumn = TRUE;
+                 continue;     /* advance char */
+               }
+
             case 't':
               if (!packages_only && nocase_tail ("task"))
                 Ada_getit (inf, "/k");
@@ -4167,11 +4386,9 @@ Asm_labels (inf)
           while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
             cp++;
           if (*cp == ':' || iswhite (*cp))
-           {
-             /* Found end of label, so copy it and add it to the table. */
-             pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
+           /* Found end of label, so copy it and add it to the table. */
+           make_tag (lb.buffer, cp - lb.buffer, TRUE,
                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
-           }
         }
      }
  }
@@ -4199,56 +4416,63 @@ Perl_functions (inf)
        if (LOOKING_AT (cp, "package"))
         {
           free (package);
-         package = get_tag (cp);
-         if (package == NULL)  /* can't parse package name */
-           package = savestr ("");
-         else
-           package = savestr(package); /* make a copy */
+         get_tag (cp, &package);
         }
        else if (LOOKING_AT (cp, "sub"))
         {
-         char *name, *fullname, *pos;
+         char *pos;
           char *sp = cp;
  
           while (!notinname (*cp))
             cp++;
           if (cp == sp)
-           continue;
-         name = savenstr (sp, cp-sp);
-         if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
-           fullname = name;
+           continue;           /* nothing found */
+         if ((pos = etags_strchr (sp, ':')) != NULL
+             && pos < cp && pos[1] == ':')
+           /* The name is already qualified. */
+           make_tag (sp, cp - sp, TRUE,
+                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
           else
-           fullname = concat (package, "::", name);
-         pfnote (fullname, TRUE,
-                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
-         if (name != fullname)
-           free (name);
+           /* Qualify it. */
+           {
+             char savechar, *name;
+
+             savechar = *cp;
+             *cp = '\0';
+             name = concat (package, "::", sp);
+             *cp = savechar;
+             make_tag (name, strlen(name), TRUE,
+                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+             free (name);
+           }
         }
-       else if (globals                /* only if tagging global vars is enabled */
-               && (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
+       else if (globals)       /* only if we are tagging global vars */
         {
+         /* Skip a qualifier, if any. */
+         bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
           /* After "my" or "local", but before any following paren or space. */
-         char *varname = NULL;
+         char *varstart = cp;
  
-         if (*cp == '$' || *cp == '@' || *cp == '%')
+         if (qual              /* should this be removed?  If yes, how? */
+             && (*cp == '$' || *cp == '@' || *cp == '%'))
             {
-             char* varstart = ++cp;
-             while (ISALNUM (*cp) || *cp == '_')
+             varstart += 1;
+             do
                 cp++;
-             varname = savenstr (varstart, cp-varstart);
+             while (ISALNUM (*cp) || *cp == '_');
             }
-         else
+         else if (qual)
             {
               /* Should be examining a variable list at this point;
                  could insist on seeing an open parenthesis. */
               while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
                 cp++;
             }
+         else
+           continue;
  
-         /* Perhaps I should back cp up one character, so the TAGS table
-            doesn't mention (and so depend upon) the following char. */
-         pfnote (varname, FALSE,
-                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+         make_tag (varstart, cp - varstart, FALSE,
+                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
         }
      }
  }
@@ -4274,8 +4498,8 @@ Python_functions (inf)
           char *name = cp;
           while (!notinname (*cp) && *cp != ':')
             cp++;
-         pfnote (savenstr (name, cp-name), TRUE,
-                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+         make_tag (name, cp - name, TRUE,
+                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
         }
      }
  }
@@ -4307,8 +4531,8 @@ PHP_functions (inf)
         {
           while (!notinname (*cp))
             cp++;
-         pfnote (savenstr (name, cp-name), TRUE,
-                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+         make_tag (name, cp - name, TRUE,
+                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
           search_identifier = FALSE;
         }
        else if (LOOKING_AT (cp, "function"))
@@ -4320,8 +4544,8 @@ PHP_functions (inf)
               name = cp;
               while (!notinname (*cp))
                 cp++;
-             pfnote (savenstr (name, cp-name), TRUE,
-                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+             make_tag (name, cp - name, TRUE,
+                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
             }
           else
             search_identifier = TRUE;
@@ -4333,8 +4557,8 @@ PHP_functions (inf)
               name = cp;
               while (*cp != '\0' && !iswhite (*cp))
                 cp++;
-             pfnote (savenstr (name, cp-name), FALSE,
-                     lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+             make_tag (name, cp - name, FALSE,
+                       lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
             }
           else
             search_identifier = TRUE;
@@ -4348,8 +4572,8 @@ PHP_functions (inf)
           name = cp;
           while (*cp != quote && *cp != '\0')
             cp++;
-         pfnote (savenstr (name, cp-name), FALSE,
-                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+         make_tag (name, cp - name, FALSE,
+                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
         }
        else if (members
                && LOOKING_AT (cp, "var")
@@ -4358,8 +4582,8 @@ PHP_functions (inf)
           name = cp;
           while (!notinname(*cp))
             cp++;
-         pfnote (savenstr (name, cp-name), FALSE,
-                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+         make_tag (name, cp - name, FALSE,
+                   lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
         }
      }
  }
@@ -4390,15 +4614,15 @@ Cobol_paragraphs (inf)
        for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
         continue;
        if (*ep++ == '.')
-       pfnote (savenstr (bp, ep-bp), TRUE,
-               lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+       make_tag (bp, ep - bp, TRUE,
+                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
      }
  }
  
  \f
  /*
   * Makefile support
- * Idea by Assar Westerlund <assar@sics.se> (2001)
+ * Ideas by Assar Westerlund <assar@sics.se> (2001)
   */
  static void
  Makefile_targets (inf)
@@ -4412,9 +4636,9 @@ Makefile_targets (inf)
         continue;
        while (*bp != '\0' && *bp != '=' && *bp != ':')
         bp++;
-      if (*bp == ':')
-       pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
-               lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
+      if (*bp == ':' || (globals && *bp == '='))
+       make_tag (lb.buffer, bp - lb.buffer, TRUE,
+                 lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
      }
  }
  
@@ -4434,8 +4658,8 @@ Pascal_functions (inf)
  {
    linebuffer tline;            /* mostly copied from C_entries */
    long save_lcno;
-  int save_lineno, save_len;
-  char c, *cp, *namebuf;
+  int save_lineno, namelen, taglen;
+  char c, *name;
  
    bool                         /* each of these flags is TRUE iff: */
      incomment,                 /* point is inside a comment */
@@ -4449,15 +4673,15 @@ Pascal_functions (inf)
                                    is a FORWARD/EXTERN to be ignored, or
                                    whether it is a real tag */
  
-  save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
-  namebuf = NULL;              /* keep compiler quiet */
+  save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
+  name = NULL;                 /* keep compiler quiet */
    dbp = lb.buffer;
    *dbp = '\0';
-  initbuffer (&tline);
+  linebuffer_init (&tline);
  
    incomment = inquote = FALSE;
    found_tag = FALSE;           /* have a proc name; check if extern */
-  get_tagname = FALSE;         /* have found "procedure" keyword    */
+  get_tagname = FALSE;         /* found "procedure" keyword         */
    inparms = FALSE;             /* found '(' after "proc"            */
    verify_tag = FALSE;          /* check if "extern" is ahead        */
  
@@ -4526,7 +4750,7 @@ Pascal_functions (inf)
           }
        if (found_tag && verify_tag && (*dbp != ' '))
         {
-         /* check if this is an "extern" declaration */
+         /* Check if this is an "extern" declaration. */
           if (*dbp == '\0')
             continue;
           if (lowcase (*dbp == 'e'))
@@ -4539,7 +4763,7 @@ Pascal_functions (inf)
             }
           else if (lowcase (*dbp) == 'f')
             {
-             if (nocase_tail ("forward")) /*  check for forward reference */
+             if (nocase_tail ("forward")) /* check for forward reference */
                 {
                   found_tag = FALSE;
                   verify_tag = FALSE;
@@ -4549,37 +4773,41 @@ Pascal_functions (inf)
             {
               found_tag = FALSE;
               verify_tag = FALSE;
-             pfnote (namebuf, TRUE,
-                     tline.buffer, save_len, save_lineno, save_lcno);
+             make_tag (name, namelen, TRUE,
+                       tline.buffer, taglen, save_lineno, save_lcno);
               continue;
             }
         }
        if (get_tagname)         /* grab name of proc or fn */
         {
+         char *cp;
+
           if (*dbp == '\0')
             continue;
  
-         /* save all values for later tagging */
+         /* Find block name. */
+         for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
+           continue;
+
+         /* Save all values for later tagging. */
           linebuffer_setlen (&tline, lb.len);
           strcpy (tline.buffer, lb.buffer);
           save_lineno = lineno;
           save_lcno = linecharno;
+         name = tline.buffer + (dbp - lb.buffer);
+         namelen = cp - dbp;
+         taglen = cp - lb.buffer + 1;
  
-         /* grab block name */
-         for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
-           continue;
-         namebuf = savenstr (dbp, cp-dbp);
           dbp = cp;             /* set dbp to e-o-token */
-         save_len = dbp - lb.buffer + 1;
           get_tagname = FALSE;
           found_tag = TRUE;
           continue;
  
-         /* and proceed to check for "extern" */
+         /* And proceed to check for "extern". */
         }
        else if (!incomment && !inquote && !found_tag)
         {
-         /* check for proc/fn keywords */
+         /* Check for proc/fn keywords. */
           switch (lowcase (c))
             {
             case 'p':
@@ -4592,7 +4820,7 @@ Pascal_functions (inf)
               continue;
             }
         }
-    }                          /* while not eof */
+    } /* while not eof */
  
    free (tline.buffer);
  }
@@ -4618,7 +4846,7 @@ L_getit ()
        /* Ok, then skip "(" before name in (defstruct (foo)) */
        dbp = skip_spaces (dbp);
    }
-  get_tag (dbp);
+  get_tag (dbp, NULL);
  }
  
  static void
@@ -4669,7 +4897,7 @@ Lisp_functions (inf)
   *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
   */
  static void
-Postscript_functions (inf)
+PS_functions (inf)
       FILE *inf;
  {
    register char *bp, *ep;
@@ -4682,11 +4910,11 @@ Postscript_functions (inf)
                *ep != '\0' && *ep != ' ' && *ep != '{';
                ep++)
             continue;
-         pfnote (savenstr (bp, ep-bp), TRUE,
-                 lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+         make_tag (bp, ep - bp, TRUE,
+                   lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
         }
        else if (LOOKING_AT (bp, "defineps"))
-       get_tag (bp);
+       get_tag (bp, NULL);
      }
  }
  
@@ -4714,10 +4942,10 @@ Scheme_functions (inf)
           /* Skip over open parens and white space */
           while (notinname (*bp))
             bp++;
-         get_tag (bp);
+         get_tag (bp, NULL);
         }
        if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
-       get_tag (bp);
+       get_tag (bp, NULL);
      }
  }
  
@@ -4779,8 +5007,7 @@ TeX_commands (inf)
             if (strneq (cp, key->buffer, key->len))
               {
                 register char *p;
-               char *name;
-               int linelen;
+               int namelen, linelen;
                 bool opgrp = FALSE;
  
                 cp = skip_spaces (cp + key->len);
@@ -4794,7 +5021,7 @@ TeX_commands (inf)
                       *p != TEX_opgrp && *p != TEX_clgrp);
                      p++)
                   continue;
-               name = savenstr (cp, p-cp);
+               namelen = p - cp;
                 linelen = lb.len;
                 if (!opgrp || *p == TEX_clgrp)
                   {
@@ -4802,7 +5029,8 @@ TeX_commands (inf)
                       *p++;
                     linelen = p - lb.buffer + 1;
                   }
-               pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
+               make_tag (cp, namelen, TRUE,
+                         lb.buffer, linelen, lineno, linecharno);
                 goto tex_next_line; /* We only tag a line once */
               }
         }
@@ -4912,8 +5140,134 @@ Texinfo_nodes (inf)
         start = cp;
         while (*cp != '\0' && *cp != ',')
           cp++;
-       pfnote (savenstr (start, cp - start), TRUE,
-               lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+       make_tag (start, cp - start, TRUE,
+                 lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+      }
+}
+
+\f
+/* Similar to LOOKING_AT but does not use notinname, does not skip spaces */
+#define LOOKING_AT_NOCASE(cp, kw)      /* kw is a constant string */   \
+  (strncaseeq ((cp), kw, sizeof(kw)-1) /* cp points at kw */           \
+   && ((cp) += sizeof(kw)-1))          /* advance cp past kw */
+
+/*
+ * HTML support.
+ * Contents of <title>, <h1>, <h2>, <h3> are tags.
+ * Contents of <a name=xxx> or <... id=xxx> are tags with name xxx.
+ *
+ * Francesco Potortì, 2002.
+ */
+static void
+HTML_labels (inf)
+     FILE * inf;
+{
+  bool getnext = FALSE;                /* next text outside of HTML tags is a tag */
+  bool skiptag = FALSE;                /* skip to the end of the current HTML tag */
+  bool intag = FALSE;          /* inside an html tag, looking for ID= */
+  bool inanchor = FALSE;       /* when INTAG, is an anchor, look for NAME= */
+  char *end;                   /* end of the name or text being scanned */
+
+
+  linebuffer_setlen (&token_name, 0); /* no name in buffer */
+
+  LOOP_ON_INPUT_LINES (inf, lb, dbp)
+    for (;;)                   /* loop on the same line */
+      {
+       if (skiptag)            /* skip HTML tag */
+         {
+           while (*dbp != '\0' && *dbp != '>')
+             dbp++;
+           if (*dbp == '>')
+             {
+               dbp += 1;
+               skiptag = FALSE;
+               continue;       /* look on the same line */
+             }
+           break;              /* go to next line */
+         }
+
+       else if (intag) /* look for "name=" or "id=" */
+         {
+           while (*dbp != '\0' && *dbp != '>'
+                  && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
+             dbp++;
+           if (*dbp == '\0')
+             break;            /* go to next line */
+           if (*dbp == '>')
+             {
+               dbp += 1;
+               intag = FALSE;
+               continue;       /* look on the same line */
+             }
+           if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
+               || LOOKING_AT_NOCASE (dbp, "id="))
+             {
+               bool quoted = (dbp[0] == '"');
+
+               if (quoted)
+                 for (end = ++dbp; *end != '\0' && *end != '"'; end++)
+                   continue;
+               else
+                 for (end = dbp; *end != '\0' && intoken (*end); end++)
+                   continue;
+               linebuffer_setlen (&token_name, end - dbp);
+               strncpy (token_name.buffer, dbp, end - dbp);
+               token_name.buffer[end - dbp] = '\0';
+
+               dbp = end;
+               intag = FALSE;  /* we found what we looked for */
+               skiptag = TRUE; /* skip to the end of the tag */
+               getnext = TRUE; /* then grab the text */
+               continue;       /* look on the same line */
+             }
+           dbp += 1;
+         }
+
+       else if (getnext)       /* grab next tokens and tag them */
+         {
+           dbp = skip_spaces (dbp);
+           if (*dbp == '\0')
+             break;            /* go to next line */
+           if (*dbp == '<')
+             {
+               intag = TRUE;
+               inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
+               continue;       /* look on the same line */
+             }
+
+           for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
+             continue;
+           make_tag (token_name.buffer, token_name.len, TRUE,
+                     dbp, end - dbp, lineno, linecharno);
+           linebuffer_setlen (&token_name, 0); /* no name in buffer */
+           getnext = FALSE;
+           break;              /* go to next line */
+         }
+
+       else                    /* look for an interesting HTML tag */
+         {
+           while (*dbp != '\0' && *dbp != '<')
+             dbp++;
+           if (*dbp == '\0')
+             break;            /* go to next line */
+           intag = TRUE;
+           if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
+             {
+               inanchor = TRUE;
+               continue;       /* look on the same line */
+             }
+           else if (LOOKING_AT_NOCASE (dbp, "<title>")
+                    || LOOKING_AT_NOCASE (dbp, "<h1>")
+                    || LOOKING_AT_NOCASE (dbp, "<h2>")
+                    || LOOKING_AT_NOCASE (dbp, "<h3>"))
+             {
+               intag = FALSE;
+               getnext = TRUE;
+               continue;       /* look on the same line */
+             }
+           dbp += 1;
+         }
        }
  }
  
@@ -5016,7 +5370,7 @@ prolog_pr (s, last)
           || len != strlen (last)
           || !strneq (s, last, len)))
         {
-         pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
+         make_tag (s, len, TRUE, s, pos, lineno, linecharno);
           return len;
         }
    else
@@ -5092,7 +5446,7 @@ prolog_atom (s, pos)
   */
  static int erlang_func __P((char *, char *));
  static void erlang_attribute __P((char *));
-static int erlang_atom __P((char *, int));
+static int erlang_atom __P((char *));
  
  static void
  Erlang_functions (inf)
@@ -5157,7 +5511,7 @@ erlang_func (s, last)
    int pos;
    int len;
  
-  pos = erlang_atom (s, 0);
+  pos = erlang_atom (s);
    if (pos < 1)
      return 0;
  
@@ -5170,7 +5524,7 @@ erlang_func (s, last)
           || len != (int)strlen (last)
           || !strneq (s, last, len)))
         {
-         pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
+         make_tag (s, len, TRUE, s, pos, lineno, linecharno);
           return len;
         }
  
@@ -5191,19 +5545,14 @@ static void
  erlang_attribute (s)
       char *s;
  {
-  int pos;
-  int len;
+  char *cp = s;
  
-  if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
+  if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
+      && *cp++ == '(')
      {
-      if (s[pos++] == '(')
-       {
-         pos = skip_spaces (s + pos) - s;
-         len = erlang_atom (s, pos);
-         if (len != 0)
-           pfnote (savenstr (& s[pos], len), TRUE,
-                   s, pos + len, lineno, linecharno);
-       }
+      int len = erlang_atom (skip_spaces (cp));
+      if (len > 0)
+       make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
      }
    return;
  }
@@ -5214,66 +5563,46 @@ erlang_attribute (s)
   * Return the number of bytes consumed, or -1 if there was an error.
   */
  static int
-erlang_atom (s, pos)
+erlang_atom (s)
       char *s;
-     int pos;
  {
-  int origpos;
-
-  origpos = pos;
+  int pos = 0;                 /* bytes consumed so far */
  
    if (ISALPHA (s[pos]) || s[pos] == '_')
      {
        /* The atom is unquoted. */
-      pos++;
-      while (ISALNUM (s[pos]) || s[pos] == '_')
+      do
         pos++;
-      return pos - origpos;
+      while (ISALNUM (s[pos]) || s[pos] == '_');
      }
    else if (s[pos] == '\'')
      {
+      for (pos++; s[pos] != '\''; pos++)
+       if (s[pos] == '\0'      /* multiline quoted atoms are ignored */
+           || (s[pos] == '\\' && s[++pos] == '\0'))
+         return 0;
        pos++;
-
-      for (;;)
-       {
-         if (s[pos] == '\'')
-           {
-             pos++;
-             break;
-           }
-         else if (s[pos] == '\0')
-           /* Multiline quoted atoms are ignored. */
-           return -1;
-         else if (s[pos] == '\\')
-           {
-             if (s[pos+1] == '\0')
-               return -1;
-             pos += 2;
-           }
-         else
-           pos++;
-       }
-      return pos - origpos;
      }
-  else
-    return -1;
+
+  return pos;                  /* 0 if S does not start with an atom */
  }
  
  \f
  #ifdef ETAGS_REGEXPS
  
  static char *scan_separators __P((char *));
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
+static void add_regex __P((char *, language *));
  static char *substitute __P((char *, char *, struct re_registers *));
  
-/* Take a string like "/blah/" and turn it into "blah", making sure
-   that the first and last characters are the same, and handling
-   quoted separator characters.  Actually, stops on the occurrence of
-   an unquoted separator.  Also turns "\t" into a Tab character, and
-   similarly for all character escape sequences supported by Gcc.
-   Returns pointer to terminating separator.  Works in place.  Null
-   terminates name string. */
+/*
+ * Take a string like "/blah/" and turn it into "blah", verifying
+ * that the first and last characters are the same, and handling
+ * quoted separator characters.  Actually, stops on the occurrence of
+ * an unquoted separator.  Also processes \t, \n, etc. and turns them into
+ * the appropriate characters.  Works in place.  Null-terminates name string.
+ * Returns pointer to terminating separator, or NULL for
+ * unterminated regexps.
+ */
  static char *
  scan_separators (name)
       char *name;
@@ -5288,15 +5617,15 @@ scan_separators (name)
         {
           switch (*name)
             {
-           case 'a': *copyto++ = '\007'; break;
-           case 'b': *copyto++ = '\b'; break;
-           case 'd': *copyto++ = 0177; break;
-           case 'e': *copyto++ = 033; break;
-           case 'f': *copyto++ = '\f'; break;
-           case 'n': *copyto++ = '\n'; break;
-           case 'r': *copyto++ = '\r'; break;
-           case 't': *copyto++ = '\t'; break;
-           case 'v': *copyto++ = '\v'; break;
+           case 'a': *copyto++ = '\007'; break; /* BEL (bell)           */
+           case 'b': *copyto++ = '\b'; break;   /* BS (back space)      */
+           case 'd': *copyto++ = 0177; break;   /* DEL (delete)         */
+           case 'e': *copyto++ = 033; break;    /* ESC (escape)         */
+           case 'f': *copyto++ = '\f'; break;   /* FF (form feed)       */
+           case 'n': *copyto++ = '\n'; break;   /* NL (new line)        */
+           case 'r': *copyto++ = '\r'; break;   /* CR (carriage return) */
+           case 't': *copyto++ = '\t'; break;   /* TAB (horizontal tab) */
+           case 'v': *copyto++ = '\v'; break;   /* VT (vertical tab)    */
             default:
               if (*name == sep)
                 *copyto++ = sep;
@@ -5317,6 +5646,8 @@ scan_separators (name)
        else
         *copyto++ = *name;
      }
+  if (*name != sep)
+    name = NULL;               /* signal unterminated regexp */
  
    /* Terminate copied string. */
    *copyto = '\0';
@@ -5326,13 +5657,12 @@ scan_separators (name)
  /* Look at the argument of --regex or --no-regex and do the right
     thing.  Same for each line of a regexp file. */
  static void
-analyse_regex (regex_arg, ignore_case)
+analyse_regex (regex_arg)
       char *regex_arg;
-     bool ignore_case;
  {
    if (regex_arg == NULL)
      {
-      free_patterns ();                /* --no-regex: remove existing regexps */
+      free_regexps ();         /* --no-regex: remove existing regexps */
        return;
      }
  
@@ -5360,9 +5690,9 @@ analyse_regex (regex_arg, ignore_case)
             pfatal (regexfile);
             return;
           }
-       initbuffer (&regexbuf);
+       linebuffer_init (&regexbuf);
         while (readline_internal (&regexbuf, regexfp) > 0)
-         analyse_regex (regexbuf.buffer, ignore_case);
+         analyse_regex (regexbuf.buffer);
         free (regexbuf.buffer);
         fclose (regexfp);
        }
@@ -5381,70 +5711,136 @@ analyse_regex (regex_arg, ignore_case)
               error ("unterminated language name in regex: %s", regex_arg);
               return;
             }
-       *cp = '\0';
+       *cp++ = '\0';
         lang = get_language_from_langname (lang_name);
         if (lang == NULL)
           return;
-       add_regex (cp + 1, ignore_case, lang);
+       add_regex (cp, lang);
        }
        break;
  
        /* Regexp to be used for any language. */
      default:
-      add_regex (regex_arg, ignore_case, NULL);
+      add_regex (regex_arg, NULL);
        break;
      }
  }
  
-/* Turn a name, which is an ed-style (but Emacs syntax) regular
-   expression, into a real regular expression by compiling it. */
+/* Separate the regexp pattern from its optional name and modifiers,
+   compile it, and push the result on the p_head list for language LANG. */
  static void
-add_regex (regexp_pattern, ignore_case, lang)
+add_regex (regexp_pattern, lang)
       char *regexp_pattern;
-     bool ignore_case;
       language *lang;
  {
    static struct re_pattern_buffer zeropattern;
-  char *name;
+  char sep, *pat, *name, *modifiers;
    const char *err;
    struct re_pattern_buffer *patbuf;
-  pattern *pp;
+  regexp *rp;
+  bool
+    force_explicit_name = TRUE, /* do not use implicit tag names */
+    ignore_case = FALSE,       /* case is significant */
+    multi_line = FALSE,                /* matches are done one line at a time */
+    single_line = FALSE;       /* dot does not match newline */
  
  
-  if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
+  if (strlen(regexp_pattern) < 3)
      {
-      error ("%s: unterminated regexp", regexp_pattern);
+      error ("null regexp", (char *)NULL);
        return;
      }
+  sep = regexp_pattern[0];
    name = scan_separators (regexp_pattern);
-  if (regexp_pattern[0] == '\0')
+  if (name == NULL)
      {
-      error ("null regexp", (char *)NULL);
+      error ("%s: unterminated regexp", regexp_pattern);
        return;
      }
-  (void) scan_separators (name);
+  if (name[1] == sep)
+    {
+      error ("null name for regexp \"%s\"", regexp_pattern);
+      return;
+    }
+  modifiers = scan_separators (name);
+  if (modifiers == NULL)       /* no terminating separator --> no name */
+    {
+      modifiers = name;
+      name = "";
+    }
+  else
+    modifiers += 1;            /* skip separator */
+
+  /* Parse regex modifiers. */
+  for (; modifiers[0] != '\0'; modifiers++)
+    switch (modifiers[0])
+      {
+      case 'N':
+       if (modifiers == name)
+         error ("forcing explicit tag name but no name, ignoring", NULL);
+       force_explicit_name = TRUE;
+       break;
+      case 'i':
+       ignore_case = TRUE;
+       break;
+      case 's':
+       single_line = TRUE;
+       /* FALLTHRU */
+      case 'm':
+       multi_line = TRUE;
+       need_filebuf = TRUE;
+       break;
+      default:
+       {
+         char wrongmod [2];
+         wrongmod[0] = modifiers[0];
+         wrongmod[1] = '\0';
+         error ("invalid regexp modifier `%s', ignoring", wrongmod);
+       }
+       break;
+      }
  
    patbuf = xnew (1, struct re_pattern_buffer);
    *patbuf = zeropattern;
    if (ignore_case)
-    patbuf->translate = lc_trans;      /* translation table to fold case  */
+    {
+      static char lc_trans[CHARS];
+      int i;
+      for (i = 0; i < CHARS; i++)
+       lc_trans[i] = lowcase (i);
+      patbuf->translate = lc_trans;    /* translation table to fold case  */
+    }
+
+  if (multi_line)
+    pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
+  else
+    pat = regexp_pattern;
  
-  err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
+  if (single_line)
+    re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
+  else
+    re_set_syntax (RE_SYNTAX_EMACS);
+
+  err = re_compile_pattern (pat, strlen (pat), patbuf);
+  if (multi_line)
+    free (pat);
    if (err != NULL)
      {
        error ("%s while compiling pattern", err);
        return;
      }
  
-  pp = p_head;
-  p_head = xnew (1, pattern);
-  p_head->regex = savestr (regexp_pattern);
-  p_head->p_next = pp;
+  rp = p_head;
+  p_head = xnew (1, regexp);
+  p_head->pattern = savestr (regexp_pattern);
+  p_head->p_next = rp;
    p_head->lang = lang;
    p_head->pat = patbuf;
-  p_head->name_pattern = savestr (name);
+  p_head->name = savestr (name);
    p_head->error_signaled = FALSE;
+  p_head->force_explicit_name = force_explicit_name;
    p_head->ignore_case = ignore_case;
+  p_head->multi_line = multi_line;
  }
  
  /*
@@ -5478,6 +5874,7 @@ substitute (in, out, regs)
        size -= 1;
  
    /* Allocate space and do the substitutions. */
+  assert (size >= 0);
    result = xnew (size + 1, char);
  
    for (t = result; *out != '\0'; out++)
@@ -5492,26 +5889,111 @@ substitute (in, out, regs)
        *t++ = *out;
    *t = '\0';
  
-  assert (t <= result + size && t - result == (int)strlen (result));
+  assert (t <= result + size);
+  assert (t - result == (int)strlen (result));
  
    return result;
  }
  
-/* Deallocate all patterns. */
+/* Deallocate all regexps: free every node of the list headed by p_head,
+   along with its `pattern' and `name' strings, leaving p_head NULL.
+   NOTE(review): the node's compiled `pat' is not freed here -- confirm
+   whether that is intended. */
  static void
-free_patterns ()
+free_regexps ()
  {
-  pattern *pp;
+  regexp *rp;
    while (p_head != NULL)
      {
-      pp = p_head->p_next;
-      free (p_head->regex);
-      free (p_head->name_pattern);
+      rp = p_head->p_next;
+      free (p_head->pattern);
+      free (p_head->name);
        free (p_head);
-      p_head = pp;
+      p_head = rp;
      }
    return;
  }
+
+/*
+ * Read the whole file as a single string from `filebuf' and look for
+ * multi-line regular expressions, creating tags on matches.
+ * readline already dealt with normal regexps.
+ *
+ * For each multi-line regexp that is generic or belongs to the language
+ * of `curfdp', the globals `lineno', `charno' and `linecharno' are reset
+ * and the buffer is searched repeatedly, each search starting where the
+ * previous match ended.  Searching with a regexp stops at the first
+ * failure to match, on a matcher error, or when it matches the empty
+ * string; the last two are reported once per regexp.
+ *
+ * Idea by Ben Wing <ben@666.com> (2002).
+ */
+static void
+regex_tag_multiline ()
+{
+  char *buffer = filebuf.buffer;
+  regexp *rp;
+  char *name;
+
+  for (rp = p_head; rp != NULL; rp = rp->p_next)
+    {
+      int match = 0;
+
+      if (!rp->multi_line)
+       continue;               /* skip normal regexps */
+
+      /* Generic initialisations before parsing file from memory. */
+      lineno = 1;              /* reset global line number */
+      charno = 0;              /* reset global char number */
+      linecharno = 0;          /* reset global char number of line start */
+
+      /* Only use generic regexps or those for the current language. */
+      if (rp->lang != NULL && rp->lang != curfdp->lang)
+       continue;
+
+      while (match >= 0 && match < filebuf.len)
+       {
+         /* NOTE(review): the range is computed from the start of the
+            previous match, not from charno, so start + range can go
+            past the end of the buffer -- confirm that re_search clips
+            it. */
+         match = re_search (rp->pat, buffer, filebuf.len, charno,
+                            filebuf.len - match, &rp->regs);
+         switch (match)
+           {
+           case -2:
+             /* Some error.  Report it only once for this regexp. */
+             if (!rp->error_signaled)
+               {
+                 error ("regexp stack overflow while matching \"%s\"",
+                        rp->pattern);
+                 rp->error_signaled = TRUE;
+               }
+             break;
+           case -1:
+             /* No match. */
+             break;
+           default:
+             /* A match that ends where it starts is empty: charno
+                would never advance, so give up on this regexp. */
+             if (match == rp->regs.end[0])
+               {
+                 if (!rp->error_signaled)
+                   {
+                     error ("regexp matches the empty string: \"%s\"",
+                            rp->pattern);
+                     rp->error_signaled = TRUE;
+                   }
+                 match = -3;   /* exit from while loop */
+                 break;
+               }
+
+             /* Match occurred.  Advance charno to the end of the match,
+                counting the newlines passed over, then construct a tag. */
+             while (charno < rp->regs.end[0])
+               if (buffer[charno++] == '\n')
+                 lineno++, linecharno = charno;
+             name = rp->name;
+             if (name[0] == '\0')
+               name = NULL;
+             else /* make a named tag */
+               name = substitute (buffer, rp->name, &rp->regs);
+             if (rp->force_explicit_name || name == NULL)
+               /* Force explicit tag name, if a name is there.  An unnamed
+                  tag also goes straight to pfnote: there is no name whose
+                  length could be passed to make_tag. */
+               pfnote (name, TRUE, buffer + linecharno,
+                       charno - linecharno + 1, lineno, linecharno);
+             else
+               make_tag (name, strlen (name), TRUE, buffer + linecharno,
+                         charno - linecharno + 1, lineno, linecharno);
+             break;
+           }
+       }
+    }
+}
+
  #endif /* ETAGS_REGEXPS */
  
  \f
@@ -5531,32 +6013,24 @@ nocase_tail (cp)
    return FALSE;
  }
  
-static char *
-get_tag (bp)
+/*
+ * Make a tag for the name that starts at BP and runs up to the first
+ * character after BP for which notinname is true.  No tag is made when
+ * *BP is NUL.  If NAMEPP is not NULL, store in *NAMEPP what savenstr
+ * returns for that name (of length zero when *BP is NUL).
+ */
+static void
+get_tag (bp, namepp)
       register char *bp;
+     char **namepp;
  {
-  register char *cp, *name;
+  register char *cp = bp;
  
-  if (*bp == '\0')
-    return NULL;
-  /* Go till you get to white space or a syntactic break */
-  for (cp = bp + 1; !notinname (*cp); cp++)
-    continue;
-  name = savenstr (bp, cp-bp);
-  pfnote (name, TRUE,
-         lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
-  return name;
-}
+  if (*bp != '\0')
+    {
+      /* Go till you get to white space or a syntactic break */
+      for (cp = bp + 1; !notinname (*cp); cp++)
+       continue;
+      make_tag (bp, cp - bp, TRUE,
+               lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+    }
  
-/* Initialize a linebuffer for use */
-static void
-initbuffer (lbp)
-     linebuffer *lbp;
-{
-  lbp->size = (DEBUG) ? 3 : 200;
-  lbp->buffer = xnew (lbp->size, char);
-  lbp->buffer[0] = '\0';
-  lbp->len = 0;
+  if (namepp != NULL)
+    *namepp = savenstr (bp, cp - bp);
  }
  
  /*
@@ -5564,10 +6038,13 @@ initbuffer (lbp)
   * newline or CR-NL, if any.  Return the number of characters read from
   * `stream', which is the length of the line including the newline.
   *
- * On DOS or Windows we do not count the CR character, if any, before the
- * NL, in the returned length; this mirrors the behavior of emacs on those
+ * On DOS or Windows we do not count the CR character, if any, before the
+ * NL, in the returned length; this mirrors the behavior of Emacs on those
   * platforms (for text files, it translates CR-NL to NL as it reads in the
   * file).
+ *
+ * If multi-line regular expressions are requested, each line read is
+ * appended to `filebuf'.
   */
  static long
  readline_internal (lbp, stream)
@@ -5626,12 +6103,28 @@ readline_internal (lbp, stream)
      }
    lbp->len = p - buffer;
  
+  if (need_filebuf             /* we need filebuf for multi-line regexps */
+      && chars_deleted > 0)    /* not at EOF */
+    {
+      while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
+       {
+         /* Expand filebuf. */
+         filebuf.size *= 2;
+         xrnew (filebuf.buffer, filebuf.size, char);
+       }
+      strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
+      filebuf.len += lbp->len;
+      filebuf.buffer[filebuf.len++] = '\n';
+      filebuf.buffer[filebuf.len] = '\0';
+    }
+
    return lbp->len + chars_deleted;
  }
  
  /*
   * Like readline_internal, above, but in addition try to match the
- * input line against relevant regular expressions.
+ * input line against relevant regular expressions and manage #line
+ * directives.
   */
  static void
  readline (lbp, stream)
@@ -5736,6 +6229,8 @@ readline (lbp, stream)
                           fdhead->infabsdir = savestr (curfdp->infabsdir);
                           fdhead->taggedfname = taggedfname;
                           fdhead->usecharno = FALSE;
+                         fdhead->prop = NULL;
+                         fdhead->written = FALSE;
                           curfdp = fdhead;
                         }
                     }
@@ -5752,8 +6247,8 @@ readline (lbp, stream)
         {
           if (result > 0)
             {
-           /* Do a tail recursion on ourselves, thus discarding the contents
-              of the line buffer. */
+             /* Do a tail recursion on ourselves, thus discarding the contents
+                of the line buffer. */
               readline (lbp, stream);
               return;
             }
@@ -5766,46 +6261,56 @@ readline (lbp, stream)
  #ifdef ETAGS_REGEXPS
    {
      int match;
-    pattern *pp;
+    regexp *rp;
+    char *name;
  
-    /* Match against relevant patterns. */
+    /* Match against relevant regexps. */
      if (lbp->len > 0)
-      for (pp = p_head; pp != NULL; pp = pp->p_next)
+      for (rp = p_head; rp != NULL; rp = rp->p_next)
         {
-         /* Only use generic regexps or those for the current language. */
-         if (pp->lang != NULL && pp->lang != fdhead->lang)
+         /* Only use generic regexps or those for the current language.
+            Also do not use multiline regexps, which is the job of
+            regex_tag_multiline. */
+         if ((rp->lang != NULL && rp->lang != fdhead->lang)
+             || rp->multi_line)
             continue;
  
-         match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
+         match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
           switch (match)
             {
             case -2:
               /* Some error. */
-             if (!pp->error_signaled)
+             if (!rp->error_signaled)
                 {
-                 error ("error while matching \"%s\"", pp->regex);
-                 pp->error_signaled = TRUE;
+                 error ("regexp stack overflow while matching \"%s\"",
+                        rp->pattern);
+                 rp->error_signaled = TRUE;
                 }
               break;
             case -1:
               /* No match. */
               break;
-           default:
-             /* Match occurred.  Construct a tag. */
-             if (pp->name_pattern[0] != '\0')
+           case 0:
+             /* Empty string matched. */
+             if (!rp->error_signaled)
                 {
-                 /* Make a named tag. */
-                 char *name = substitute (lbp->buffer,
-                                          pp->name_pattern, &pp->regs);
-                 if (name != NULL)
-                   pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
+                 error ("regexp matches the empty string: \"%s\"", rp->pattern);
+                 rp->error_signaled = TRUE;
                 }
+             break;
+           default:
+             /* Match occurred.  Construct a tag. */
+             name = rp->name;
+             if (name[0] == '\0')
+               name = NULL;
+             else /* make a named tag */
+               name = substitute (lbp->buffer, rp->name, &rp->regs);
+             if (rp->force_explicit_name)
+               /* Force explicit tag name, if a name is there. */
+               pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
               else
-               {
-                 /* Make an unnamed tag. */
-                 pfnote ((char *)NULL, TRUE,
+               make_tag (name, strlen (name), TRUE,
                           lbp->buffer, match, lineno, linecharno);
-               }
               break;
             }
         }
@@ -5884,13 +6389,12 @@ etags_strchr (sp, c)
  }
  
  /*
- * Return TRUE if the two strings are equal, ignoring case for alphabetic
- * characters.
+ * Compare two strings, ignoring case for alphabetic characters.
   *
- * Analogous to BSD's strcasecmp, included for portability.
+ * Same as BSD's strcasecmp, included for portability.
   */
-static bool
-strcaseeq (s1, s2)
+static int
+etags_strcasecmp (s1, s2)
       register const char *s1;
       register const char *s2;
  {
@@ -5900,7 +6404,35 @@ strcaseeq (s1, s2)
              : *s1 == *s2))
      s1++, s2++;
  
-  return (*s1 == *s2);
+  return (ISALPHA (*s1) && ISALPHA (*s2)
+         ? lowcase (*s1) - lowcase (*s2)
+         : *s1 - *s2);
+}
+
+/*
+ * Compare at most N characters of two strings, ignoring case for
+ * alphabetic characters.  Return 0 if they agree over that span (or up
+ * to the end of both strings), otherwise the difference between the
+ * first pair of characters that differ (lower-cased when both are
+ * alphabetic).
+ *
+ * Same as BSD's strncasecmp, included for portability.
+ */
+static int
+etags_strncasecmp (s1, s2, n)
+     register const char *s1;
+     register const char *s2;
+     register int n;
+{
+  /* Test the count first: once N characters have been compared the loop
+     must leave N negative even if S1 ends right there, otherwise the
+     character after the compared span would decide the result. */
+  while (n-- > 0 && *s1 != '\0'
+        && (ISALPHA (*s1) && ISALPHA (*s2)
+            ? lowcase (*s1) == lowcase (*s2)
+            : *s1 == *s2))
+    s1++, s2++;
+
+  if (n < 0)
+    return 0;
+  else
+    return (ISALPHA (*s1) && ISALPHA (*s2)
+           ? lowcase (*s1) - lowcase (*s2)
+           : *s1 - *s2);
+}
  
  /* Skip spaces, return new pointer. */
@@ -5929,7 +6461,7 @@ fatal (s1, s2)
       char *s1, *s2;
  {
    error (s1, s2);
-  exit (BAD);
+  exit (EXIT_FAILURE);
  }
  
  static void
@@ -5937,21 +6469,21 @@ pfatal (s1)
       char *s1;
  {
    perror (s1);
-  exit (BAD);
+  exit (EXIT_FAILURE);
  }
  
  static void
  suggest_asking_for_help ()
  {
-  fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
-          progname,
+
+  /* Tell the user on stderr which option lists all the others ("--help"
+     when LONG_OPTIONS is defined, "-h" otherwise), then exit with
+     EXIT_FAILURE.  The fprintf call is written out in full in each
+     branch, so no preprocessor directive appears inside an argument
+     list. */
  #ifdef LONG_OPTIONS
-          "--help"
+fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
+        progname, "--help");
  #else
-          "-h"
+fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
+        progname, "-h");
  #endif
-          );
-  exit (BAD);
+  exit (EXIT_FAILURE);
  }
  
  /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
@@ -6021,7 +6553,7 @@ etags_getcwd ()
    linebuffer path;
    FILE *pipe;
  
-  initbuffer (&path);
+  linebuffer_init (&path);
    pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
    if (pipe == NULL || readline_internal (&path, pipe) == 0)
      pfatal ("pwd");
@@ -6187,6 +6719,18 @@ canonicalize_filename (fn)
  #endif
  }
  
+\f
+/* Set up LBP for use: give it a freshly allocated buffer that holds
+   the empty string.  The buffer starts at 3 chars if DEBUG, else 200. */
+static void
+linebuffer_init (lbp)
+     linebuffer *lbp;
+{
+  int initial_size;
+
+  if (DEBUG)
+    initial_size = 3;
+  else
+    initial_size = 200;
+
+  lbp->len = 0;
+  lbp->size = initial_size;
+  lbp->buffer = xnew (initial_size, char);
+  *lbp->buffer = '\0';
+}
+
  /* Set the minimum size of a string contained in a linebuffer. */
  static void
  linebuffer_setlen (lbp, toksize)
@@ -6201,7 +6745,7 @@ linebuffer_setlen (lbp, toksize)
    lbp->len = toksize;
  }
  
-/* Like malloc but get fatal error if memory is exhausted.  */
+/* Like malloc but get fatal error if memory is exhausted. */
  static PTR
  xmalloc (size)
       unsigned int size;
@@ -6229,6 +6773,11 @@ xrealloc (ptr, size)
   * indent-tabs-mode: t
   * tab-width: 8
   * fill-column: 79
- * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
+ * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
   * End:
   */
+
+/* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
+   (do not change this comment) */
+
+/* etags.c ends here */