X-Git-Url: https://git.hcoop.net/bpt/emacs.git/blobdiff_plain/e7d3b099a25e2a895b0b9ff3e1f7100082f170a6..7630911d6162623297a052bd75d8866440b73a68:/lib-src/etags.c

diff --git a/lib-src/etags.c b/lib-src/etags.c
index 6b4b379f13..829fc97fbf 100644
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -2,21 +2,21 @@
    Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
    Free Software Foundation, Inc. and Ken Arnold
 
-This file is not considered part of GNU Emacs.
+ This file is not considered part of GNU Emacs.
 
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
 
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
 
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
 
 /*
  * Authors:
@@ -27,14 +27,15 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  * 1989	Sam Kendall added C++.
  * 1992 Joseph B. Wells improved C and C++ parsing.
  * 1993	Francesco Potortì reorganised C and C++.
- * 1994	Regexp tags by Tom Tromey.
+ * 1994	Line-by-line regexp tags by Tom Tromey.
  * 2001 Nested classes by Francesco Potortì (concept by Mykola Dzyuba).
  * 2002 #line directives by Francesco Potortì.
  *
- *	Francesco Potortì <pot@gnu.org> has maintained it since 1993.
+ * Francesco Potortì <pot@gnu.org> has maintained and improved it since 1993.
+ *
  */
 
-char pot_etags_version[] = "@(#) pot revision number is 16.10";
+char pot_etags_version[] = "@(#) pot revision number is 16.56";
 
 #define	TRUE	1
 #define	FALSE	0
@@ -176,25 +177,18 @@ If you want regular expression support, you should delete this notice and
 # define CTAGS FALSE
 #endif
 
-/* Exit codes for success and failure.  */
-#ifdef VMS
-# define	GOOD	1
-# define	BAD	0
-#else
-# define	GOOD	0
-# define	BAD	1
-#endif
-
 #define streq(s,t)	(assert((s)!=NULL || (t)!=NULL), !strcmp (s, t))
+#define strcaseeq(s,t)	(assert((s)!=NULL && (t)!=NULL), !etags_strcasecmp (s, t))
 #define strneq(s,t,n)	(assert((s)!=NULL || (t)!=NULL), !strncmp (s, t, n))
+#define strncaseeq(s,t,n) (assert((s)!=NULL && (t)!=NULL), !etags_strncasecmp (s, t, n))
 
 #define CHARS 256		/* 2^sizeof(char) */
 #define CHAR(x)		((unsigned int)(x) & (CHARS - 1))
-#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white */
-#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name */
-#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token */
-#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token */
-#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens */
+#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white (see white) */
+#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name (see nonam) */
+#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token (see begtk) */
+#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token (see midtk) */
+#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens (see endtk) */
 
 #define ISALNUM(c)	isalnum (CHAR(c))
 #define ISALPHA(c)	isalpha (CHAR(c))
@@ -236,11 +230,12 @@ typedef struct
 typedef struct
 {
   char *name;			/* language name */
-  bool metasource;		/* source used to generate other sources */
+  char *help;                   /* detailed help for the language */
   Lang_function *function;	/* parse function */
-  char **filenames;		/* names of this language's files */
   char **suffixes;		/* name suffixes of this language's files */
+  char **filenames;		/* names of this language's files */
   char **interpreters;		/* interpreters for this language */
+  bool metasource;		/* source used to generate other sources */
 } language;
 
 typedef struct fdesc
@@ -253,6 +248,7 @@ typedef struct fdesc
   language *lang;		/* language of file */
   char *prop;			/* file properties to write in tagfile */
   bool usecharno;		/* etags tags shall contain char number */
+  bool written;			/* entry written in the tags file */
 } fdesc;
 
 typedef struct node_st
@@ -260,9 +256,9 @@ typedef struct node_st
   struct node_st *left, *right;	/* left and right sons */
   fdesc *fdp;			/* description of file to whom tag belongs */
   char *name;			/* tag name */
-  char *pat;			/* search pattern */
+  char *regex;			/* search regexp */
   bool valid;			/* write this tag on the tag file */
-  bool is_func;			/* function tag: use pattern in CTAGS mode */
+  bool is_func;			/* function tag: use regexp in CTAGS mode */
   bool been_warned;		/* warning already given for duplicated tag */
   int lno;			/* line number tag is on */
   long cno;			/* character number line starts on */
@@ -288,9 +284,9 @@ typedef struct
   enum {
     at_language,		/* a language specification */
     at_regexp,			/* a regular expression */
-    at_icregexp,		/* same, but with case ignored */
     at_filename,		/* a file name */
-    at_stdin			/* read from stdin here */
+    at_stdin,			/* read from stdin here */
+    at_end			/* stop parsing the list */
   } arg_type;			/* argument type */
   language *lang;		/* language associated with the argument */
   char *what;			/* the argument itself */
@@ -298,17 +294,19 @@ typedef struct
 
 #ifdef ETAGS_REGEXPS
 /* Structure defining a regular expression. */
-typedef struct pattern
+typedef struct regexp
 {
-  struct pattern *p_next;
-  language *lang;
-  char *regex;
-  struct re_pattern_buffer *pat;
-  struct re_registers regs;
-  char *name_pattern;
-  bool error_signaled;
-  bool ignore_case;
-} pattern;
+  struct regexp *p_next;	/* pointer to next in list */
+  language *lang;		/* if set, use only for this language */
+  char *pattern;		/* the regexp pattern */
+  char *name;			/* tag name */
+  struct re_pattern_buffer *pat; /* the compiled pattern */
+  struct re_registers regs;	/* re registers */
+  bool error_signaled;		/* already signaled for this regexp */
+  bool force_explicit_name;	/* do not allow implicit tag name */
+  bool ignore_case;		/* ignore case when matching */
+  bool multi_line;		/* do a multi-line match on the whole file */
+} regexp;
 #endif /* ETAGS_REGEXPS */
 
 
@@ -326,23 +324,24 @@ static void Cplusplus_entries __P((FILE *));
 static void Cstar_entries __P((FILE *));
 static void Erlang_functions __P((FILE *));
 static void Fortran_functions __P((FILE *));
-static void Yacc_entries __P((FILE *));
+static void HTML_labels __P((FILE *));
 static void Lisp_functions __P((FILE *));
 static void Makefile_targets __P((FILE *));
 static void Pascal_functions __P((FILE *));
 static void Perl_functions __P((FILE *));
 static void PHP_functions __P((FILE *));
-static void Postscript_functions __P((FILE *));
+static void PS_functions __P((FILE *));
 static void Prolog_functions __P((FILE *));
 static void Python_functions __P((FILE *));
 static void Scheme_functions __P((FILE *));
 static void TeX_commands __P((FILE *));
 static void Texinfo_nodes __P((FILE *));
+static void Yacc_entries __P((FILE *));
 static void just_read_file __P((FILE *));
 
 static void print_language_names __P((void));
 static void print_version __P((void));
-static void print_help __P((void));
+static void print_help __P((argument *));
 int main __P((int, char **));
 
 static compressor *get_compressor_from_suffix __P((char *, char **));
@@ -352,12 +351,12 @@ static language *get_language_from_filename __P((char *, bool));
 static void readline __P((linebuffer *, FILE *));
 static long readline_internal __P((linebuffer *, FILE *));
 static bool nocase_tail __P((char *));
-static char *get_tag __P((char *));
+static void get_tag __P((char *, char **));
 
 #ifdef ETAGS_REGEXPS
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
-static void free_patterns __P((void));
+static void analyse_regex __P((char *));
+static void free_regexps __P((void));
+static void regex_tag_multiline __P((void));
 #endif /* ETAGS_REGEXPS */
 static void error __P((const char *, const char *));
 static void suggest_asking_for_help __P((void));
@@ -366,14 +365,13 @@ static void pfatal __P((char *));
 static void add_node __P((node *, node **));
 
 static void init __P((void));
-static void initbuffer __P((linebuffer *));
 static void process_file_name __P((char *, language *));
 static void process_file __P((FILE *, char *, language *));
 static void find_entries __P((FILE *));
 static void free_tree __P((node *));
 static void free_fdesc __P((fdesc *));
 static void pfnote __P((char *, bool, char *, int, int, long));
-static void new_pfnote __P((char *, int, bool, char *, int, int, long));
+static void make_tag __P((char *, int, bool, char *, int, int, long));
 static void invalidate_nodes __P((fdesc *, node **));
 static void put_entries __P((node *));
 
@@ -384,13 +382,15 @@ static char *savenstr __P((char *, int));
 static char *savestr __P((char *));
 static char *etags_strchr __P((const char *, int));
 static char *etags_strrchr __P((const char *, int));
-static bool strcaseeq __P((const char *, const char *));
+static int etags_strcasecmp __P((const char *, const char *));
+static int etags_strncasecmp __P((const char *, const char *, int));
 static char *etags_getcwd __P((void));
 static char *relative_filename __P((char *, char *));
 static char *absolute_filename __P((char *, char *));
 static char *absolute_dirname __P((char *, char *));
 static bool filename_is_absolute __P((char *f));
 static void canonicalize_filename __P((char *));
+static void linebuffer_init __P((linebuffer *));
 static void linebuffer_setlen __P((linebuffer *, int));
 static PTR xmalloc __P((unsigned int));
 static PTR xrealloc __P((char *, unsigned int));
@@ -417,6 +417,8 @@ static node *nodehead;		/* the head of the binary tree of tags */
 static node *last_node;		/* the last node created */
 
 static linebuffer lb;		/* the current line */
+static linebuffer filebuf;	/* a buffer containing the whole file */
+static linebuffer token_name;	/* a buffer containing a tag name */
 
 /* boolean "functions" (see init)	*/
 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
@@ -424,7 +426,7 @@ static char
   /* white chars */
   *white = " \f\t\n\r\v",
   /* not in a name */
-  *nonam = " \f\t\n\r()=,;",
+  *nonam = " \f\t\n\r()=,;",	/* look at make_tag before modifying! */
   /* token ending chars */
   *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
   /* token starting chars */
@@ -450,20 +452,17 @@ static bool vgrind_style;	/* -v: create vgrind style index output */
 static bool no_warnings;	/* -w: suppress warnings */
 static bool cxref_style;	/* -x: create cxref style output */
 static bool cplusplus;		/* .[hc] means C++, not C */
-static bool noindentypedefs;	/* -I: ignore indentation in C */
+static bool ignoreindent;	/* -I: ignore indentation in C */
 static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
 
 #define STDIN 0x1001		/* returned by getopt_long on --parse-stdin */
 static bool parsing_stdin;	/* --parse-stdin used */
 
 #ifdef ETAGS_REGEXPS
-/* List of all regexps. */
-static pattern *p_head;
-
-/* How many characters in the character set.  (From regex.c.)  */
-#define CHAR_SET_SIZE 256
-/* Translation table for case-insensitive matching. */
-static char lc_trans[CHAR_SET_SIZE];
+static regexp *p_head;		/* list of all regexps */
+static bool need_filebuf;	/* some regexes are multi-line */
+#else
+# define need_filebuf FALSE
 #endif /* ETAGS_REGEXPS */
 
 #ifdef LONG_OPTIONS
@@ -526,6 +525,22 @@ static compressor compressors[] =
 /* Ada code */
 static char *Ada_suffixes [] =
   { "ads", "adb", "ada", NULL };
+static char Ada_help [] =
+"In Ada code, functions, procedures, packages, tasks and types are\n\
+tags.  Use the `--packages-only' option to create tags for\n\
+packages only.\n\
+Ada tag names have suffixes indicating the type of entity:\n\
+	Entity type:	Qualifier:\n\
+	------------	----------\n\
+	function	/f\n\
+	procedure	/p\n\
+	package spec	/s\n\
+	package body	/b\n\
+	type		/t\n\
+	task		/k\n\
+Thus, `M-x find-tag <RET> bidule/b <RET>' will go directly to the\n\
+body of the package `bidule', while `M-x find-tag <RET> bidule <RET>'\n\
+will just search for any tag `bidule'.";
 
 /* Assembly code */
 static char *Asm_suffixes [] =
@@ -539,79 +554,187 @@ static char *Asm_suffixes [] =
     "src", /* BSO/Tasking C compiler output */
     NULL
   };
+static char Asm_help [] =
+"In assembler code, labels appearing at the beginning of a line,\n\
+followed by a colon, are tags.";
+
 
 /* Note that .c and .h can be considered C++, if the --c++ flag was
-   given, or if the `class' keyowrd is met inside the file.
+   given, or if the `class' or `template' keywords are met inside the file.
    That is why default_C_entries is called for these. */
 static char *default_C_suffixes [] =
   { "c", "h", NULL };
+static char default_C_help [] =
+"In C code, any C function or typedef is a tag, and so are\n\
+definitions of `struct', `union' and `enum'.  `#define' macro\n\
+definitions and `enum' constants are tags unless you specify\n\
+`--no-defines'.  Global variables are tags unless you specify\n\
+`--no-globals'.  Use of `--no-globals' and `--no-defines'\n\
+can make the tags table file much smaller.\n\
+You can tag function declarations and external variables by\n\
+using `--declarations', and struct members by using `--members'.";
 
 static char *Cplusplus_suffixes [] =
   { "C", "c++", "cc", "cpp", "cxx", "H", "h++", "hh", "hpp", "hxx",
     "M",			/* Objective C++ */
     "pdb",			/* Postscript with C syntax */
     NULL };
+static char Cplusplus_help [] =
+"In C++ code, all the tag constructs of C code are tagged.  (Use\n\
+--help --lang=c --lang=c++ for full help.)\n\
+In addition to C tags, member functions are also recognized, and\n\
+optionally member variables if you use the `--members' option.\n\
+Tags for variables and functions in classes are named `CLASS::VARIABLE'\n\
+and `CLASS::FUNCTION'.  `operator' definitions have tag names like\n\
+`operator+'.";
 
 static char *Cjava_suffixes [] =
   { "java", NULL };
+static char Cjava_help [] =	/* detailed help for the "java" language */
+"In Java code, all the tag constructs of C and C++ code are\n\
+tagged.  (Use --help --lang=c --lang=c++ --lang=java for full help.)";
+
 
 static char *Cobol_suffixes [] =
   { "COB", "cob", NULL };
+static char Cobol_help [] =
+"In Cobol code, tags are paragraph names; that is, any word\n\
+starting in column 8 and followed by a period.";
 
 static char *Cstar_suffixes [] =
   { "cs", "hs", NULL };
 
 static char *Erlang_suffixes [] =
   { "erl", "hrl", NULL };
+static char Erlang_help [] =
+"In Erlang code, the tags are the functions, records and macros\n\
+defined in the file.";
 
 static char *Fortran_suffixes [] =
   { "F", "f", "f90", "for", NULL };
+static char Fortran_help [] =
+"In Fortran code, functions, subroutines and block data are tags.";
+
+static char *HTML_suffixes [] =
+  { "htm", "html", "shtml", NULL };
+static char HTML_help [] =
+"In HTML input files, the tags are the `title' and the `h1', `h2',\n\
+`h3' headers.  Also, tags are `name=' in anchors and all\n\
+occurrences of `id='.";
 
 static char *Lisp_suffixes [] =
   { "cl", "clisp", "el", "l", "lisp", "LSP", "lsp", "ml", NULL };
+static char Lisp_help [] =
+"In Lisp code, any function defined with `defun', any variable\n\
+defined with `defvar' or `defconst', and in general the first\n\
+argument of any expression that starts with `(def' in column zero\n\
+is a tag.";
 
 static char *Makefile_filenames [] =
   { "Makefile", "makefile", "GNUMakefile", "Makefile.in", "Makefile.am", NULL};
+static char Makefile_help [] =
+"In makefiles, targets are tags; additionally, variables are tags\n\
+unless you specify `--no-globals'.";
+
+static char *Objc_suffixes [] =
+  { "lm",			/* Objective lex file */
+    "m",			/* Objective C file */
+     NULL };
+static char Objc_help [] =
+"In Objective C code, tags include Objective C definitions for classes,\n\
+class categories, methods and protocols.  Tags for variables and\n\
+functions in classes are named `CLASS::VARIABLE' and `CLASS::FUNCTION'.";
 
 static char *Pascal_suffixes [] =
   { "p", "pas", NULL };
+static char Pascal_help [] =
+"In Pascal code, the tags are the functions and procedures defined\n\
+in the file.";
 
 static char *Perl_suffixes [] =
   { "pl", "pm", NULL };
-
 static char *Perl_interpreters [] =
   { "perl", "@PERL@", NULL };
+static char Perl_help [] =
+"In Perl code, the tags are the packages, subroutines and variables\n\
+defined by the `package', `sub', `my' and `local' keywords.  Use\n\
+`--globals' if you want to tag global variables.  Tags for\n\
+subroutines are named `PACKAGE::SUB'.  The name for subroutines\n\
+defined in the default package is `main::SUB'.";
 
 static char *PHP_suffixes [] =
   { "php", "php3", "php4", NULL };
+static char PHP_help [] =
+"In PHP code, tags are functions, classes and defines.  When using\n\
+the `--members' option, vars are tags too.";
 
 static char *plain_C_suffixes [] =
-  { "lm",			/* Objective lex file */
-    "m",			/* Objective C file */
-    "pc",			/* Pro*C file */
+  { "pc",			/* Pro*C file */
      NULL };
 
-static char *Postscript_suffixes [] =
+static char *PS_suffixes [] =
   { "ps", "psw", NULL };	/* .psw is for PSWrap */
+static char PS_help [] =
+"In PostScript code, the tags are the functions.";
 
 static char *Prolog_suffixes [] =
   { "prolog", NULL };
+static char Prolog_help [] =
+"In Prolog code, tags are predicates and rules at the beginning of\n\
+line.";
 
 static char *Python_suffixes [] =
   { "py", NULL };
+static char Python_help [] =
+"In Python code, `def' or `class' at the beginning of a line\n\
+generate a tag.";
 
 /* Can't do the `SCM' or `scm' prefix with a version number. */
 static char *Scheme_suffixes [] =
   { "oak", "sch", "scheme", "SCM", "scm", "SM", "sm", "ss", "t", NULL };
+static char Scheme_help [] =
+"In Scheme code, tags include anything defined with `def' or with a\n\
+construct whose name starts with `def'.  They also include\n\
+variables set with `set!' at top level in the file.";
 
 static char *TeX_suffixes [] =
   { "bib", "clo", "cls", "ltx", "sty", "TeX", "tex", NULL };
+static char TeX_help [] =
+"In LaTeX text, the argument of any of the commands `\\chapter',\n\
+`\\section', `\\subsection', `\\subsubsection', `\\eqno', `\\label',\n\
+`\\ref', `\\cite', `\\bibitem', `\\part', `\\appendix', `\\entry',\n\
+`\\index', `\\def', `\\newcommand', `\\renewcommand',\n\
+`\\newenvironment' or `\\renewenvironment' is a tag.\n\
+\n\
+Other commands can be specified by setting the environment variable\n\
+`TEXTAGS' to a colon-separated list like, for example,\n\
+     TEXTAGS=\"mycommand:myothercommand\".";
+
 
 static char *Texinfo_suffixes [] =
   { "texi", "texinfo", "txi", NULL };
+static char Texinfo_help [] =
+"for texinfo files, lines starting with @node are tagged.";
 
 static char *Yacc_suffixes [] =
   { "y", "y++", "ym", "yxx", "yy", NULL }; /* .ym is Objective yacc file */
+static char Yacc_help [] =
+"In Bison or Yacc input files, each rule defines as a tag the\n\
+nonterminal it constructs.  The portions of the file that contain\n\
+C code are parsed as C code (use --help --lang=c --lang=yacc\n\
+for full help).";
+
+static char auto_help [] =	/* detailed help for the "auto" pseudo-language */
+"`auto' is not a real language, it indicates to use\n\
+a default language for files based on file name suffix and file contents.";
+
+static char none_help [] =
+"`none' is not a real language, it indicates to only do\n\
+regexp processing on files.";
+
+static char no_lang_help [] =
+"No detailed help available for this language.";
+
 
 /*
  * Table of languages.
@@ -622,31 +745,33 @@ static char *Yacc_suffixes [] =
 
 static language lang_names [] =
 {
-  { "ada",      FALSE, Ada_funcs,            NULL, Ada_suffixes,        NULL },
-  { "asm",      FALSE, Asm_labels,           NULL, Asm_suffixes,        NULL },
-  { "c",        FALSE, default_C_entries,    NULL, default_C_suffixes,  NULL },
-  { "c++",      FALSE, Cplusplus_entries,    NULL, Cplusplus_suffixes,  NULL },
-  { "c*",       FALSE, Cstar_entries,        NULL, Cstar_suffixes,      NULL },
-  { "cobol",    FALSE, Cobol_paragraphs,     NULL, Cobol_suffixes,      NULL },
-  { "erlang",   FALSE, Erlang_functions,     NULL, Erlang_suffixes,     NULL },
-  { "fortran",  FALSE, Fortran_functions,    NULL, Fortran_suffixes,    NULL },
-  { "java",     FALSE, Cjava_entries,        NULL, Cjava_suffixes,      NULL },
-  { "lisp",     FALSE, Lisp_functions,       NULL, Lisp_suffixes,       NULL },
-  { "makefile", FALSE, Makefile_targets,     Makefile_filenames, NULL,  NULL },
-  { "pascal",   FALSE, Pascal_functions,     NULL, Pascal_suffixes,     NULL },
-  { "perl",     FALSE, Perl_functions,NULL, Perl_suffixes, Perl_interpreters },
-  { "php",      FALSE, PHP_functions,        NULL, PHP_suffixes,        NULL },
-  { "postscript",FALSE, Postscript_functions,NULL, Postscript_suffixes, NULL },
-  { "proc",     FALSE, plain_C_entries,      NULL, plain_C_suffixes,    NULL },
-  { "prolog",   FALSE, Prolog_functions,     NULL, Prolog_suffixes,     NULL },
-  { "python",   FALSE, Python_functions,     NULL, Python_suffixes,     NULL },
-  { "scheme",   FALSE, Scheme_functions,     NULL, Scheme_suffixes,     NULL },
-  { "tex",      FALSE, TeX_commands,         NULL, TeX_suffixes,        NULL },
-  { "texinfo",  FALSE, Texinfo_nodes,        NULL, Texinfo_suffixes,    NULL },
-  { "yacc",      TRUE, Yacc_entries,         NULL, Yacc_suffixes,       NULL },
-  { "auto", FALSE, NULL },             /* default guessing scheme */
-  { "none", FALSE, just_read_file },   /* regexp matching only */
-  { NULL, FALSE, NULL }                /* end of list */
+  { "ada",       Ada_help,       Ada_funcs,         Ada_suffixes       },
+  { "asm",       Asm_help,       Asm_labels,        Asm_suffixes       },
+  { "c",         default_C_help, default_C_entries, default_C_suffixes },
+  { "c++",       Cplusplus_help, Cplusplus_entries, Cplusplus_suffixes },
+  { "c*",        no_lang_help,   Cstar_entries,     Cstar_suffixes     },
+  { "cobol",     Cobol_help,     Cobol_paragraphs,  Cobol_suffixes     },
+  { "erlang",    Erlang_help,    Erlang_functions,  Erlang_suffixes    },
+  { "fortran",   Fortran_help,   Fortran_functions, Fortran_suffixes   },
+  { "html",      HTML_help,      HTML_labels,       HTML_suffixes      },
+  { "java",      Cjava_help,     Cjava_entries,     Cjava_suffixes     },
+  { "lisp",      Lisp_help,      Lisp_functions,    Lisp_suffixes      },
+  { "makefile",  Makefile_help,Makefile_targets,NULL,Makefile_filenames},
+  { "objc",      Objc_help,      plain_C_entries,   Objc_suffixes      },
+  { "pascal",    Pascal_help,    Pascal_functions,  Pascal_suffixes    },
+  { "perl",Perl_help,Perl_functions,Perl_suffixes,NULL,Perl_interpreters},
+  { "php",       PHP_help,       PHP_functions,     PHP_suffixes       },
+  { "postscript",PS_help,        PS_functions,      PS_suffixes        },
+  { "proc",      no_lang_help,   plain_C_entries,   plain_C_suffixes   },
+  { "prolog",    Prolog_help,    Prolog_functions,  Prolog_suffixes    },
+  { "python",    Python_help,    Python_functions,  Python_suffixes    },
+  { "scheme",    Scheme_help,    Scheme_functions,  Scheme_suffixes    },
+  { "tex",       TeX_help,       TeX_commands,      TeX_suffixes       },
+  { "texinfo",   Texinfo_help,   Texinfo_nodes,     Texinfo_suffixes   },
+  { "yacc",      Yacc_help,Yacc_entries,Yacc_suffixes,NULL,NULL,TRUE},
+  { "auto",      auto_help },                      /* default guessing scheme */
+  { "none",      none_help,      just_read_file }, /* regexp matching only */
+  { NULL }                /* end of list */
 };
 
 
@@ -669,21 +794,25 @@ default file names and dot suffixes:");
 	  printf (" .%s", *ext);
       puts ("");
     }
-  puts ("Where `auto' means use default language for files based on file\n\
+  puts ("where `auto' means use default language for files based on file\n\
 name suffix, and `none' means only do regexp processing on files.\n\
 If no language is specified and no matching suffix is found,\n\
 the first line of the file is read for a sharp-bang (#!) sequence\n\
 followed by the name of an interpreter.  If no such sequence is found,\n\
 Fortran is tried first; if no tags are found, C is tried next.\n\
-When parsing any C file, a \"class\" keyword switches to C++.\n\
-Compressed files are supported using gzip and bzip2.");
+When parsing any C file, a \"class\" or \"template\" keyword\n\
+switches to C++.");
+  puts ("Compressed files are supported using gzip and bzip2.\n\
+\n\
+For detailed help on a given language use, for example,\n\
+etags --help --lang=ada.");
 }
 
 #ifndef EMACS_NAME
-# define EMACS_NAME "GNU Emacs"
+# define EMACS_NAME "standalone"
 #endif
 #ifndef VERSION
-# define VERSION "21"
+# define VERSION "version"
 #endif
 static void
 print_version ()
@@ -692,12 +821,27 @@ print_version ()
   puts ("Copyright (C) 2002 Free Software Foundation, Inc. and Ken Arnold");
   puts ("This program is distributed under the same terms as Emacs");
 
-  exit (GOOD);
+  exit (EXIT_SUCCESS);
 }
 
 static void
-print_help ()
+print_help (argbuffer)
+     argument *argbuffer;
 {
+  bool help_for_lang = FALSE;
+
+  for (; argbuffer->arg_type != at_end; argbuffer++)
+    if (argbuffer->arg_type == at_language)
+      {
+	if (help_for_lang)
+	  puts ("");
+	puts (argbuffer->lang->help);
+	help_for_lang = TRUE;
+      }
+
+  if (help_for_lang)
+    exit (EXIT_SUCCESS);
+
   printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
 \n\
 These are the options accepted by %s.\n", progname, progname);
@@ -726,7 +870,7 @@ Relative ones are stored relative to the output file's directory.\n");
   /* This option is mostly obsolete, because etags can now automatically
      detect C++.  Retained for backward compatibility and for debugging and
      experimentation.  In principle, we could want to tag as C++ even
-     before any "class" keyword.
+     before any "class" or "template" keyword.
   puts ("-C, --c++\n\
         Treat files whose name suffix defaults to C language as C++ files.");
   */
@@ -765,27 +909,27 @@ Relative ones are stored relative to the output file's directory.\n");
 	Do not create tag entries for global variables in some\n\
 	languages.  This makes the tags file smaller.");
   puts ("--members\n\
-	Create tag entries for member variables in C and derived languages.");
+	Create tag entries for members of structures in some languages.");
 
 #ifdef ETAGS_REGEXPS
-  puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
-        Make a tag for each line matching pattern REGEXP in the following\n\
- 	files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
-	regexfile is a file containing one REGEXP per line.\n\
-	REGEXP is anchored (as if preceded by ^).\n\
-	The form /REGEXP/NAME/ creates a named tag.\n\
+  puts ("-r REGEXP, --regex=REGEXP or --regex=@regexfile\n\
+        Make a tag for each line matching a regular expression pattern\n\
+	in the following files.  {LANGUAGE}REGEXP uses REGEXP for LANGUAGE\n\
+	files only.  REGEXFILE is a file containing one REGEXP per line.\n\
+	REGEXP takes the form /TAGREGEXP/TAGNAME/MODS, where TAGNAME/ is\n\
+	optional.  The TAGREGEXP pattern is anchored (as if preceded by ^).");
+  puts ("	If TAGNAME/ is present, the tags created are named.\n\
 	For example Tcl named tags can be created with:\n\
-	--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
-  puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
-        Like -r, --regex but ignore case when matching expressions.");
+	  --regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".\n\
+	MODS are optional one-letter modifiers: `i' means to ignore case,\n\
+	`m' means to allow multi-line matches, `s' implies `m' and\n\
+	causes dot to match any character, including newline.");
   puts ("-R, --no-regex\n\
         Don't create tags from regexps for the following files.");
 #endif /* ETAGS_REGEXPS */
   puts ("-I, --ignore-indentation\n\
-        Don't rely on indentation quite as much as normal.  Currently,\n\
-        this means not to assume that a closing brace in the first\n\
-        column is the final brace of a function or structure\n\
-        definition in C and C++.");
+        In C and C++ do not assume that a closing brace in the first\n\
+        column is the final brace of a function or structure definition.");
   puts ("-o FILE, --output=FILE\n\
         Write the tags to FILE.");
   puts ("--parse-stdin=NAME\n\
@@ -828,14 +972,16 @@ Relative ones are stored relative to the output file's directory.\n");
   puts ("-V, --version\n\
         Print the version of the program.\n\
 -h, --help\n\
-        Print this help message.");
+        Print this help message.\n\
+        Followed by one or more `--language' options prints detailed\n\
+        help about tag generation for the specified languages.");
 
   print_language_names ();
 
   puts ("");
   puts ("Report bugs to bug-gnu-emacs@gnu.org");
 
-  exit (GOOD);
+  exit (EXIT_SUCCESS);
 }
 
 
@@ -975,6 +1121,7 @@ main (argc, argv)
   argument *argbuffer;
   int current_arg, file_count;
   linebuffer filename_lb;
+  bool help_asked = FALSE;
 #ifdef VMS
   bool got_err;
 #endif
@@ -996,14 +1143,6 @@ main (argc, argv)
      is small. */
   argbuffer = xnew (argc, argument);
 
-#ifdef ETAGS_REGEXPS
-  /* Set syntax for regular expression routines. */
-  re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
-  /* Translation table for case-insensitive search. */
-  for (i = 0; i < CHAR_SET_SIZE; i++)
-    lc_trans[i] = lowcase (i);
-#endif /* ETAGS_REGEXPS */
-
   /*
    * If etags, always find typedefs and structure tags.  Why not?
    * Also default to find macro constants, enum constants and
@@ -1061,12 +1200,13 @@ main (argc, argv)
 	  {
 	    error ("-o option may only be given once.", (char *)NULL);
 	    suggest_asking_for_help ();
+	    /* NOTREACHED */
 	  }
 	tagfile = optarg;
 	break;
       case 'I':
       case 'S':		/* for backward compatibility */
-	noindentypedefs = TRUE;
+	ignoreindent = TRUE;
 	break;
       case 'l':
 	{
@@ -1079,6 +1219,10 @@ main (argc, argv)
 	    }
 	}
 	break;
+      case 'c':
+	/* Backward compatibility: support obsolete --ignore-case-regexp. */
+	optarg = concat (optarg, "i", ""); /* memory leak here */
+	/* FALLTHRU */
       case 'r':
 	argbuffer[current_arg].arg_type = at_regexp;
 	argbuffer[current_arg].what = optarg;
@@ -1089,17 +1233,12 @@ main (argc, argv)
 	argbuffer[current_arg].what = NULL;
 	++current_arg;
 	break;
-      case 'c':
-	argbuffer[current_arg].arg_type = at_icregexp;
-	argbuffer[current_arg].what = optarg;
-	++current_arg;
-	break;
       case 'V':
 	print_version ();
 	break;
       case 'h':
       case 'H':
-	print_help ();
+	help_asked = TRUE;
 	break;
 
 	/* Etags options */
@@ -1118,9 +1257,10 @@ main (argc, argv)
       case 'w': no_warnings = TRUE;				break;
       default:
 	suggest_asking_for_help ();
+	/* NOTREACHED */
       }
 
-  for (; optind < argc; ++optind)
+  for (; optind < argc; optind++)
     {
       argbuffer[current_arg].arg_type = at_filename;
       argbuffer[current_arg].what = argv[optind];
@@ -1128,10 +1268,17 @@ main (argc, argv)
       ++file_count;
     }
 
+  argbuffer[current_arg].arg_type = at_end;
+
+  if (help_asked)
+    print_help (argbuffer);
+    /* NOTREACHED */
+
   if (nincluded_files == 0 && file_count == 0)
     {
       error ("no input files specified.", (char *)NULL);
       suggest_asking_for_help ();
+      /* NOTREACHED */
     }
 
   if (tagfile == NULL)
@@ -1150,8 +1297,10 @@ main (argc, argv)
 
   init ();			/* set up boolean "functions" */
 
-  initbuffer (&lb);
-  initbuffer (&filename_lb);
+  linebuffer_init (&lb);
+  linebuffer_init (&filename_lb);
+  linebuffer_init (&filebuf);
+  linebuffer_init (&token_name);
 
   if (!CTAGS)
     {
@@ -1174,7 +1323,7 @@ main (argc, argv)
   /*
    * Loop through files finding functions.
    */
-  for (i = 0; i < current_arg; ++i)
+  for (i = 0; i < current_arg; i++)
     {
       static language *lang;	/* non-NULL if language is forced */
       char *this_file;
@@ -1186,10 +1335,7 @@ main (argc, argv)
 	  break;
 #ifdef ETAGS_REGEXPS
 	case at_regexp:
-	  analyse_regex (argbuffer[i].what, FALSE);
-	  break;
-	case at_icregexp:
-	  analyse_regex (argbuffer[i].what, TRUE);
+	  analyse_regex (argbuffer[i].what);
 	  break;
 #endif
 	case at_filename:
@@ -1232,21 +1378,33 @@ main (argc, argv)
     }
 
 #ifdef ETAGS_REGEXPS
-  free_patterns ();
+  free_regexps ();
 #endif /* ETAGS_REGEXPS */
+  free (lb.buffer);
+  free (filebuf.buffer);
+  free (token_name.buffer);
 
   if (!CTAGS || cxref_style)
     {
-      put_entries (nodehead);
+      put_entries (nodehead);	/* write the remaining tags (ETAGS) */
       free_tree (nodehead);
       nodehead = NULL;
       if (!CTAGS)
-	while (nincluded_files-- > 0)
-	  fprintf (tagf, "\f\n%s,include\n", *included_files++);
+	{
+	  fdesc *fdp;
+
+	  /* Output file entries that have no tags. */
+	  for (fdp = fdhead; fdp != NULL; fdp = fdp->next)
+	    if (!fdp->written)
+	      fprintf (tagf, "\f\n%s,0\n", fdp->taggedfname);
+
+	  while (nincluded_files-- > 0)
+	    fprintf (tagf, "\f\n%s,include\n", *included_files++);
+	}
 
       if (fclose (tagf) == EOF)
 	pfatal (tagfile);
-      exit (GOOD);
+      exit (EXIT_SUCCESS);
     }
 
   if (update)
@@ -1265,7 +1423,7 @@ main (argc, argv)
 	  sprintf (cmd,
 		   "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
 		   tagfile, argbuffer[i].what, tagfile);
-	  if (system (cmd) != GOOD)
+	  if (system (cmd) != EXIT_SUCCESS)
 	    fatal ("failed to execute shell command", (char *)NULL);
 	}
       append_to_tagfile = TRUE;
@@ -1274,7 +1432,7 @@ main (argc, argv)
   tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
   if (tagf == NULL)
     pfatal (tagfile);
-  put_entries (nodehead);
+  put_entries (nodehead);	/* write all the tags (CTAGS) */
   free_tree (nodehead);
   nodehead = NULL;
   if (fclose (tagf) == EOF)
@@ -1286,7 +1444,7 @@ main (argc, argv)
       sprintf (cmd, "sort -o %.*s %.*s", BUFSIZ, tagfile, BUFSIZ, tagfile);
       exit (system (cmd));
     }
-  return GOOD;
+  return EXIT_SUCCESS;
 }
 
 
@@ -1571,6 +1729,7 @@ process_file (fh, fn, lang)
     }
   fdp->usecharno = TRUE;	/* use char position when making tags */
   fdp->prop = NULL;
+  fdp->written = FALSE;		/* not written on tags file yet */
 
   fdhead = fdp;
   curfdp = fdhead;		/* the current file description */
@@ -1648,7 +1807,6 @@ find_entries (inf)
      FILE *inf;
 {
   char *cp;
-  node *old_last_node;
   language *lang = curfdp->lang;
   Lang_function *parser = NULL;
 
@@ -1703,7 +1861,7 @@ find_entries (inf)
   /* We rewind here, even if inf may be a pipe.  We fail if the
      length of the first line is longer than the pipe block size,
      which is unlikely. */
-    rewind (inf);
+  rewind (inf);
 
   /* Else try to guess the language given the case insensitive file name. */
   if (parser == NULL)
@@ -1716,6 +1874,26 @@ find_entries (inf)
 	}
     }
 
+  /* Else try Fortran or C. */
+  if (parser == NULL)
+    {
+      node *old_last_node = last_node;
+
+      curfdp->lang = get_language_from_langname ("fortran");
+      find_entries (inf);
+
+      if (old_last_node == last_node)
+	/* No Fortran entries found.  Try C. */
+	{
+	  /* We do not tag if rewind fails.
+	     Only the file name will be recorded in the tags file. */
+	  rewind (inf);
+	  curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
+	  find_entries (inf);
+	}
+      return;
+    }
+
   if (!no_line_directive
       && curfdp->lang != NULL && curfdp->lang->metasource)
     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
@@ -1733,12 +1911,8 @@ find_entries (inf)
 	  {
 	    fdesc *badfdp = *fdpp;
 
-	    if (DEBUG)
-	      fprintf (stderr,
-		       "Removing references to \"%s\" obtained from \"%s\"\n",
-		       badfdp->taggedfname, badfdp->infname);
-
-	    /* Delete the tags referring to badfdp. */
+	    /* Delete the tags referring to badfdp->taggedfname
+	       that were obtained from badfdp->infname. */
 	    invalidate_nodes (badfdp, &nodehead);
 
 	    *fdpp = badfdp->next; /* remove the bad description from the list */
@@ -1748,35 +1922,89 @@ find_entries (inf)
 	  fdpp = &(*fdpp)->next; /* advance the list pointer */
     }
 
-  if (parser != NULL)
-    {
-      /* Generic initialisations before reading from file. */
-      lineno = 0;		/* reset global line number */
-      charno = 0;		/* reset global char number */
-      linecharno = 0;		/* reset global char number of line start */
+  assert (parser != NULL);
 
-      parser (inf);
-      return;
-    }
+  /* Generic initialisations before reading from file. */
+  linebuffer_setlen (&filebuf, 0); /* reset the file buffer */
 
-  /* Else try Fortran. */
-  old_last_node = last_node;
-  curfdp->lang = get_language_from_langname ("fortran");
-  find_entries (inf);
+  /* Generic initialisations before parsing file with readline. */
+  lineno = 0;		       /* reset global line number */
+  charno = 0;		       /* reset global char number */
+  linecharno = 0;	       /* reset global char number of line start */
+
+  parser (inf);
+
+#ifdef ETAGS_REGEXPS
+  regex_tag_multiline ();
+#endif /* ETAGS_REGEXPS */
+}
+
+
+/*
+ * Check whether an implicitly named tag should be created,
+ * then call `pfnote'.
+ * NAME is a string that is internally copied by this function.
+ *
+ * TAGS format specification
+ * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
+ * The following is explained in some more detail in etc/ETAGS.EBNF.
+ *
+ * make_tag creates tags with "implicit tag names" (unnamed tags)
+ * if the following are all true, assuming NONAM=" \f\t\n\r()=,;":
+ *  1. NAME does not contain any of the characters in NONAM;
+ *  2. LINESTART contains name as either a rightmost, or rightmost but
+ *     one character, substring;
+ *  3. the character, if any, immediately before NAME in LINESTART must
+ *     be a character in NONAM;
+ *  4. the character, if any, immediately after NAME in LINESTART must
+ *     also be a character in NONAM.
+ *
+ * The implementation uses the notinname() macro, which recognises the
+ * characters stored in the string `nonam'.
+ * etags.el needs to use the same characters that are in NONAM.
+ */
+static void
+make_tag (name, namelen, is_func, linestart, linelen, lno, cno)
+     char *name;		/* tag name, or NULL if unnamed */
+     int namelen;		/* tag length */
+     bool is_func;		/* tag is a function */
+     char *linestart;		/* start of the line where tag is */
+     int linelen;		/* length of the line where tag is */
+     int lno;			/* line number */
+     long cno;			/* character number */
+{
+  bool named = (name != NULL && namelen > 0);
 
-  if (old_last_node == last_node)
-    /* No Fortran entries found.  Try C. */
+  if (!CTAGS && named)		/* maybe set named to false */
+    /* Let's try to make an implicit tag name, that is, create an unnamed tag
+       such that etags.el can guess a name from it. */
     {
-      /* We do not tag if rewind fails.
-	 Only the file name will be recorded in the tags file. */
-      rewind (inf);
-      curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
-      find_entries (inf);
+      int i;
+      register char *cp = name;
+
+      for (i = 0; i < namelen; i++)
+	if (notinname (*cp++))
+	  break;
+      if (i == namelen)				/* rule #1 */
+	{
+	  cp = linestart + linelen - namelen;
+	  if (notinname (linestart[linelen-1]))
+	    cp -= 1;				/* rule #4 */
+	  if (cp >= linestart			/* rule #2 */
+	      && (cp == linestart
+		  || notinname (cp[-1]))	/* rule #3 */
+	      && strneq (name, cp, namelen))	/* rule #2 */
+	    named = FALSE;	/* use implicit tag name */
+	}
     }
-  return;
+
+  if (named)
+    name = savenstr (name, namelen);
+  else
+    name = NULL;
+  pfnote (name, is_func, linestart, linelen, lno, cno);
 }
 
-
 /* Record a tag. */
 static void
 pfnote (name, is_func, linestart, linelen, lno, cno)
@@ -1789,6 +2017,7 @@ pfnote (name, is_func, linestart, linelen, lno, cno)
 {
   register node *np;
 
+  assert (name == NULL || name[0] != '\0');
   if (CTAGS && name == NULL)
     return;
 
@@ -1823,72 +2052,16 @@ pfnote (name, is_func, linestart, linelen, lno, cno)
   if (CTAGS && !cxref_style)
     {
       if (strlen (linestart) < 50)
-	np->pat = concat (linestart, "$", "");
+	np->regex = concat (linestart, "$", "");
       else
-	np->pat = savenstr (linestart, 50);
+	np->regex = savenstr (linestart, 50);
     }
   else
-    np->pat = savenstr (linestart, linelen);
+    np->regex = savenstr (linestart, linelen);
 
   add_node (np, &nodehead);
 }
 
-/*
- * TAGS format specification
- * Idea by Sam Kendall <kendall@mv.mv.com> (1997)
- *
- * pfnote should emit the optimized form [unnamed tag] only if:
- *  1. name does not contain any of the characters " \t\r\n(),;";
- *  2. linestart contains name as either a rightmost, or rightmost but
- *     one character, substring;
- *  3. the character, if any, immediately before name in linestart must
- *     be one of the characters " \t(),;";
- *  4. the character, if any, immediately after name in linestart must
- *     also be one of the characters " \t(),;".
- *
- * The real implementation uses the notinname() macro, which recognises
- * characters slightly different from " \t\r\n(),;".  See the variable
- * `nonam'.
- */
-#define traditional_tag_style TRUE
-static void
-new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
-     char *name;		/* tag name, or NULL if unnamed */
-     int namelen;		/* tag length */
-     bool is_func;		/* tag is a function */
-     char *linestart;		/* start of the line where tag is */
-     int linelen;		/* length of the line where tag is */
-     int lno;			/* line number */
-     long cno;			/* character number */
-{
-  register char *cp;
-  bool named;
-
-  named = TRUE;
-  if (!CTAGS)
-    {
-      for (cp = name; !notinname (*cp); cp++)
-	continue;
-      if (*cp == '\0')				/* rule #1 */
-	{
-	  cp = linestart + linelen - namelen;
-	  if (notinname (linestart[linelen-1]))
-	    cp -= 1;				/* rule #4 */
-	  if (cp >= linestart			/* rule #2 */
-	      && (cp == linestart
-		  || notinname (cp[-1]))	/* rule #3 */
-	      && strneq (name, cp, namelen))	/* rule #2 */
-	    named = FALSE;	/* use unnamed tag */
-	}
-    }
-
-  if (named)
-    name = savenstr (name, namelen);
-  else
-    name = NULL;
-  pfnote (name, is_func, linestart, linelen, lno, cno);
-}
-
 /*
  * free_tree ()
  *	recurse on left children, iterate on right children.
@@ -1903,7 +2076,7 @@ free_tree (np)
       free_tree (np->left);
       if (np->name != NULL)
 	free (np->name);
-      free (np->pat);
+      free (np->regex);
       free (np);
       np = node_right;
     }
@@ -2030,20 +2203,22 @@ invalidate_nodes (badfdp, npp)
       if (np->left != NULL)
 	invalidate_nodes (badfdp, &np->left);
       if (np->fdp == badfdp)
-	np-> valid = FALSE;
+	np->valid = FALSE;
       if (np->right != NULL)
 	invalidate_nodes (badfdp, &np->right);
     }
   else
     {
-      node **next = &np->left;
+      assert (np->fdp != NULL);
       if (np->fdp == badfdp)
 	{
-	  *npp = *next;		/* detach the sublist from the list */
+	  *npp = np->left;	/* detach the sublist from the list */
 	  np->left = NULL;	/* isolate it */
 	  free_tree (np);	/* free it */
+	  invalidate_nodes (badfdp, npp);
 	}
-      invalidate_nodes (badfdp, next);
+      else
+	invalidate_nodes (badfdp, &np->left);
     }
 }
 
@@ -2075,15 +2250,16 @@ total_size_of_entries (np)
   register int total = 0;
 
   for (; np != NULL; np = np->right)
-    {
-      total += strlen (np->pat) + 1;		/* pat\177 */
-      if (np->name != NULL)
-	total += strlen (np->name) + 1;		/* name\001 */
-      total += number_len ((long) np->lno) + 1;	/* lno, */
-      if (np->cno != invalidcharno)		/* cno */
-	total += number_len (np->cno);
-      total += 1;				/* newline */
-    }
+    if (np->valid)
+      {
+	total += strlen (np->regex) + 1;		/* pat\177 */
+	if (np->name != NULL)
+	  total += strlen (np->name) + 1;		/* name\001 */
+	total += number_len ((long) np->lno) + 1;	/* lno, */
+	if (np->cno != invalidcharno)			/* cno */
+	  total += number_len (np->cno);
+	total += 1;					/* newline */
+      }
 
   return total;
 }
@@ -2113,8 +2289,9 @@ put_entries (np)
 	      fdp = np->fdp;
 	      fprintf (tagf, "\f\n%s,%d\n",
 		       fdp->taggedfname, total_size_of_entries (np));
+	      fdp->written = TRUE;
 	    }
-	  fputs (np->pat, tagf);
+	  fputs (np->regex, tagf);
 	  fputc ('\177', tagf);
 	  if (np->name != NULL)
 	    {
@@ -2139,7 +2316,7 @@ put_entries (np)
 			 np->name, np->fdp->taggedfname, (np->lno + 63) / 64);
 	      else
 		fprintf (stdout, "%-16s %3d %-16s %s\n",
-			 np->name, np->lno, np->fdp->taggedfname, np->pat);
+			 np->name, np->lno, np->fdp->taggedfname, np->regex);
 	    }
 	  else
 	    {
@@ -2150,7 +2327,7 @@ put_entries (np)
 		  putc (searchar, tagf);
 		  putc ('^', tagf);
 
-		  for (sp = np->pat; *sp; sp++)
+		  for (sp = np->regex; *sp; sp++)
 		    {
 		      if (*sp == '\\' || *sp == searchar)
 			putc ('\\', tagf);
@@ -2534,15 +2711,22 @@ static enum
  */
 static struct tok
 {
-  bool valid;
-  bool named;
-  int offset;
-  int length;
-  int lineno;
-  long linepos;
-  char *line;
+  char *line;			/* string containing the token */
+  int offset;			/* where the token starts in LINE */
+  int length;			/* token length */
+  /*
+    The previous members can be used to pass strings around for generic
+    purposes.  The following ones specifically refer to creating tags.  In this
+    case the token contained here is the pattern that will be used to create a
+    tag.
+  */
+  bool valid;			/* when FALSE, do not create a tag; the token
+				   should be invalidated whenever a state
+				   machine is reset prematurely */
+  bool named;			/* create a named tag */
+  int lineno;			/* source line number of tag */
+  long linepos;			/* source char number of tag */
 } token;			/* latest token read */
-static linebuffer token_name;	/* its name */
 
 /*
  * Variables and functions for dealing with nested structures.
@@ -2560,7 +2744,7 @@ static struct {
 } cstack;			/* stack for nested declaration tags */
 /* Current struct nesting depth (namespace, class, struct, union, enum). */
 #define nestlev		(cstack.nl)
-/* After struct keyword or in struct body, not inside an nested function. */
+/* After struct keyword or in struct body, not inside a nested function. */
 #define instruct	(structdef == snone && nestlev > 0			\
 			 && cblev == cstack.cblev[nestlev-1] + 1)
 
@@ -2778,8 +2962,8 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
       return FALSE;
     case st_C_template:
     case st_C_class:
-      if (cblev == 0
-	  && (*c_extp & C_AUTO)	/* automatic detection of C++ language */
+      if ((*c_extp & C_AUTO)	/* automatic detection of C++ language */
+	  && cblev == 0
 	  && definedef == dnone && structdef == snone
 	  && typdef == tnone && fvdef == fvnone)
 	*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
@@ -2884,8 +3068,16 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
       fvextern = TRUE;
       /* FALLTHRU */
     case st_C_typespec:
-      if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
-	fvdef = fvnone;		/* should be useless */
+      switch  (fvdef)
+	{
+	case finlist:
+	case flistseen:
+	case fignore:
+	case vignore:
+	  break;
+	default:
+	  fvdef = fvnone;
+	}
       return FALSE;
     case st_C_ignore:
       fvextern = FALSE;
@@ -2915,8 +3107,10 @@ consider_token (str, len, c, c_extp, cblev, parlev, is_func_or_var)
 	      fvdef = vignore;
 	      return FALSE;
 	    }
-	  if ((*c_extp & C_PLPL) && strneq (str+len-10, "::operator", 10))
+	  if (len >= 10 && strneq (str+len-10, "::operator", 10))
 	    {
+	      if (*c_extp & C_AUTO) /* automatic detection of C++ */
+		*c_extp = (*c_extp | C_PLPL) & ~C_AUTO;
 	      fvdef = foperator;
 	      *is_func_or_var = TRUE;
 	      return TRUE;
@@ -2953,6 +3147,10 @@ static struct
 #define curlinepos (lbs[curndx].linepos)
 #define newlinepos (lbs[newndx].linepos)
 
+#define plainc ((c_ext & C_EXT) == C_PLAIN)
+#define cplpl (c_ext & C_PLPL)
+#define cjava ((c_ext & C_JAVA) == C_JAVA)
+
 #define CNL_SAVE_DEFINEDEF()						\
 do {									\
   curlinepos = charno;							\
@@ -2980,31 +3178,18 @@ make_C_tag (isfun)
 {
   /* This function should never be called when token.valid is FALSE, but
      we must protect against invalid input or internal errors. */
-  if (DEBUG || token.valid)
-    {
-      if (traditional_tag_style)
-	{
-	  /* This was the original code.  Now we call new_pfnote instead,
-	     which uses the new method for naming tags (see new_pfnote). */
-	  char *name = NULL;
+  if (!DEBUG && !token.valid)
+    return;
 
-	  if (CTAGS || token.named)
-	    name = savestr (token_name.buffer);
-	  if (DEBUG && !token.valid)
-	    {
-	      if (token.named)
-		name = concat (name, "##invalid##", "");
-	      else
-		name = savestr ("##invalid##");
-	    }
-	  pfnote (name, isfun, token.line,
-		  token.offset+token.length+1, token.lineno, token.linepos);
-	}
-      else
-	new_pfnote (token_name.buffer, token_name.len, isfun, token.line,
-		    token.offset+token.length+1, token.lineno, token.linepos);
-      token.valid = FALSE;
-    }
+  if (token.valid)
+    make_tag (token_name.buffer, token_name.len, isfun, token.line,
+	      token.offset+token.length+1, token.lineno, token.linepos);
+  else				/* this case is optimised away if !DEBUG */
+    make_tag (concat ("INVALID TOKEN:-->", token_name.buffer, ""),
+	      token_name.len + 17, isfun, token.line,
+	      token.offset+token.length+1, token.lineno, token.linepos);
+
+  token.valid = FALSE;
 }
 
 
@@ -3030,14 +3215,12 @@ C_entries (c_ext, inf)
   int parlev;			/* current parenthesis level */
   int typdefcblev;		/* cblev where a typedef struct body begun */
   bool incomm, inquote, inchar, quotednl, midtoken;
-  bool cplpl, cjava;
   bool yacc_rules;		/* in the rules part of a yacc file */
   struct tok savetoken;	        /* token saved during preprocessor handling */
 
 
-  initbuffer (&token_name);
-  initbuffer (&lbs[0].lb);
-  initbuffer (&lbs[1].lb);
+  linebuffer_init (&lbs[0].lb);
+  linebuffer_init (&lbs[1].lb);
   if (cstack.size == 0)
     {
       cstack.size = (DEBUG) ? 1 : 4;
@@ -3058,8 +3241,6 @@ C_entries (c_ext, inf)
   token.valid = savetoken.valid = FALSE;
   cblev = 0;
   parlev = 0;
-  cplpl = (c_ext & C_PLPL) == C_PLPL;
-  cjava = (c_ext & C_JAVA) == C_JAVA;
   if (cjava)
     { qualifier = "."; qlen = 1; }
   else
@@ -3225,13 +3406,13 @@ C_entries (c_ext, inf)
 	    {
 	      if (endtoken (c))
 		{
-		  if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
+		  if (c == ':' && *lp == ':' && begtoken (lp[1]))
+		    /* This handles :: in the middle,
+		       but not at the beginning of an identifier.
+		       Also, space-separated :: is not recognised. */
 		    {
-		      /*
-		       * This handles :: in the middle, but not at the
-		       * beginning of an identifier.  Also, space-separated
-		       * :: is not recognised.
-		       */
+		      if (c_ext & C_AUTO) /* automatic detection of C++ */
+			c_ext = (c_ext | C_PLPL) & ~C_AUTO;
 		      lp += 2;
 		      toklen += 2;
 		      c = lp[-1];
@@ -3258,7 +3439,7 @@ C_entries (c_ext, inf)
 			      toklen += lp - oldlp;
 			    }
 			  token.named = FALSE;
-			  if ((c_ext & C_EXT)	/* not pure C */
+			  if (!plainc
 			      && nestlev > 0 && definedef == dnone)
 			    /* in struct body */
 			    {
@@ -3374,8 +3555,11 @@ C_entries (c_ext, inf)
 		      fvdef = finlist;
 		      continue;
 		    case flistseen:
-		      make_C_tag (TRUE); /* a function */
-		      fvdef = fignore;
+		      if (plainc || declarations)
+			{
+			  make_C_tag (TRUE); /* a function */
+			  fvdef = fignore;
+			}
 		      break;
 		    case fvnameseen:
 		      fvdef = fvnone;
@@ -3428,7 +3612,17 @@ C_entries (c_ext, inf)
 	      break;
 	    }
 	  if (structdef == stagseen)
-	    structdef = scolonseen;
+	    {
+	      structdef = scolonseen;
+	      break;
+	    }
+	  /* Should be useless, but may work as a safety net. */
+	  if (cplpl && fvdef == flistseen)
+	    {
+	      make_C_tag (TRUE); /* a function */
+	      fvdef = fignore;
+	      break;
+	    }
 	  break;
 	case ';':
 	  if (definedef != dnone)
@@ -3447,7 +3641,7 @@ C_entries (c_ext, inf)
 	      switch (fvdef)
 		{
 		case fignore:
-		  if (typdef == tignore)
+		  if (typdef == tignore || cplpl)
 		    fvdef = fvnone;
 		  break;
 		case fvnameseen:
@@ -3459,20 +3653,17 @@ C_entries (c_ext, inf)
 		  token.valid = FALSE;
 		  break;
 		case flistseen:
-		  if ((declarations && typdef == tnone && !instruct)
-		      || (members && typdef != tignore && instruct))
+		  if (declarations
+		      && (typdef == tnone || (typdef != tignore && instruct)))
 		    make_C_tag (TRUE);  /* a function declaration */
 		  /* FALLTHRU */
 		default:
 		  fvextern = FALSE;
 		  fvdef = fvnone;
 		  if (declarations
-		      && structdef == stagseen && (c_ext & C_PLPL))
+		       && cplpl && structdef == stagseen)
 		    make_C_tag (FALSE);	/* forward declaration */
 		  else
-		    /* The following instruction invalidates the token.
-		       Probably the token should be invalidated in all other
-		       cases where some state machine is reset prematurely. */
 		    token.valid = FALSE;
 		} /* switch (fvdef) */
 	      /* FALLTHRU */
@@ -3675,18 +3866,25 @@ C_entries (c_ext, inf)
 	  if (definedef != dnone)
 	    break;
 	  if (fvdef == fstartlist)
-	    fvdef = fvnone;	/* avoid tagging `foo' in `foo (*bar()) ()' */
+	    {
+	      fvdef = fvnone;	/* avoid tagging `foo' in `foo (*bar()) ()' */
+	      token.valid = FALSE;
+	    }
 	  break;
 	case '}':
 	  if (definedef != dnone)
 	    break;
-	  if (!noindentypedefs && lp == newlb.buffer + 1)
+	  if (!ignoreindent && lp == newlb.buffer + 1)
 	    {
+	      if (cblev != 0)
+		token.valid = FALSE;
 	      cblev = 0;	/* reset curly brace level if first column */
 	      parlev = 0;	/* also reset paren level, just in case... */
 	    }
 	  else if (cblev > 0)
 	    cblev--;
+	  else
+	    token.valid = FALSE; /* something gone amiss, token unreliable */
 	  popclass_above (cblev);
 	  structdef = snone;
 	  /* Only if typdef == tinbody is typdefcblev significant. */
@@ -3770,7 +3968,6 @@ C_entries (c_ext, inf)
 
     } /* while not eof */
 
-  free (token_name.buffer);
   free (lbs[0].lb.buffer);
   free (lbs[1].lb.buffer);
 }
@@ -3831,8 +4028,9 @@ Yacc_entries (inf)
 #define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
   for (;			/* loop initialization */		\
        !feof (file_pointer)	/* loop test */				\
-       && (char_pointer = lb.buffer, /* instructions at start of loop */ \
-	   readline (&line_buffer, file_pointer),			\
+       &&			/* instructions at start of loop */	\
+	  (readline (&line_buffer, file_pointer),			\
+           char_pointer = line_buffer.buffer,				\
 	   TRUE);							\
       )
 #define LOOKING_AT(cp, keyword)	/* keyword is a constant string */	\
@@ -3903,8 +4101,8 @@ F_getit (inf)
     return;
   for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
     continue;
-  pfnote (savenstr (dbp, cp-dbp), TRUE,
-	  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+  make_tag (dbp, cp-dbp, TRUE,
+	    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 }
 
 
@@ -3971,8 +4169,8 @@ Fortran_functions (inf)
 	    {
 	      dbp = skip_spaces (dbp);
 	      if (*dbp == '\0')	/* assume un-named */
-		pfnote (savestr ("blockdata"), TRUE,
-			lb.buffer, dbp - lb.buffer, lineno, linecharno);
+		make_tag ("blockdata", 9, TRUE,
+			  lb.buffer, dbp - lb.buffer, lineno, linecharno);
 	      else
 		F_getit (inf);	/* look for name */
 	    }
@@ -3985,7 +4183,7 @@ Fortran_functions (inf)
 /*
  * Ada parsing
  * Original code by
- * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be> (1998)
+ * Philippe Waroquiers (1998)
  */
 
 static void Ada_getit __P((FILE *, char *));
@@ -4048,7 +4246,9 @@ Ada_getit (inf, name_qualifier)
       *cp = '\0';
       name = concat (dbp, name_qualifier, "");
       *cp = c;
-      pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+      make_tag (name, strlen (name), TRUE,
+		lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+      free (name);
       if (c == '"')
 	dbp = cp + 1;
       return;
@@ -4060,6 +4260,7 @@ Ada_funcs (inf)
      FILE *inf;
 {
   bool inquote = FALSE;
+  bool skip_till_semicolumn = FALSE;
 
   LOOP_ON_INPUT_LINES (inf, lb, dbp)
     {
@@ -4096,6 +4297,14 @@ Ada_funcs (inf)
 	      continue;
 	    }
 
+	  if (skip_till_semicolumn)
+	    {
+	      if (*dbp == ';')
+		skip_till_semicolumn = FALSE;
+	      dbp++;
+	      continue;         /* advance char */
+	    }
+
 	  /* Search for beginning of a token.  */
 	  if (!begtoken (*dbp))
 	    {
@@ -4122,6 +4331,16 @@ Ada_funcs (inf)
 	      else
 		break;		/* from switch */
 	      continue;		/* advance char */
+
+	    case 'u':
+	      if (typedefs && !packages_only && nocase_tail ("use"))
+		{
+		  /* when tagging types, avoid tagging  use type Pack.Typename;
+		     for this, we will skip everything till a ; */
+		  skip_till_semicolumn = TRUE;
+		  continue;     /* advance char */
+		}
+
 	    case 't':
 	      if (!packages_only && nocase_tail ("task"))
 		Ada_getit (inf, "/k");
@@ -4167,11 +4386,9 @@ Asm_labels (inf)
  	  while (ISALNUM (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
  	    cp++;
  	  if (*cp == ':' || iswhite (*cp))
- 	    {
- 	      /* Found end of label, so copy it and add it to the table. */
- 	      pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
+	    /* Found end of label, so copy it and add it to the table. */
+	    make_tag (lb.buffer, cp - lb.buffer, TRUE,
 		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
- 	    }
  	}
     }
 }
@@ -4199,56 +4416,63 @@ Perl_functions (inf)
       if (LOOKING_AT (cp, "package"))
 	{
 	  free (package);
-	  package = get_tag (cp);
-	  if (package == NULL)	/* can't parse package name */
-	    package = savestr ("");
-	  else
-	    package = savestr(package);	/* make a copy */
+	  get_tag (cp, &package);
 	}
       else if (LOOKING_AT (cp, "sub"))
 	{
-	  char *name, *fullname, *pos;
+	  char *pos;
 	  char *sp = cp;
 
 	  while (!notinname (*cp))
 	    cp++;
 	  if (cp == sp)
-	    continue;
-	  name = savenstr (sp, cp-sp);
-	  if ((pos = etags_strchr (name, ':')) != NULL && pos[1] == ':')
-	    fullname = name;
+	    continue;		/* nothing found */
+	  if ((pos = etags_strchr (sp, ':')) != NULL
+	      && pos < cp && pos[1] == ':')
+	    /* The name is already qualified. */
+	    make_tag (sp, cp - sp, TRUE,
+		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	  else
-	    fullname = concat (package, "::", name);
-	  pfnote (fullname, TRUE,
-		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
-	  if (name != fullname)
-	    free (name);
+	    /* Qualify it. */
+	    {
+	      char savechar, *name;
+
+	      savechar = *cp;
+	      *cp = '\0';
+	      name = concat (package, "::", sp);
+	      *cp = savechar;
+	      make_tag (name, strlen(name), TRUE,
+			lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	      free (name);
+	    }
  	}
-       else if (globals		/* only if tagging global vars is enabled */
-		&& (LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local")))
+       else if (globals)	/* only if we are tagging global vars */
  	{
+	  /* Skip a qualifier, if any. */
+	  bool qual = LOOKING_AT (cp, "my") || LOOKING_AT (cp, "local");
  	  /* After "my" or "local", but before any following paren or space. */
- 	  char *varname = NULL;
+	  char *varstart = cp;
 
- 	  if (*cp == '$' || *cp == '@' || *cp == '%')
+ 	  if (qual		/* should this be removed?  If yes, how? */
+	      && (*cp == '$' || *cp == '@' || *cp == '%'))
  	    {
- 	      char* varstart = ++cp;
- 	      while (ISALNUM (*cp) || *cp == '_')
+ 	      varstart += 1;
+	      do
  		cp++;
- 	      varname = savenstr (varstart, cp-varstart);
+ 	      while (ISALNUM (*cp) || *cp == '_');
  	    }
- 	  else
+ 	  else if (qual)
  	    {
  	      /* Should be examining a variable list at this point;
  		 could insist on seeing an open parenthesis. */
  	      while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
  		cp++;
  	    }
+	  else
+	    continue;
 
- 	  /* Perhaps I should back cp up one character, so the TAGS table
- 	     doesn't mention (and so depend upon) the following char. */
- 	  pfnote (varname, FALSE,
-		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ 	  make_tag (varstart, cp - varstart, FALSE,
+		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	}
     }
 }
@@ -4274,8 +4498,8 @@ Python_functions (inf)
 	  char *name = cp;
 	  while (!notinname (*cp) && *cp != ':')
 	    cp++;
-	  pfnote (savenstr (name, cp-name), TRUE,
-		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	  make_tag (name, cp - name, TRUE,
+		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	}
     }
 }
@@ -4307,8 +4531,8 @@ PHP_functions (inf)
 	{
 	  while (!notinname (*cp))
 	    cp++;
-	  pfnote (savenstr (name, cp-name), TRUE,
-		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	  make_tag (name, cp - name, TRUE,
+		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	  search_identifier = FALSE;
 	}
       else if (LOOKING_AT (cp, "function"))
@@ -4320,8 +4544,8 @@ PHP_functions (inf)
 	      name = cp;
 	      while (!notinname (*cp))
 		cp++;
-	      pfnote (savenstr (name, cp-name), TRUE,
-		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	      make_tag (name, cp - name, TRUE,
+			lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	    }
 	  else
 	    search_identifier = TRUE;
@@ -4333,8 +4557,8 @@ PHP_functions (inf)
 	      name = cp;
 	      while (*cp != '\0' && !iswhite (*cp))
 		cp++;
-	      pfnote (savenstr (name, cp-name), FALSE,
-		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	      make_tag (name, cp - name, FALSE,
+			lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	    }
 	  else
 	    search_identifier = TRUE;
@@ -4348,8 +4572,8 @@ PHP_functions (inf)
 	  name = cp;
 	  while (*cp != quote && *cp != '\0')
 	    cp++;
-	  pfnote (savenstr (name, cp-name), FALSE,
-		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	  make_tag (name, cp - name, FALSE,
+		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	}
       else if (members
 	       && LOOKING_AT (cp, "var")
@@ -4358,8 +4582,8 @@ PHP_functions (inf)
 	  name = cp;
 	  while (!notinname(*cp))
 	    cp++;
-	  pfnote (savenstr (name, cp-name), FALSE,
-		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	  make_tag (name, cp - name, FALSE,
+		    lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 	}
     }
 }
@@ -4390,15 +4614,15 @@ Cobol_paragraphs (inf)
       for (ep = bp; ISALNUM (*ep) || *ep == '-'; ep++)
 	continue;
       if (*ep++ == '.')
-	pfnote (savenstr (bp, ep-bp), TRUE,
-		lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+	make_tag (bp, ep - bp, TRUE,
+		  lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
     }
 }
 
 
 /*
  * Makefile support
- * Idea by Assar Westerlund <assar@sics.se> (2001)
+ * Ideas by Assar Westerlund <assar@sics.se> (2001)
  */
 static void
 Makefile_targets (inf)
@@ -4412,9 +4636,9 @@ Makefile_targets (inf)
 	continue;
       while (*bp != '\0' && *bp != '=' && *bp != ':')
 	bp++;
-      if (*bp == ':')
-	pfnote (savenstr (lb.buffer, bp - lb.buffer), TRUE,
-		lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
+      if (*bp == ':' || (globals && *bp == '='))
+	make_tag (lb.buffer, bp - lb.buffer, TRUE,
+		  lb.buffer, bp - lb.buffer + 1, lineno, linecharno);
     }
 }
 
@@ -4434,8 +4658,8 @@ Pascal_functions (inf)
 {
   linebuffer tline;		/* mostly copied from C_entries */
   long save_lcno;
-  int save_lineno, save_len;
-  char c, *cp, *namebuf;
+  int save_lineno, namelen, taglen;
+  char c, *name;
 
   bool				/* each of these flags is TRUE iff: */
     incomment,			/* point is inside a comment */
@@ -4449,15 +4673,15 @@ Pascal_functions (inf)
 				   is a FORWARD/EXTERN to be ignored, or
 				   whether it is a real tag */
 
-  save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
-  namebuf = NULL;		/* keep compiler quiet */
+  save_lcno = save_lineno = namelen = taglen = 0; /* keep compiler quiet */
+  name = NULL;			/* keep compiler quiet */
   dbp = lb.buffer;
   *dbp = '\0';
-  initbuffer (&tline);
+  linebuffer_init (&tline);
 
   incomment = inquote = FALSE;
   found_tag = FALSE;		/* have a proc name; check if extern */
-  get_tagname = FALSE;		/* have found "procedure" keyword    */
+  get_tagname = FALSE;		/* found "procedure" keyword	     */
   inparms = FALSE;		/* found '(' after "proc"            */
   verify_tag = FALSE;		/* check if "extern" is ahead        */
 
@@ -4526,7 +4750,7 @@ Pascal_functions (inf)
 	  }
       if (found_tag && verify_tag && (*dbp != ' '))
 	{
-	  /* check if this is an "extern" declaration */
+	  /* Check if this is an "extern" declaration. */
 	  if (*dbp == '\0')
 	    continue;
 	  if (lowcase (*dbp == 'e'))
@@ -4539,7 +4763,7 @@ Pascal_functions (inf)
 	    }
 	  else if (lowcase (*dbp) == 'f')
 	    {
-	      if (nocase_tail ("forward")) /*  check for forward reference */
+	      if (nocase_tail ("forward")) /* check for forward reference */
 		{
 		  found_tag = FALSE;
 		  verify_tag = FALSE;
@@ -4549,37 +4773,41 @@ Pascal_functions (inf)
 	    {
 	      found_tag = FALSE;
 	      verify_tag = FALSE;
-	      pfnote (namebuf, TRUE,
-		      tline.buffer, save_len, save_lineno, save_lcno);
+	      make_tag (name, namelen, TRUE,
+			tline.buffer, taglen, save_lineno, save_lcno);
 	      continue;
 	    }
 	}
       if (get_tagname)		/* grab name of proc or fn */
 	{
+	  char *cp;
+
 	  if (*dbp == '\0')
 	    continue;
 
-	  /* save all values for later tagging */
+	  /* Find block name. */
+	  for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
+	    continue;
+
+	  /* Save all values for later tagging. */
 	  linebuffer_setlen (&tline, lb.len);
 	  strcpy (tline.buffer, lb.buffer);
 	  save_lineno = lineno;
 	  save_lcno = linecharno;
+	  name = tline.buffer + (dbp - lb.buffer);
+	  namelen = cp - dbp;
+	  taglen = cp - lb.buffer + 1;
 
-	  /* grab block name */
-	  for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
-	    continue;
-	  namebuf = savenstr (dbp, cp-dbp);
 	  dbp = cp;		/* set dbp to e-o-token */
-	  save_len = dbp - lb.buffer + 1;
 	  get_tagname = FALSE;
 	  found_tag = TRUE;
 	  continue;
 
-	  /* and proceed to check for "extern" */
+	  /* And proceed to check for "extern". */
 	}
       else if (!incomment && !inquote && !found_tag)
 	{
-	  /* check for proc/fn keywords */
+	  /* Check for proc/fn keywords. */
 	  switch (lowcase (c))
 	    {
 	    case 'p':
@@ -4592,7 +4820,7 @@ Pascal_functions (inf)
 	      continue;
 	    }
 	}
-    }				/* while not eof */
+    } /* while not eof */
 
   free (tline.buffer);
 }
@@ -4618,7 +4846,7 @@ L_getit ()
       /* Ok, then skip "(" before name in (defstruct (foo)) */
       dbp = skip_spaces (dbp);
   }
-  get_tag (dbp);
+  get_tag (dbp, NULL);
 }
 
 static void
@@ -4669,7 +4897,7 @@ Lisp_functions (inf)
  *   Masatake Yamato <masata-y@is.aist-nara.ac.jp> (1999)
  */
 static void
-Postscript_functions (inf)
+PS_functions (inf)
      FILE *inf;
 {
   register char *bp, *ep;
@@ -4682,11 +4910,11 @@ Postscript_functions (inf)
 	       *ep != '\0' && *ep != ' ' && *ep != '{';
 	       ep++)
 	    continue;
-	  pfnote (savenstr (bp, ep-bp), TRUE,
-		  lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+	  make_tag (bp, ep - bp, TRUE,
+		    lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
 	}
       else if (LOOKING_AT (bp, "defineps"))
-	get_tag (bp);
+	get_tag (bp, NULL);
     }
 }
 
@@ -4714,10 +4942,10 @@ Scheme_functions (inf)
 	  /* Skip over open parens and white space */
 	  while (notinname (*bp))
 	    bp++;
-	  get_tag (bp);
+	  get_tag (bp, NULL);
 	}
       if (LOOKING_AT (bp, "(SET!") || LOOKING_AT (bp, "(set!"))
-	get_tag (bp);
+	get_tag (bp, NULL);
     }
 }
 
@@ -4779,8 +5007,7 @@ TeX_commands (inf)
 	    if (strneq (cp, key->buffer, key->len))
 	      {
 		register char *p;
-		char *name;
-		int linelen;
+		int namelen, linelen;
 		bool opgrp = FALSE;
 
 		cp = skip_spaces (cp + key->len);
@@ -4794,7 +5021,7 @@ TeX_commands (inf)
 		      *p != TEX_opgrp && *p != TEX_clgrp);
 		     p++)
 		  continue;
-		name = savenstr (cp, p-cp);
+		namelen = p - cp;
 		linelen = lb.len;
 		if (!opgrp || *p == TEX_clgrp)
 		  {
@@ -4802,7 +5029,8 @@ TeX_commands (inf)
 		      *p++;
 		    linelen = p - lb.buffer + 1;
 		  }
-		pfnote (name, TRUE, lb.buffer, linelen, lineno, linecharno);
+		make_tag (cp, namelen, TRUE,
+			  lb.buffer, linelen, lineno, linecharno);
 		goto tex_next_line; /* We only tag a line once */
 	      }
 	}
@@ -4912,8 +5140,134 @@ Texinfo_nodes (inf)
 	start = cp;
 	while (*cp != '\0' && *cp != ',')
 	  cp++;
-	pfnote (savenstr (start, cp - start), TRUE,
-		lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	make_tag (start, cp - start, TRUE,
+		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+      }
+}
+
+
+/* Similar to LOOKING_AT but does not use notinname, does not skip spaces */
+#define LOOKING_AT_NOCASE(cp, kw)	/* kw is a constant string */	\
+  (strncaseeq ((cp), kw, sizeof(kw)-1)	/* cp points at kw */		\
+   && ((cp) += sizeof(kw)-1))		/* advance cp past kw */
+
+/*
+ * HTML support.
+ * Text following <title>, <h1>, <h2>, <h3> is tagged with no explicit name.
+ * Text following a tag with id=xxx, or <a name=xxx>, is tagged as xxx.
+ * The scanner state is kept in flags that persist across input lines.
+ * Francesco Potortì, 2002.
+ */
+static void
+HTML_labels (inf)
+     FILE * inf;
+{
+  bool getnext = FALSE;		/* next text outside of HTML tags is a tag */
+  bool skiptag = FALSE;		/* skip to the end of the current HTML tag */
+  bool intag = FALSE;		/* inside an html tag, looking for ID= */
+  bool inanchor = FALSE;	/* when INTAG, is an anchor, look for NAME= */
+  char *end;
+
+
+  linebuffer_setlen (&token_name, 0); /* no name in buffer */
+
+  LOOP_ON_INPUT_LINES (inf, lb, dbp)
+    for (;;)			/* loop on the same line */
+      {
+	if (skiptag)		/* skip HTML tag */
+	  {
+	    while (*dbp != '\0' && *dbp != '>')
+	      dbp++;
+	    if (*dbp == '>')
+	      {
+		dbp += 1;
+		skiptag = FALSE;
+		continue;	/* look on the same line */
+	      }
+	    break;		/* go to next line */
+	  }
+
+	else if (intag)	/* look for "name=" or "id=" */
+	  {
+	    while (*dbp != '\0' && *dbp != '>'
+		   && lowcase (*dbp) != 'n' && lowcase (*dbp) != 'i')
+	      dbp++;
+	    if (*dbp == '\0')
+	      break;		/* go to next line */
+	    if (*dbp == '>')
+	      {
+		dbp += 1;
+		intag = FALSE;
+		continue;	/* look on the same line */
+	      }
+	    if ((inanchor && LOOKING_AT_NOCASE (dbp, "name="))
+		|| LOOKING_AT_NOCASE (dbp, "id="))
+	      {
+		bool quoted = (dbp[0] == '"');
+
+		if (quoted)
+		  for (end = ++dbp; *end != '\0' && *end != '"'; end++)
+		    continue;
+		else
+		  for (end = dbp; *end != '\0' && intoken (*end); end++)
+		    continue;
+		linebuffer_setlen (&token_name, end - dbp);
+		strncpy (token_name.buffer, dbp, end - dbp);
+		token_name.buffer[end - dbp] = '\0';
+
+		dbp = end;
+		intag = FALSE;	/* we found what we looked for */
+		skiptag = TRUE; /* skip to the end of the tag */
+		getnext = TRUE;	/* then grab the text */
+		continue;	/* look on the same line */
+	      }
+	    dbp += 1;
+	  }
+
+	else if (getnext)	/* grab next tokens and tag them */
+	  {
+	    dbp = skip_spaces (dbp);
+	    if (*dbp == '\0')
+	      break;		/* go to next line */
+	    if (*dbp == '<')
+	      {
+		intag = TRUE;
+		inanchor = (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]));
+		continue;	/* look on the same line */
+	      }
+
+	    for (end = dbp + 1; *end != '\0' && *end != '<'; end++)
+	      continue;
+	    make_tag (token_name.buffer, token_name.len, TRUE,
+		      dbp, end - dbp, lineno, linecharno);
+	    linebuffer_setlen (&token_name, 0);	/* no name in buffer */
+	    getnext = FALSE;
+	    break;		/* go to next line */
+	  }
+
+	else			/* look for an interesting HTML tag */
+	  {
+	    while (*dbp != '\0' && *dbp != '<')
+	      dbp++;
+	    if (*dbp == '\0')
+	      break;		/* go to next line */
+	    intag = TRUE;
+	    if (lowcase (dbp[1]) == 'a' && !intoken (dbp[2]))
+	      {
+		inanchor = TRUE;
+		continue;	/* look on the same line */
+	      }
+	    else if (LOOKING_AT_NOCASE (dbp, "<title>")
+		     || LOOKING_AT_NOCASE (dbp, "<h1>")
+		     || LOOKING_AT_NOCASE (dbp, "<h2>")
+		     || LOOKING_AT_NOCASE (dbp, "<h3>"))
+	      {
+		intag = FALSE;
+		getnext = TRUE;
+		continue;	/* look on the same line */
+	      }
+	    dbp += 1;
+	  }
       }
 }
 
@@ -5016,7 +5370,7 @@ prolog_pr (s, last)
 	  || len != strlen (last)
 	  || !strneq (s, last, len)))
 	{
-	  pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
+	  make_tag (s, len, TRUE, s, pos, lineno, linecharno);
 	  return len;
 	}
   else
@@ -5092,7 +5446,7 @@ prolog_atom (s, pos)
  */
 static int erlang_func __P((char *, char *));
 static void erlang_attribute __P((char *));
-static int erlang_atom __P((char *, int));
+static int erlang_atom __P((char *));
 
 static void
 Erlang_functions (inf)
@@ -5157,7 +5511,7 @@ erlang_func (s, last)
   int pos;
   int len;
 
-  pos = erlang_atom (s, 0);
+  pos = erlang_atom (s);
   if (pos < 1)
     return 0;
 
@@ -5170,7 +5524,7 @@ erlang_func (s, last)
 	  || len != (int)strlen (last)
 	  || !strneq (s, last, len)))
 	{
-	  pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
+	  make_tag (s, len, TRUE, s, pos, lineno, linecharno);
 	  return len;
 	}
 
@@ -5191,19 +5545,14 @@ static void
 erlang_attribute (s)
      char *s;
 {
-  int pos;
-  int len;
+  char *cp = s;			/* scanning pointer into the line S */
 
-  if (LOOKING_AT (s, "-define") || LOOKING_AT (s, "-record"))
+  if ((LOOKING_AT (cp, "-define") || LOOKING_AT (cp, "-record"))
+      && *cp++ == '(')
     {
-      if (s[pos++] == '(')
-	{
-	  pos = skip_spaces (s + pos) - s;
-	  len = erlang_atom (s, pos);
-	  if (len != 0)
-	    pfnote (savenstr (& s[pos], len), TRUE,
-		    s, pos + len, lineno, linecharno);
-	}
+      int len = erlang_atom (cp = skip_spaces (cp)); /* CP at atom start */
+      if (len > 0)
+	make_tag (cp, len, TRUE, s, cp + len - s, lineno, linecharno);
     }
   return;
 }
@@ -5214,66 +5563,46 @@ erlang_attribute (s)
- * Return the number of bytes consumed, or -1 if there was an error.
+ * Return the number of bytes consumed, or 0 if there was an error.
  */
 static int
-erlang_atom (s, pos)
+erlang_atom (s)
      char *s;
-     int pos;
 {
-  int origpos;
-
-  origpos = pos;
+  int pos = 0;			/* bytes of S consumed so far */
 
   if (ISALPHA (s[pos]) || s[pos] == '_')
     {
       /* The atom is unquoted. */
-      pos++;
-      while (ISALNUM (s[pos]) || s[pos] == '_')
+      do
 	pos++;
-      return pos - origpos;
+      while (ISALNUM (s[pos]) || s[pos] == '_');
     }
   else if (s[pos] == '\'')
     {
+      for (pos++; s[pos] != '\''; pos++)
+	if (s[pos] == '\0'	/* multiline quoted atoms are ignored */
+	    || (s[pos] == '\\' && s[++pos] == '\0'))
+	  return 0;		/* unterminated: report nothing consumed */
       pos++;
-
-      for (;;)
-	{
-	  if (s[pos] == '\'')
-	    {
-	      pos++;
-	      break;
-	    }
-	  else if (s[pos] == '\0')
-	    /* Multiline quoted atoms are ignored. */
-	    return -1;
-	  else if (s[pos] == '\\')
-	    {
-	      if (s[pos+1] == '\0')
-		return -1;
-	      pos += 2;
-	    }
-	  else
-	    pos++;
-	}
-      return pos - origpos;
     }
-  else
-    return -1;
+
+  return pos;			/* 0 when S does not start an atom */
 }
 
 
 #ifdef ETAGS_REGEXPS
 
 static char *scan_separators __P((char *));
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
+static void add_regex __P((char *, language *));
 static char *substitute __P((char *, char *, struct re_registers *));
 
-/* Take a string like "/blah/" and turn it into "blah", making sure
-   that the first and last characters are the same, and handling
-   quoted separator characters.  Actually, stops on the occurrence of
-   an unquoted separator.  Also turns "\t" into a Tab character, and
-   similarly for all character escape sequences supported by Gcc.
-   Returns pointer to terminating separator.  Works in place.  Null
-   terminates name string. */
+/*
+ * Take a string like "/blah/" and turn it into "blah", verifying
+ * that the first and last characters are the same, and handling
+ * quoted separator characters.  Actually, stops on the occurrence of
+ * an unquoted separator.  Also process \t, \n, etc. and turn into
+ * appropriate characters. Works in place.  Null terminates name string.
+ * Returns pointer to terminating separator, or NULL for
+ * unterminated regexps.
+ */
 static char *
 scan_separators (name)
      char *name;
@@ -5288,15 +5617,15 @@ scan_separators (name)
 	{
 	  switch (*name)
 	    {
-	    case 'a': *copyto++ = '\007'; break;
-	    case 'b': *copyto++ = '\b'; break;
-	    case 'd': *copyto++ = 0177; break;
-	    case 'e': *copyto++ = 033; break;
-	    case 'f': *copyto++ = '\f'; break;
-	    case 'n': *copyto++ = '\n'; break;
-	    case 'r': *copyto++ = '\r'; break;
-	    case 't': *copyto++ = '\t'; break;
-	    case 'v': *copyto++ = '\v'; break;
+	    case 'a': *copyto++ = '\007'; break; /* BEL (bell)		 */
+	    case 'b': *copyto++ = '\b'; break;	 /* BS (back space)	 */
+	    case 'd': *copyto++ = 0177; break;	 /* DEL (delete)	 */
+	    case 'e': *copyto++ = 033; break;	 /* ESC (escape)	 */
+	    case 'f': *copyto++ = '\f'; break;	 /* FF (form feed)	 */
+	    case 'n': *copyto++ = '\n'; break;	 /* NL (new line)	 */
+	    case 'r': *copyto++ = '\r'; break;	 /* CR (carriage return) */
+	    case 't': *copyto++ = '\t'; break;	 /* TAB (horizontal tab) */
+	    case 'v': *copyto++ = '\v'; break;	 /* VT (vertical tab)    */
 	    default:
 	      if (*name == sep)
 		*copyto++ = sep;
@@ -5317,6 +5646,8 @@ scan_separators (name)
       else
 	*copyto++ = *name;
     }
+  if (*name != sep)
+    name = NULL;		/* signal unterminated regexp */
 
   /* Terminate copied string. */
   *copyto = '\0';
@@ -5326,13 +5657,12 @@ scan_separators (name)
 /* Look at the argument of --regex or --no-regex and do the right
    thing.  Same for each line of a regexp file. */
 static void
-analyse_regex (regex_arg, ignore_case)
+analyse_regex (regex_arg)
      char *regex_arg;
-     bool ignore_case;
 {
   if (regex_arg == NULL)
     {
-      free_patterns ();		/* --no-regex: remove existing regexps */
+      free_regexps ();		/* --no-regex: remove existing regexps */
       return;
     }
 
@@ -5360,9 +5690,9 @@ analyse_regex (regex_arg, ignore_case)
 	    pfatal (regexfile);
 	    return;
 	  }
-	initbuffer (&regexbuf);
+	linebuffer_init (&regexbuf);
 	while (readline_internal (&regexbuf, regexfp) > 0)
-	  analyse_regex (regexbuf.buffer, ignore_case);
+	  analyse_regex (regexbuf.buffer);
 	free (regexbuf.buffer);
 	fclose (regexfp);
       }
@@ -5381,70 +5711,136 @@ analyse_regex (regex_arg, ignore_case)
 	      error ("unterminated language name in regex: %s", regex_arg);
 	      return;
 	    }
-	*cp = '\0';
+	*cp++ = '\0';
 	lang = get_language_from_langname (lang_name);
 	if (lang == NULL)
 	  return;
-	add_regex (cp + 1, ignore_case, lang);
+	add_regex (cp, lang);
       }
       break;
 
       /* Regexp to be used for any language. */
     default:
-      add_regex (regex_arg, ignore_case, NULL);
+      add_regex (regex_arg, NULL);
       break;
     }
 }
 
-/* Turn a name, which is an ed-style (but Emacs syntax) regular
-   expression, into a real regular expression by compiling it. */
+/* Split REGEXP_PATTERN (<sep>pattern<sep>name<sep>modifiers) in place,
+   parse the modifiers, compile the pattern and push it onto p_head. */
 static void
-add_regex (regexp_pattern, ignore_case, lang)
+add_regex (regexp_pattern, lang)
      char *regexp_pattern;
-     bool ignore_case;
      language *lang;
 {
   static struct re_pattern_buffer zeropattern;
-  char *name;
+  char sep, *pat, *name, *modifiers;
   const char *err;
   struct re_pattern_buffer *patbuf;
-  pattern *pp;
+  regexp *rp;
+  bool
+    force_explicit_name = TRUE, /* do not use implicit tag names */
+    ignore_case = FALSE,	/* case is significant */
+    multi_line = FALSE,		/* matches are done one line at a time */
+    single_line = FALSE;	/* dot does not match newline */
 
 
-  if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
+  if (strlen(regexp_pattern) < 3)
     {
-      error ("%s: unterminated regexp", regexp_pattern);
+      error ("null regexp", (char *)NULL);
       return;
     }
+  sep = regexp_pattern[0];
   name = scan_separators (regexp_pattern);
-  if (regexp_pattern[0] == '\0')
+  if (name == NULL)
     {
-      error ("null regexp", (char *)NULL);
+      error ("%s: unterminated regexp", regexp_pattern);
       return;
     }
-  (void) scan_separators (name);
+  if (name[1] == sep)
+    {
+      error ("null name for regexp \"%s\"", regexp_pattern);
+      return;
+    }
+  modifiers = scan_separators (name);
+  if (modifiers == NULL)	/* no terminating separator --> no name */
+    {
+      modifiers = name;
+      name = "";
+    }
+  else
+    modifiers += 1;		/* skip separator */
+
+  /* Parse regex modifiers. */
+  for (; modifiers[0] != '\0'; modifiers++)
+    switch (modifiers[0])
+      {
+      case 'N':
+	if (modifiers == name)
+	  error ("forcing explicit tag name but no name, ignoring", NULL);
+	force_explicit_name = TRUE;
+	break;
+      case 'i':
+	ignore_case = TRUE;
+	break;
+      case 's':
+	single_line = TRUE;
+	/* FALLTHRU */
+      case 'm':
+	multi_line = TRUE;
+	need_filebuf = TRUE;
+	break;
+      default:
+	{
+	  char wrongmod [2];
+	  wrongmod[0] = modifiers[0];
+	  wrongmod[1] = '\0';
+	  error ("invalid regexp modifier `%s', ignoring", wrongmod);
+	}
+	break;
+      }
 
   patbuf = xnew (1, struct re_pattern_buffer);
   *patbuf = zeropattern;
   if (ignore_case)
-    patbuf->translate = lc_trans;	/* translation table to fold case  */
+    {
+      static char lc_trans[CHARS];
+      int i;
+      for (i = 0; i < CHARS; i++)
+	lc_trans[i] = lowcase (i);
+      patbuf->translate = lc_trans;	/* translation table to fold case  */
+    }
+
+  if (multi_line)
+    pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
+  else
+    pat = regexp_pattern;
 
-  err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
+  if (single_line)
+    re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
+  else
+    re_set_syntax (RE_SYNTAX_EMACS);
+
+  err = re_compile_pattern (pat, strlen (pat), patbuf);
+  if (multi_line)
+    free (pat);
   if (err != NULL)
     {
       error ("%s while compiling pattern", err);
       return;
     }
 
-  pp = p_head;
-  p_head = xnew (1, pattern);
-  p_head->regex = savestr (regexp_pattern);
-  p_head->p_next = pp;
+  rp = p_head;
+  p_head = xnew (1, regexp);
+  p_head->pattern = savestr (regexp_pattern);
+  p_head->p_next = rp;
   p_head->lang = lang;
   p_head->pat = patbuf;
-  p_head->name_pattern = savestr (name);
+  p_head->name = savestr (name);
   p_head->error_signaled = FALSE;
+  p_head->force_explicit_name = force_explicit_name;
   p_head->ignore_case = ignore_case;
+  p_head->multi_line = multi_line;
 }
 
 /*
@@ -5478,6 +5874,7 @@ substitute (in, out, regs)
       size -= 1;
 
   /* Allocate space and do the substitutions. */
+  assert (size >= 0);
   result = xnew (size + 1, char);
 
   for (t = result; *out != '\0'; out++)
@@ -5492,26 +5889,111 @@ substitute (in, out, regs)
       *t++ = *out;
   *t = '\0';
 
-  assert (t <= result + size && t - result == (int)strlen (result));
+  assert (t <= result + size);
+  assert (t - result == (int)strlen (result));
 
   return result;
 }
 
-/* Deallocate all patterns. */
+/* Free every regexp on the p_head list, with its pattern and name. */
 static void
-free_patterns ()
+free_regexps ()
 {
-  pattern *pp;
+  regexp *rp;
   while (p_head != NULL)
     {
-      pp = p_head->p_next;
-      free (p_head->regex);
-      free (p_head->name_pattern);
+      rp = p_head->p_next;	/* save the link before freeing the node */
+      free (p_head->pattern);
+      free (p_head->name);	/* NOTE(review): ->pat is not freed here */
       free (p_head);
-      p_head = pp;
+      p_head = rp;
     }
   return;
 }
+
+/*
+ * Reads the whole file as a single string from `filebuf' and looks for
+ * multi-line regular expressions, creating tags on matches.
+ * readline already dealt with normal regexps.
+ *
+ * Idea by Ben Wing <ben@666.com> (2002).
+ */
+static void
+regex_tag_multiline ()
+{
+  char *buffer = filebuf.buffer;
+  regexp *rp;
+  char *name;
+
+  for (rp = p_head; rp != NULL; rp = rp->p_next)
+    {
+      int match = 0;
+
+      if (!rp->multi_line)
+	continue;		/* skip normal regexps */
+
+      /* Generic initialisations before parsing file from memory. */
+      lineno = 1;		/* reset global line number */
+      charno = 0;		/* reset global char number */
+      linecharno = 0;		/* reset global char number of line start */
+
+      /* Only use generic regexps or those for the current language. */
+      if (rp->lang != NULL && rp->lang != curfdp->lang)
+	continue;
+
+      while (match >= 0 && match < filebuf.len)
+	{
+	  match = re_search (rp->pat, buffer, filebuf.len, charno,
+			     filebuf.len - match, &rp->regs);
+	  switch (match)
+	    {
+	    case -2:
+	      /* Some error. */
+	      if (!rp->error_signaled)
+		{
+		  error ("regexp stack overflow while matching \"%s\"",
+			 rp->pattern);
+		  rp->error_signaled = TRUE;
+		}
+	      break;
+	    case -1:
+	      /* No match. */
+	      break;
+	    default:
+	      if (match == rp->regs.end[0])
+		{
+		  if (!rp->error_signaled)
+		    {
+		      error ("regexp matches the empty string: \"%s\"",
+			     rp->pattern);
+		      rp->error_signaled = TRUE;
+		    }
+		  match = -3;	/* exit from while loop */
+		  break;
+		}
+
+	      /* Match occurred.  Construct a tag. */
+	      while (charno < rp->regs.end[0])
+		if (buffer[charno++] == '\n')
+		  lineno++, linecharno = charno;
+	      name = rp->name;
+	      if (name[0] == '\0')
+		name = NULL;
+	      else /* make a named tag */
+		name = substitute (buffer, rp->name, &rp->regs);
+	      if (rp->force_explicit_name) /* explicit tag name, if any */
+		pfnote (name, TRUE, buffer + linecharno,
+			charno - linecharno + 1, lineno, linecharno);
+	      else		/* no name: pass length 0, not strlen (NULL) */
+		make_tag (name, name == NULL ? 0 : strlen (name), TRUE,
+			  buffer + linecharno,
+			  charno - linecharno + 1, lineno, linecharno);
+	      break;
+	    }
+	}
+    }
+}
+
 #endif /* ETAGS_REGEXPS */
 
 
@@ -5531,32 +6013,24 @@ nocase_tail (cp)
   return FALSE;
 }
 
-static char *
-get_tag (bp)
+static void
+get_tag (bp, namepp)
      register char *bp;
+     char **namepp;		/* if not NULL, gets a savenstr'd name */
 {
-  register char *cp, *name;
+  register char *cp = bp;	/* name end; stays at BP if *BP is NUL */
 
-  if (*bp == '\0')
-    return NULL;
-  /* Go till you get to white space or a syntactic break */
-  for (cp = bp + 1; !notinname (*cp); cp++)
-    continue;
-  name = savenstr (bp, cp-bp);
-  pfnote (name, TRUE,
-	  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
-  return name;
-}
+  if (*bp != '\0')
+    {
+      /* Advance CP to the first notinname character after BP. */
+      for (cp = bp + 1; !notinname (*cp); cp++)
+	continue;
+      make_tag (bp, cp - bp, TRUE,
+		lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+    }
 
-/* Initialize a linebuffer for use */
-static void
-initbuffer (lbp)
-     linebuffer *lbp;
-{
-  lbp->size = (DEBUG) ? 3 : 200;
-  lbp->buffer = xnew (lbp->size, char);
-  lbp->buffer[0] = '\0';
-  lbp->len = 0;
+  if (namepp != NULL)
+    *namepp = savenstr (bp, cp - bp);
 }
 
 /*
@@ -5564,10 +6038,13 @@ initbuffer (lbp)
  * newline or CR-NL, if any.  Return the number of characters read from
  * `stream', which is the length of the line including the newline.
  *
- * On DOS or Windows we do not count the CR character, if any, before the
- * NL, in the returned length; this mirrors the behavior of emacs on those
+ * On DOS or Windows we do not count the CR character, if any, before the
+ * NL, in the returned length; this mirrors the behavior of Emacs on those
  * platforms (for text files, it translates CR-NL to NL as it reads in the
  * file).
+ *
+ * If multi-line regular expressions are requested, each line read is
+ * appended to `filebuf'.
  */
 static long
 readline_internal (lbp, stream)
@@ -5626,12 +6103,28 @@ readline_internal (lbp, stream)
     }
   lbp->len = p - buffer;
 
+  if (need_filebuf		/* we need filebuf for multi-line regexps */
+      && chars_deleted > 0)	/* not at EOF */
+    {
+      while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
+	{
+	  /* Expand filebuf. */
+	  filebuf.size *= 2;
+	  xrnew (filebuf.buffer, filebuf.size, char);
+	}
+      strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
+      filebuf.len += lbp->len;
+      filebuf.buffer[filebuf.len++] = '\n';
+      filebuf.buffer[filebuf.len] = '\0';
+    }
+
   return lbp->len + chars_deleted;
 }
 
 /*
  * Like readline_internal, above, but in addition try to match the
- * input line against relevant regular expressions.
+ * input line against relevant regular expressions and manage #line
+ * directives.
  */
 static void
 readline (lbp, stream)
@@ -5736,6 +6229,8 @@ readline (lbp, stream)
 			  fdhead->infabsdir = savestr (curfdp->infabsdir);
 			  fdhead->taggedfname = taggedfname;
 			  fdhead->usecharno = FALSE;
+			  fdhead->prop = NULL;
+			  fdhead->written = FALSE;
 			  curfdp = fdhead;
 			}
 		    }
@@ -5752,8 +6247,8 @@ readline (lbp, stream)
 	{
 	  if (result > 0)
 	    {
-	    /* Do a tail recursion on ourselves, thus discarding the contents
-	       of the line buffer. */
+	      /* Do a tail recursion on ourselves, thus discarding the contents
+		 of the line buffer. */
 	      readline (lbp, stream);
 	      return;
 	    }
@@ -5766,46 +6261,56 @@ readline (lbp, stream)
 #ifdef ETAGS_REGEXPS
   {
     int match;
-    pattern *pp;
+    regexp *rp;
+    char *name;
 
-    /* Match against relevant patterns. */
+    /* Match against relevant regexps. */
     if (lbp->len > 0)
-      for (pp = p_head; pp != NULL; pp = pp->p_next)
+      for (rp = p_head; rp != NULL; rp = rp->p_next)
 	{
-	  /* Only use generic regexps or those for the current language. */
-	  if (pp->lang != NULL && pp->lang != fdhead->lang)
+	  /* Only use generic regexps or those for the current language.
+	     Also do not use multiline regexps, which is the job of
+	     regex_tag_multiline. */
+	  if ((rp->lang != NULL && rp->lang != fdhead->lang)
+	      || rp->multi_line)
 	    continue;
 
-	  match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
+	  match = re_match (rp->pat, lbp->buffer, lbp->len, 0, &rp->regs);
 	  switch (match)
 	    {
 	    case -2:
 	      /* Some error. */
-	      if (!pp->error_signaled)
+	      if (!rp->error_signaled)
 		{
-		  error ("error while matching \"%s\"", pp->regex);
-		  pp->error_signaled = TRUE;
+		  error ("regexp stack overflow while matching \"%s\"",
+			 rp->pattern);
+		  rp->error_signaled = TRUE;
 		}
 	      break;
 	    case -1:
 	      /* No match. */
 	      break;
-	    default:
-	      /* Match occurred.  Construct a tag. */
-	      if (pp->name_pattern[0] != '\0')
+	    case 0:
+	      /* Empty string matched. */
+	      if (!rp->error_signaled)
 		{
-		  /* Make a named tag. */
-		  char *name = substitute (lbp->buffer,
-					   pp->name_pattern, &pp->regs);
-		  if (name != NULL)
-		    pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
+		  error ("regexp matches the empty string: \"%s\"", rp->pattern);
+		  rp->error_signaled = TRUE;
 		}
+	      break;
+	    default:
+	      /* Match occurred.  Construct a tag. */
+	      name = rp->name;
+	      if (name[0] == '\0')
+		name = NULL;
+	      else /* make a named tag */
+		name = substitute (lbp->buffer, rp->name, &rp->regs);
+	      if (rp->force_explicit_name)
+		/* Force explicit tag name, if a name is there. */
+		pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
 	      else
-		{
-		  /* Make an unnamed tag. */
-		  pfnote ((char *)NULL, TRUE,
+		make_tag (name, strlen (name), TRUE,
 			  lbp->buffer, match, lineno, linecharno);
-		}
 	      break;
 	    }
 	}
@@ -5884,13 +6389,12 @@ etags_strchr (sp, c)
 }
 
 /*
- * Return TRUE if the two strings are equal, ignoring case for alphabetic
- * characters.
+ * Compare two strings, ignoring case for alphabetic characters.
  *
- * Analogous to BSD's strcasecmp, included for portability.
+ * Same as BSD's strcasecmp, included for portability.
  */
-static bool
-strcaseeq (s1, s2)
+static int
+etags_strcasecmp (s1, s2)
      register const char *s1;
      register const char *s2;
 {
@@ -5900,7 +6404,35 @@ strcaseeq (s1, s2)
 	     : *s1 == *s2))
     s1++, s2++;
 
-  return (*s1 == *s2);
+  return (ISALPHA (*s1) && ISALPHA (*s2)
+	  ? lowcase (*s1) - lowcase (*s2)
+	  : *s1 - *s2);
+}
+
+/*
+ * Compare at most N characters of S1 and S2, ignoring case for
+ * alphabetic characters.  Return 0 if they are equal, else the
+ * difference of the first pair of characters that differ.
+ * Same as BSD's strncasecmp, included for portability.
+ */
+static int
+etags_strncasecmp (s1, s2, n)
+     register const char *s1;
+     register const char *s2;
+     register int n;
+{
+  /* Test N first, so that N is exhausted even when S1 ends just there. */
+  while (n-- > 0 && *s1 != '\0'
+	 && (ISALPHA (*s1) && ISALPHA (*s2)
+	     ? lowcase (*s1) == lowcase (*s2)
+	     : *s1 == *s2))
+    s1++, s2++;
+  if (n < 0)			/* all N characters matched */
+    return 0;
+  else
+    return (ISALPHA (*s1) && ISALPHA (*s2)
+	    ? lowcase (*s1) - lowcase (*s2)
+	    : *s1 - *s2);
 }
 
 /* Skip spaces, return new pointer. */
@@ -5929,7 +6461,7 @@ fatal (s1, s2)
      char *s1, *s2;
 {
   error (s1, s2);
-  exit (BAD);
+  exit (EXIT_FAILURE);
 }
 
 static void
@@ -5937,21 +6469,21 @@ pfatal (s1)
      char *s1;
 {
   perror (s1);
-  exit (BAD);
+  exit (EXIT_FAILURE);
 }
 
 static void
 suggest_asking_for_help ()
 {
-  fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
-	   progname,
+  /* Print a hint about the help option on stderr, then exit with failure. */
 #ifdef LONG_OPTIONS
-	   "--help"
+  fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
+	   progname, "--help");
 #else
-	   "-h"
+  fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
+	   progname, "-h");
 #endif
-	   );
-  exit (BAD);
+  exit (EXIT_FAILURE);
 }
 
 /* Print error message.  `s1' is printf control string, `s2' is arg for it. */
@@ -6021,7 +6553,7 @@ etags_getcwd ()
   linebuffer path;
   FILE *pipe;
 
-  initbuffer (&path);
+  linebuffer_init (&path);
   pipe = (FILE *) popen ("pwd 2>/dev/null", "r");
   if (pipe == NULL || readline_internal (&path, pipe) == 0)
     pfatal ("pwd");
@@ -6187,6 +6719,18 @@ canonicalize_filename (fn)
 #endif
 }
 
+
+/* Initialize a linebuffer for use: allocate it and make it hold "". */
+static void
+linebuffer_init (lbp)
+     linebuffer *lbp;
+{
+  lbp->size = (DEBUG) ? 3 : 200;	/* initial size; tiny if DEBUG */
+  lbp->buffer = xnew (lbp->size, char);
+  lbp->buffer[0] = '\0';
+  lbp->len = 0;
+}
+
 /* Set the minimum size of a string contained in a linebuffer. */
 static void
 linebuffer_setlen (lbp, toksize)
@@ -6201,7 +6745,7 @@ linebuffer_setlen (lbp, toksize)
   lbp->len = toksize;
 }
 
-/* Like malloc but get fatal error if memory is exhausted.  */
+/* Like malloc but get fatal error if memory is exhausted. */
 static PTR
 xmalloc (size)
      unsigned int size;
@@ -6229,6 +6773,11 @@ xrealloc (ptr, size)
  * indent-tabs-mode: t
  * tab-width: 8
  * fill-column: 79
- * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
+ * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "regexp")
  * End:
  */
+
+/* arch-tag: 8a9b748d-390c-4922-99db-2eeefa921051
+   (do not change this comment) */
+
+/* etags.c ends here */