X-Git-Url: http://git.hcoop.net/bpt/emacs.git/blobdiff_plain/901b219d7b84c2f7b9ff22571e00a61c1af03269..5bd471e85a81bc27e319ee84efcdab0cc9a6e2f7:/lib-src/etags.c

diff --git a/lib-src/etags.c b/lib-src/etags.c
index c9b5ddeb4d..552a09d2f0 100644
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -1,5 +1,5 @@
 /* Tags file maker to go with GNU Emacs
-   Copyright (C) 1984, 87, 88, 89, 93, 94, 95
+   Copyright (C) 1984, 87, 88, 89, 93, 94, 95, 98, 99
    Free Software Foundation, Inc. and Ken Arnold
 
 This file is not considered part of GNU Emacs.
@@ -28,22 +28,38 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  *	Francesco Potorti` reorganised C and C++ based on work by Joe Wells.
  *	Regexp tags by Tom Tromey.
  *
- *	Francesco Potorti` (F.Potorti@cnuce.cnr.it) is the current maintainer.
+ *	Francesco Potorti` (pot@gnu.org) is the current maintainer.
  */
 
-char pot_etags_version[] = "@(#) pot revision number is 11.66";
+char pot_etags_version[] = "@(#) pot revision number is 13.33";
 
 #define	TRUE	1
 #define	FALSE	0
 
+#ifndef _GNU_SOURCE
+# define _GNU_SOURCE		/* enables some compiler checks on GNU */
+#endif
 #ifndef DEBUG
 # define DEBUG FALSE
 #endif
 
+#ifdef HAVE_CONFIG_H
+# include <config.h>
+  /* On some systems, Emacs defines static as nothing for the sake
+     of unexec.  We don't want that here since we don't use unexec. */
+# undef static
+# define ETAGS_REGEXPS		/* use the regexp features */
+# define LONG_OPTIONS		/* accept long options */
+#endif /* HAVE_CONFIG_H */
+
 #ifdef MSDOS
-# include <string.h>
 # include <fcntl.h>
 # include <sys/param.h>
+# include <io.h>
+# ifndef HAVE_CONFIG_H
+#   define DOS_NT
+#   include <sys/config.h>
+# endif
 #endif /* MSDOS */
 
 #ifdef WINDOWSNT
@@ -52,20 +68,34 @@ char pot_etags_version[] = "@(#) pot revision number is 11.66";
 # include <string.h>
 # include <io.h>
 # define MAXPATHLEN _MAX_PATH
+# ifdef HAVE_CONFIG_H
+#   undef HAVE_NTGUI
+# else
+#   define DOS_NT
+# endif /* not HAVE_CONFIG_H */
+# ifndef HAVE_GETCWD
+#   define HAVE_GETCWD
+# endif /* undef HAVE_GETCWD */
+#endif /* WINDOWSNT */
+
+#if !defined (WINDOWSNT) && defined (STDC_HEADERS)
+#include <stdlib.h>
+#include <string.h>
 #endif
 
-#ifdef HAVE_CONFIG_H
-# include <config.h>
-  /* On some systems, Emacs defines static as nothing for the sake
-     of unexec.  We don't want that here since we don't use unexec. */
-# undef static
-#endif
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#else
+# ifdef HAVE_GETCWD
+    extern char *getcwd ();
+# endif
+#endif /* HAVE_UNISTD_H */
 
 #include <stdio.h>
 #include <ctype.h>
 #include <errno.h>
 #ifndef errno
-extern int errno;
+  extern int errno;
 #endif
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -74,14 +104,20 @@ extern int errno;
 # define S_ISREG(m)	(((m) & S_IFMT) == S_IFREG)
 #endif
 
-#include <getopt.h>
+#ifdef LONG_OPTIONS
+# include <getopt.h>
+#else
+# define getopt_long(argc,argv,optstr,lopts,lind) getopt (argc, argv, optstr)
+  extern char *optarg;
+  extern int optind, opterr;
+#endif /* LONG_OPTIONS */
 
 #ifdef ETAGS_REGEXPS
 # include <regex.h>
 #endif /* ETAGS_REGEXPS */
 
 /* Define CTAGS to make the program "ctags" compatible with the usual one.
- Let it undefined to make the program "etags", which makes emacs-style
+ Leave it undefined to make the program "etags", which makes emacs-style
  tag tables and tags typedefs, #defines and struct/union/enum by default. */
 #ifdef CTAGS
 # undef  CTAGS
@@ -102,78 +138,72 @@ extern int errno;
 /* C extensions. */
 #define C_PLPL	0x00001		/* C++ */
 #define C_STAR	0x00003		/* C* */
+#define C_JAVA	0x00005		/* JAVA */
 #define YACC	0x10000		/* yacc file */
 
-#define streq(s,t)	((DEBUG &&!(s)&&!(t)&&(abort(),1)) || !strcmp(s,t))
-#define strneq(s,t,n)	((DEBUG &&!(s)&&!(t)&&(abort(),1)) || !strncmp(s,t,n))
-
-#define lowcase(c)	tolower ((unsigned char)c)
+#define streq(s,t)	((DEBUG && (s) == NULL && (t) == NULL	\
+			  && (abort (), 1)) || !strcmp (s, t))
+#define strneq(s,t,n)	((DEBUG && (s) == NULL && (t) == NULL	\
+			  && (abort (), 1)) || !strncmp (s, t, n))
 
-#define	iswhite(arg)	(_wht[arg])	/* T if char is white		*/
-#define	begtoken(arg)	(_btk[arg])	/* T if char can start token	*/
-#define	intoken(arg)	(_itk[arg])	/* T if char can be in token	*/
-#define	endtoken(arg)	(_etk[arg])	/* T if char ends tokens	*/
+#define lowcase(c)	tolower ((char)c)
 
-#ifdef DOS_NT
-# define absolutefn(fn) (fn[0] == '/' \
-			 || (fn[1] == ':' && fn[2] == '/'))
-#else
-# define absolutefn(fn) (fn[0] == '/')
-#endif
+#define CHARS 256		/* 2^sizeof(char) */
+#define CHAR(x)		((unsigned int)x & (CHARS - 1))
+#define	iswhite(c)	(_wht[CHAR(c)]) /* c is white */
+#define notinname(c)	(_nin[CHAR(c)]) /* c is not in a name */
+#define	begtoken(c)	(_btk[CHAR(c)]) /* c can start token */
+#define	intoken(c)	(_itk[CHAR(c)]) /* c can be in token */
+#define	endtoken(c)	(_etk[CHAR(c)]) /* c ends tokens */
 
 
 /*
- *	xnew -- allocate storage
+ *	xnew, xrnew -- allocate, reallocate storage
  *
  * SYNOPSIS:	Type *xnew (int n, Type);
+ *		Type *xrnew (OldPointer, int n, Type);
  */
-#define xnew(n,Type)	((Type *) xmalloc ((n) * sizeof (Type)))
+#ifdef chkmalloc
+# include "chkmalloc.h"
+# define xnew(n,Type)	  ((Type *) trace_malloc (__FILE__, __LINE__, \
+						  (n) * sizeof (Type)))
+# define xrnew(op,n,Type) ((Type *) trace_realloc (__FILE__, __LINE__, \
+						   (op), (n) * sizeof (Type)))
+#else
+# define xnew(n,Type)	  ((Type *) xmalloc ((n) * sizeof (Type)))
+# define xrnew(op,n,Type) ((Type *) xrealloc ((op), (n) * sizeof (Type)))
+#endif
 
-typedef int logical;
+typedef int bool;
 
-typedef struct nd_st
-{				/* sorting structure		*/
-  char *name;			/* function or type name	*/
-  char *file;			/* file name			*/
-  logical is_func;		/* use pattern or line no	*/
-  logical been_warned;		/* set if noticed dup		*/
-  int lno;			/* line number tag is on	*/
-  long cno;			/* character number line starts on */
-  char *pat;			/* search pattern		*/
-  struct nd_st *left, *right;	/* left and right sons		*/
-} NODE;
+typedef void Lang_function ();
 
-extern char *getenv ();
+typedef struct
+{
+  char *suffix;
+  char *command;		/* Takes one arg and decompresses to stdout */
+} compressor;
 
-char *concat ();
-char *savenstr (), *savestr ();
-char *etags_strchr (), *etags_strrchr ();
-char *etags_getcwd ();
-char *relative_filename (), *absolute_filename (), *absolute_dirname ();
-long *xmalloc (), *xrealloc ();
+typedef struct
+{
+  char *name;
+  Lang_function *function;
+  char **suffixes;
+  char **interpreters;
+} language;
 
-typedef void Lang_function ();
-#if FALSE				/* many compilers barf on this */
-Lang_function Asm_labels;
-Lang_function default_C_entries;
-Lang_function C_entries;
-Lang_function Cplusplus_entries;
-Lang_function Cstar_entries;
-Lang_function Erlang_functions;
-Lang_function Fortran_functions;
-Lang_function Yacc_entries;
-Lang_function Lisp_functions;
-Lang_function Pascal_functions;
-Lang_function Perl_functions;
-Lang_function Prolog_functions;
-Lang_function Scheme_functions;
-Lang_function TeX_functions;
-Lang_function just_read_file;
-#else				/* so let's write it this way */
+extern char *getenv ();
+
+/* Many compilers barf on this:
+	Lang_function Ada_funcs;
+   so let's write it this way */
+void Ada_funcs ();
 void Asm_labels ();
 void C_entries ();
 void default_C_entries ();
 void plain_C_entries ();
+void Cjava_entries ();
+void Cobol_paragraphs ();
 void Cplusplus_entries ();
 void Cstar_entries ();
 void Erlang_functions ();
@@ -182,150 +212,217 @@ void Yacc_entries ();
 void Lisp_functions ();
 void Pascal_functions ();
 void Perl_functions ();
+void Postscript_functions ();
 void Prolog_functions ();
+void Python_functions ();
 void Scheme_functions ();
 void TeX_functions ();
 void just_read_file ();
-#endif
 
-Lang_function *get_language_from_name ();
-Lang_function *get_language_from_interpreter ();
-Lang_function *get_language_from_suffix ();
+compressor *get_compressor_from_suffix ();
+language *get_language_from_name ();
+language *get_language_from_interpreter ();
+language *get_language_from_suffix ();
 int total_size_of_entries ();
-long readline ();
-long readline_internal ();
+long readline (), readline_internal ();
+void get_tag ();
+
 #ifdef ETAGS_REGEXPS
+void analyse_regex ();
 void add_regex ();
-#endif
-void add_node ();
+void free_patterns ();
+#endif /* ETAGS_REGEXPS */
 void error ();
 void suggest_asking_for_help ();
 void fatal (), pfatal ();
-void find_entries ();
-void free_tree ();
-void getit ();
+void add_node ();
+
 void init ();
 void initbuffer ();
-void pfnote ();
+void find_entries ();
+void free_tree ();
+void pfnote (), new_pfnote ();
 void process_file ();
 void put_entries ();
 void takeprec ();
 
+char *concat ();
+char *skip_spaces (), *skip_non_spaces ();
+char *savenstr (), *savestr ();
+char *etags_strchr (), *etags_strrchr ();
+char *etags_getcwd ();
+char *relative_filename (), *absolute_filename (), *absolute_dirname ();
+bool filename_is_absolute ();
+void canonicalize_filename ();
+void grow_linebuffer ();
+long *xmalloc (), *xrealloc ();
+
 
 char searchar = '/';		/* use /.../ searches */
 
-int lineno;			/* line number of current line */
-long charno;			/* current character number */
-long linecharno;		/* charno of start of line */
-
-char *curfile;			/* current input file name */
 char *tagfile;			/* output file */
 char *progname;			/* name this program was invoked with */
 char *cwd;			/* current working directory */
 char *tagfiledir;		/* directory of tagfile */
-
 FILE *tagf;			/* ioptr for tags file */
-NODE *head;			/* the head of the binary tree of tags */
+
+char *curfile;			/* current input file name */
+language *curlang;		/* current language */
+
+int lineno;			/* line number of current line */
+long charno;			/* current character number */
+long linecharno;		/* charno of start of current line */
+char *dbp;			/* pointer to start of current tag */
+
+typedef struct node_st
+{				/* sorting structure		*/
+  char *name;			/* function or type name	*/
+  char *file;			/* file name			*/
+  bool is_func;			/* use pattern or line no	*/
+  bool been_warned;		/* set if noticed dup		*/
+  int lno;			/* line number tag is on	*/
+  long cno;			/* character number line starts on */
+  char *pat;			/* search pattern		*/
+  struct node_st *left, *right;	/* left and right sons		*/
+} node;
+
+node *head;			/* the head of the binary tree of tags */
 
 /*
- * A `struct linebuffer' is a structure which holds a line of text.
- * `readline' reads a line from a stream into a linebuffer and works
- * regardless of the length of the line.
+ * A `linebuffer' is a structure which holds a line of text.
+ * `readline_internal' reads a line from a stream into a linebuffer
+ * and works regardless of the length of the line.
+ * SIZE is the size of BUFFER, LEN is the length of the string in
+ * BUFFER after readline reads it.
  */
-#define GROW_LINEBUFFER(buf,toksize)					\
-while (buf.size < toksize)						\
-  buf.buffer = (char *) xrealloc (buf.buffer, buf.size *= 2)
-struct linebuffer
+typedef struct
 {
   long size;
+  int len;
   char *buffer;
-};
+} linebuffer;
 
-struct linebuffer lb;		/* the current line */
-struct linebuffer token_name;	/* used by C_entries as a temporary area */
+linebuffer lb;			/* the current line */
+linebuffer token_name;		/* used by C_entries as a temporary area */
 struct
 {
   long linepos;
-  struct linebuffer lb;		/* used by C_entries instead of lb */
+  linebuffer lb;		/* used by C_entries instead of lb */
 } lbs[2];
 
 /* boolean "functions" (see init)	*/
-logical _wht[0177], _etk[0177], _itk[0177], _btk[0177];
+bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
 char
   /* white chars */
-  *white = " \f\t\n\013",
+  *white = " \f\t\n\r",
+  /* not in a name */
+  *nonam = " \f\t\n\r(=,[;",
   /* token ending chars */
-  *endtk = " \t\n\013\"'#()[]{}=-+%*/&|^~!<>;,.:?",
+  *endtk = " \t\n\r\"'#()[]{}=-+%*/&|^~!<>;,.:?",
   /* token starting chars */
   *begtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$~@",
   /* valid in-token chars */
-  *intk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
+  *midtk = "ABCDEFGHIJKLMNOPQRSTUVWXYZ_abcdefghijklmnopqrstuvwxyz$0123456789";
 
-logical append_to_tagfile;	/* -a: append to tags */
-/* The following three default to TRUE for etags, but to FALSE for ctags.  */
-logical typedefs;		/* -t: create tags for typedefs */
-logical typedefs_and_cplusplus;	/* -T: create tags for typedefs, level */
+bool append_to_tagfile;		/* -a: append to tags */
+/* The following four default to TRUE for etags, but to FALSE for ctags.  */
+bool typedefs;			/* -t: create tags for C and Ada typedefs */
+bool typedefs_and_cplusplus;	/* -T: create tags for C typedefs, level */
 				/* 0 struct/enum/union decls, and C++ */
 				/* member functions. */
-logical constantypedefs;	/* -d: create tags for C #define and enum */
-				/* constants.  Enum consts not implemented. */
+bool constantypedefs;		/* -d: create tags for C #define, enum */
+				/* constants and variables. */
 				/* -D: opposite of -d.  Default under ctags. */
-logical update;			/* -u: update tags */
-logical vgrind_style;		/* -v: create vgrind style index output */
-logical no_warnings;		/* -w: suppress warnings */
-logical cxref_style;		/* -x: create cxref style output */
-logical cplusplus;		/* .[hc] means C++, not C */
-logical noindentypedefs;	/* -I: ignore indentation in C */
-
+bool declarations;		/* --declarations: tag them and extern in C&Co*/
+bool globals;			/* create tags for global variables */
+bool members;			/* create tags for C member variables */
+bool update;			/* -u: update tags */
+bool vgrind_style;		/* -v: create vgrind style index output */
+bool no_warnings;		/* -w: suppress warnings */
+bool cxref_style;		/* -x: create cxref style output */
+bool cplusplus;			/* .[hc] means C++, not C */
+bool noindentypedefs;		/* -I: ignore indentation in C */
+bool packages_only;		/* --packages-only: in Ada, only tag packages*/
+
+#ifdef LONG_OPTIONS
 struct option longopts[] =
 {
-  { "append",			no_argument,	   NULL, 'a' },
-  { "backward-search",		no_argument,	   NULL, 'B' },
-  { "c++",			no_argument,	   NULL, 'C' },
-  { "cxref",			no_argument,	   NULL, 'x' },
-  { "defines",			no_argument,	   NULL, 'd' },
-  { "help",			no_argument,	   NULL, 'h' },
-  { "help",			no_argument,	   NULL, 'H' },
-  { "ignore-indentation",	no_argument,	   NULL, 'I' },
-  { "include",			required_argument, NULL, 'i' },
-  { "language",                 required_argument, NULL, 'l' },
-  { "no-defines",		no_argument,	   NULL, 'D' },
-  { "no-regex",			no_argument,	   NULL, 'R' },
-  { "no-warn",			no_argument,	   NULL, 'w' },
-  { "output",			required_argument, NULL, 'o' },
-  { "regex",			required_argument, NULL, 'r' },
-  { "typedefs",			no_argument,	   NULL, 't' },
-  { "typedefs-and-c++",		no_argument,	   NULL, 'T' },
-  { "update",			no_argument,	   NULL, 'u' },
-  { "version",			no_argument,	   NULL, 'V' },
-  { "vgrind",			no_argument,	   NULL, 'v' },
-  { 0 }
+  { "packages-only",      no_argument,	     &packages_only, TRUE  },
+  { "append",		  no_argument,	     NULL,	     'a'   },
+  { "backward-search",	  no_argument,	     NULL,	     'B'   },
+  { "c++",		  no_argument,	     NULL,	     'C'   },
+  { "cxref",		  no_argument,	     NULL,	     'x'   },
+  { "defines",		  no_argument,	     NULL,	     'd'   },
+  { "declarations",	  no_argument,	     &declarations,  TRUE  },
+  { "no-defines",	  no_argument,	     NULL,	     'D'   },
+  { "globals",		  no_argument,	     &globals, 	     TRUE  },
+  { "no-globals",	  no_argument,	     &globals, 	     FALSE },
+  { "help",		  no_argument,	     NULL,     	     'h'   },
+  { "help",		  no_argument,	     NULL,     	     'H'   },
+  { "ignore-indentation", no_argument,	     NULL,     	     'I'   },
+  { "include",		  required_argument, NULL,     	     'i'   },
+  { "language",           required_argument, NULL,     	     'l'   },
+  { "members",		  no_argument,	     &members, 	     TRUE  },
+  { "no-members",	  no_argument,	     &members, 	     FALSE },
+  { "no-warn",		  no_argument,	     NULL,	     'w'   },
+  { "output",		  required_argument, NULL,	     'o'   },
+#ifdef ETAGS_REGEXPS
+  { "regex",		  required_argument, NULL,	     'r'   },
+  { "no-regex",		  no_argument,	     NULL,	     'R'   },
+  { "ignore-case-regex",  required_argument, NULL,	     'c'   },
+#endif /* ETAGS_REGEXPS */
+  { "typedefs",		  no_argument,	     NULL,	     't'   },
+  { "typedefs-and-c++",	  no_argument,	     NULL,     	     'T'   },
+  { "update",		  no_argument,	     NULL,     	     'u'   },
+  { "version",		  no_argument,	     NULL,     	     'V'   },
+  { "vgrind",		  no_argument,	     NULL,     	     'v'   },
+  { NULL }
 };
+#endif /* LONG_OPTIONS */
 
 #ifdef ETAGS_REGEXPS
 /* Structure defining a regular expression.  Elements are
    the compiled pattern, and the name string. */
-struct pattern
+typedef struct pattern
 {
+  struct pattern *p_next;
+  language *language;
+  char *regex;
   struct re_pattern_buffer *pattern;
   struct re_registers regs;
   char *name_pattern;
-  logical error_signaled;
-};
+  bool error_signaled;
+} pattern;
 
-/* Number of regexps found. */
-int num_patterns = 0;
+/* List of all regexps. */
+pattern *p_head = NULL;
 
-/* Array of all regexps. */
-struct pattern *patterns = NULL;
+/* How many characters in the character set.  (From regex.c.)  */
+#define CHAR_SET_SIZE 256
+/* Translation table for case-insensitive matching. */
+char lc_trans[CHAR_SET_SIZE];
 #endif /* ETAGS_REGEXPS */
 
+compressor compressors[] =
+{
+  { "z", "gzip -d -c"},
+  { "Z", "gzip -d -c"},
+  { "gz", "gzip -d -c"},
+  { "GZ", "gzip -d -c"},
+  { "bz2", "bzip2 -d -c" },
+  { NULL }
+};
+
 /*
  * Language stuff.
  */
 
 /* Non-NULL if language fixed. */
-Lang_function *lang_func = NULL;
+language *forced_lang = NULL;
+
+/* Ada code */
+char *Ada_suffixes [] =
+  { "ads", "adb", "ada", NULL };
 
 /* Assembly code */
 char *Asm_suffixes [] = { "a",	/* Unix assembler */
@@ -334,6 +431,7 @@ char *Asm_suffixes [] = { "a",	/* Unix assembler */
 			  "inc", /* Microcontroller include files */
 			  "ins", /* Microcontroller include files */
 			  "s", "sa", /* Unix assembler */
+			  "S",   /* cpp-processed Unix assembler */
 			  "src", /* BSO/Tasking C compiler output */
 			  NULL
 			};
@@ -343,9 +441,17 @@ char *Asm_suffixes [] = { "a",	/* Unix assembler */
 char *default_C_suffixes [] =
   { "c", "h", NULL };
 
-/* .M is for Objective C++ files. */
 char *Cplusplus_suffixes [] =
-  { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx", "M", NULL};
+  { "C", "H", "c++", "cc", "cpp", "cxx", "h++", "hh", "hpp", "hxx",
+    "M",			/* Objective C++ */
+    "pdb",			/* Postscript with C syntax */
+    NULL };
+
+char *Cjava_suffixes [] =
+  { "java", NULL };
+
+char *Cobol_suffixes [] =
+  { "COB", "cob", NULL };
 
 char *Cstar_suffixes [] =
   { "cs", "hs", NULL };
@@ -373,57 +479,62 @@ char *plain_C_suffixes [] =
     "lm",			/* Objective lex file */
      NULL };
 
+char *Postscript_suffixes [] =
+  { "ps", "psw", NULL };	/* .psw is for PSWrap */
+
 char *Prolog_suffixes [] =
   { "prolog", NULL };
 
+char *Python_suffixes [] =
+  { "py", NULL };
+
 /* Can't do the `SCM' or `scm' prefix with a version number. */
 char *Scheme_suffixes [] =
-  { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "t", NULL };
+  { "SCM", "SM", "oak", "sch", "scheme", "scm", "sm", "ss", "t", NULL };
 
 char *TeX_suffixes [] =
   { "TeX", "bib", "clo", "cls", "ltx", "sty", "tex", NULL };
 
 char *Yacc_suffixes [] =
-  { "y", "ym", NULL };		/* .ym is Objective yacc file */
+  { "y", "ym", "yy", "yxx", "y++", NULL }; /* .ym is Objective yacc file */
 
-/* Table of language names and corresponding functions, file suffixes
-   and interpreter names.
-   It is ok for a given function to be listed under more than one
-   name.  I just didn't. */
-struct lang_entry
-{
-  char *name;
-  Lang_function *function;
-  char **suffixes;
-  char **interpreters;
-};
+/*
+ * Table of languages.
+ *
+ * It is ok for a given function to be listed under more than one
+ * name.  I just didn't.
+ */
 
-struct lang_entry lang_names [] =
+language lang_names [] =
 {
-  { "asm",     Asm_labels,	    Asm_suffixes,	  NULL              },
-  { "c",       default_C_entries,   default_C_suffixes,	  NULL              },
-  { "c++",     Cplusplus_entries,   Cplusplus_suffixes,	  NULL              },
-  { "c*",      Cstar_entries,	    Cstar_suffixes,	  NULL              },
-  { "erlang",  Erlang_functions,    Erlang_suffixes,	  NULL              },
-  { "fortran", Fortran_functions,   Fortran_suffixes,	  NULL              },
-  { "lisp",    Lisp_functions,	    Lisp_suffixes,	  NULL              },
-  { "pascal",  Pascal_functions,    Pascal_suffixes,	  NULL              },
-  { "perl",    Perl_functions,	    Perl_suffixes,	  Perl_interpreters },
-  { "proc",    plain_C_entries,	    plain_C_suffixes,	  NULL              },
-  { "prolog",  Prolog_functions,    Prolog_suffixes,	  NULL              },
-  { "scheme",  Scheme_functions,    Scheme_suffixes,	  NULL              },
-  { "tex",     TeX_functions,	    TeX_suffixes,	  NULL              },
-  { "yacc",    Yacc_entries,	    Yacc_suffixes,	  NULL              },
+  { "ada",     Ada_funcs,           Ada_suffixes,         NULL              },
+  { "asm",     Asm_labels,          Asm_suffixes,         NULL              },
+  { "c",       default_C_entries,   default_C_suffixes,   NULL              },
+  { "c++",     Cplusplus_entries,   Cplusplus_suffixes,   NULL              },
+  { "c*",      Cstar_entries,       Cstar_suffixes,       NULL              },
+  { "cobol",   Cobol_paragraphs,    Cobol_suffixes,       NULL              },
+  { "erlang",  Erlang_functions,    Erlang_suffixes,      NULL              },
+  { "fortran", Fortran_functions,   Fortran_suffixes,     NULL              },
+  { "java",    Cjava_entries,       Cjava_suffixes,       NULL              },
+  { "lisp",    Lisp_functions,      Lisp_suffixes,        NULL              },
+  { "pascal",  Pascal_functions,    Pascal_suffixes,      NULL              },
+  { "perl",    Perl_functions,      Perl_suffixes,        Perl_interpreters },
+  { "postscript", Postscript_functions, Postscript_suffixes, NULL           },
+  { "proc",    plain_C_entries,     plain_C_suffixes,     NULL              },
+  { "prolog",  Prolog_functions,    Prolog_suffixes,      NULL              },
+  { "python",  Python_functions,    Python_suffixes,      NULL              },
+  { "scheme",  Scheme_functions,    Scheme_suffixes,      NULL              },
+  { "tex",     TeX_functions,       TeX_suffixes,         NULL              },
+  { "yacc",    Yacc_entries,        Yacc_suffixes,        NULL              },
   { "auto", NULL },             /* default guessing scheme */
   { "none", just_read_file },   /* regexp matching only */
   { NULL, NULL }                /* end of list */
 };
-
 
 void
 print_language_names ()
 {
-  struct lang_entry *lang;
+  language *lang;
   char **ext;
 
   puts ("\nThese are the currently supported languages, along with the\n\
@@ -441,16 +552,19 @@ name suffix, and `none' means only do regexp processing on files.\n\
 If no language is specified and no matching suffix is found,\n\
 the first line of the file is read for a sharp-bang (#!) sequence\n\
 followed by the name of an interpreter.  If no such sequence is found,\n\
-Fortran is tried first; if no tags are found, C is tried next.");
+Fortran is tried first; if no tags are found, C is tried next.\n\
+Compressed files are supported using gzip and bzip2.");
 }
 
 #ifndef VERSION
-# define VERSION "19"
+# define VERSION "20"
 #endif
 void
 print_version ()
 {
-  printf ("%s for Emacs version %s\n", (CTAGS) ? "ctags" : "etags", VERSION);
+  printf ("%s (GNU Emacs %s)\n", (CTAGS) ? "ctags" : "etags", VERSION);
+  puts ("Copyright (C) 1999 Free Software Foundation, Inc. and Ken Arnold");
+  puts ("This program is distributed under the same terms as Emacs");
 
   exit (GOOD);
 }
@@ -458,17 +572,27 @@ print_version ()
 void
 print_help ()
 {
-  printf ("These are the options accepted by %s.  You may use unambiguous\n\
-abbreviations for the long option names.  A - as file name means read\n\
-names from stdin.", progname);
+  printf ("Usage: %s [options] [[regex-option ...] file-name] ...\n\
+\n\
+These are the options accepted by %s.\n", progname, progname);
+#ifdef LONG_OPTIONS
+  puts ("You may use unambiguous abbreviations for the long option names.");
+#else
+  puts ("Long option names do not work with this executable, as it is not\n\
+linked with GNU getopt.");
+#endif /* LONG_OPTIONS */
+  puts ("A - as file name means read names from stdin (one per line).");
   if (!CTAGS)
-    printf ("  Absolute names are stored in the output file as they\n\
-are.  Relative ones are stored relative to the output file's directory.");
+    printf ("  Absolute names are stored in the output file as they are.\n\
+Relative ones are stored relative to the output file's directory.");
   puts ("\n");
 
   puts ("-a, --append\n\
         Append tag entries to existing tags file.");
 
+  puts ("--packages-only\n\
+        For Ada files, only generate tags for packages .");
+
   if (CTAGS)
     puts ("-B, --backward-search\n\
         Write the search commands for the tag entries using '?', the\n\
@@ -477,13 +601,21 @@ are.  Relative ones are stored relative to the output file's directory.");
   puts ("-C, --c++\n\
         Treat files whose name suffix defaults to C language as C++ files.");
 
+  puts ("--declarations\n\
+	In C and derived languages, create tags for function declarations,");
+  if (CTAGS)
+    puts ("\tand create tags for extern variables if --globals is used.");
+  else
+    puts
+      ("\tand create tags for extern variables unless --no-globals is used.");
+
   if (CTAGS)
     puts ("-d, --defines\n\
-        Create tag entries for constant C #defines, too.");
+        Create tag entries for C #define constants and enum constants, too.");
   else
     puts ("-D, --no-defines\n\
-        Don't create tag entries for constant C #defines.  This makes\n\
-	the tags file smaller.");
+        Don't create tag entries for C #define constants and enum constants.\n\
+	This makes the tags file smaller.");
 
   if (!CTAGS)
     {
@@ -496,13 +628,27 @@ are.  Relative ones are stored relative to the output file's directory.");
 	named language up to the next --language=LANG option.");
     }
 
+  if (CTAGS)
+    puts ("--globals\n\
+	Create tag entries for global variables in some languages.");
+  else
+    puts ("--no-globals\n\
+	Do not create tag entries for global variables in some\n\
+	languages.  This makes the tags file smaller.");
+  puts ("--members\n\
+	Create tag entries for member variables in C and derived languages.");
+
 #ifdef ETAGS_REGEXPS
-  puts ("-r /REGEXP/, --regex=/REGEXP/\n\
-        Make a tag for each line matching pattern REGEXP in the\n\
- 	following files.  REGEXP is anchored (as if preceded by ^).\n\
-	The form /REGEXP/NAME/ creates a named tag.  For example Tcl\n\
-	named tags can be created with:\n\
+  puts ("-r /REGEXP/, --regex=/REGEXP/ or --regex=@regexfile\n\
+        Make a tag for each line matching pattern REGEXP in the following\n\
+ 	files.  {LANGUAGE}/REGEXP/ uses REGEXP for LANGUAGE files only.\n\
+	regexfile is a file containing one REGEXP per line.\n\
+	REGEXP is anchored (as if preceded by ^).\n\
+	The form /REGEXP/NAME/ creates a named tag.\n\
+	For example Tcl named tags can be created with:\n\
 	--regex=/proc[ \\t]+\\([^ \\t]+\\)/\\1/.");
+  puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
+        Like -r, --regex but ignore case when matching expressions.");
   puts ("-R, --no-regex\n\
         Don't create tags from regexps for the following files.");
 #endif /* ETAGS_REGEXPS */
@@ -517,7 +663,7 @@ are.  Relative ones are stored relative to the output file's directory.");
   if (CTAGS)
     {
       puts ("-t, --typedefs\n\
-        Generate tag entries for C typedefs.");
+        Generate tag entries for C and Ada typedefs.");
       puts ("-T, --typedefs-and-c++\n\
         Generate tag entries for C typedefs, C struct/enum/union tags,\n\
         and C++ member functions.");
@@ -549,6 +695,9 @@ are.  Relative ones are stored relative to the output file's directory.");
 
   print_language_names ();
 
+  puts ("");
+  puts ("Report bugs to bug-gnu-emacs@gnu.org");
+
   exit (GOOD);
 }
 
@@ -557,15 +706,16 @@ enum argument_type
 {
   at_language,
   at_regexp,
-  at_filename
+  at_filename,
+  at_icregexp
 };
 
-/* This structure helps us allow mixing of --lang and filenames. */
+/* This structure helps us allow mixing of --lang and file names. */
 typedef struct
 {
   enum argument_type arg_type;
   char *what;
-  Lang_function *function;
+  language *lang;		/* language of the regexp */
 } argument;
 
 #ifdef VMS			/* VMS specific functions */
@@ -582,7 +732,7 @@ typedef struct	{
 
 /*
  v1.05 nmm 26-Jun-86 fn_exp - expand specification of list of file names
- returning in each successive call the next filename matching the input
+ returning in each successive call the next file name matching the input
  spec. The function expects that each in_spec passed
  to it will be processed to completion; in particular, up to and
  including the call following that in which the last matching name
@@ -591,7 +741,7 @@ typedef struct	{
  If an error occurs, on return out_spec contains the value
  of in_spec when the error occurred.
 
- With each successive filename returned in out_spec, the
+ With each successive file name returned in out_spec, the
  function's return value is one. When there are no more matching
  names the function returns zero. If on the first call no file
  matches in_spec, or there is any other error, -1 is returned.
@@ -608,7 +758,7 @@ fn_exp (out, in)
   static long context = 0;
   static struct dsc$descriptor_s o;
   static struct dsc$descriptor_s i;
-  static logical pass1 = TRUE;
+  static bool pass1 = TRUE;
   long status;
   short retval;
 
@@ -648,7 +798,7 @@ fn_exp (out, in)
 char *
 gfnames (arg, p_error)
      char *arg;
-     logical *p_error;
+     bool *p_error;
 {
   static vspec filename = {MAX_FILE_SPEC_LEN, "\0"};
 
@@ -670,7 +820,7 @@ gfnames (arg, p_error)
 system (cmd)
      char *cmd;
 {
-  fprintf (stderr, "system() function not implemented under VMS\n");
+  error ("%s", "system() function not implemented under VMS");
 }
 #endif
 
@@ -693,20 +843,20 @@ char *massage_name (s)
 #endif /* VMS */
 
 
-void
+int
 main (argc, argv)
      int argc;
      char *argv[];
 {
   int i;
-  unsigned int nincluded_files = 0;
-  char **included_files = xnew (argc, char *);
+  unsigned int nincluded_files;
+  char **included_files;
   char *this_file;
   argument *argbuffer;
-  int current_arg = 0, file_count = 0;
-  struct linebuffer filename_lb;
+  int current_arg, file_count;
+  linebuffer filename_lb;
 #ifdef VMS
-  logical got_err;
+  bool got_err;
 #endif
 
 #ifdef DOS_NT
@@ -714,6 +864,10 @@ main (argc, argv)
 #endif /* DOS_NT */
 
   progname = argv[0];
+  nincluded_files = 0;
+  included_files = xnew (argc, char *);
+  current_arg = 0;
+  file_count = 0;
 
   /* Allocate enough no matter what happens.  Overkill, but each one
      is small. */
@@ -721,21 +875,40 @@ main (argc, argv)
 
 #ifdef ETAGS_REGEXPS
   /* Set syntax for regular expression routines. */
-  re_set_syntax (RE_SYNTAX_EMACS);
+  re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
+  /* Translation table for case-insensitive search. */
+  for (i = 0; i < CHAR_SET_SIZE; i++)
+    lc_trans[i] = lowcase (i);
 #endif /* ETAGS_REGEXPS */
 
   /*
    * If etags, always find typedefs and structure tags.  Why not?
-   * Also default is to find macro constants.
+   * Also default is to find macro constants, enum constants and
+   * global variables.
    */
   if (!CTAGS)
-    typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
+    {
+      typedefs = typedefs_and_cplusplus = constantypedefs = TRUE;
+      globals = TRUE;
+      members = FALSE;
+    }
 
   while (1)
     {
-      int opt = getopt_long (argc, argv,
-			     "-aCdDf:Il:o:r:RStTi:BuvxwVhH", longopts, 0);
+      int opt;
+      char *optstring;
+
+#ifdef ETAGS_REGEXPS
+      optstring = "-aCdDf:Il:o:r:c:RStTi:BuvxwVhH";
+#else
+      optstring = "-aCdDf:Il:o:StTi:BuvxwVhH";
+#endif /* ETAGS_REGEXPS */
+
+#ifndef LONG_OPTIONS
+      optstring = optstring + 1;
+#endif /* LONG_OPTIONS */
 
+      opt = getopt_long (argc, argv, optstring, longopts, 0);
       if (opt == EOF)
 	break;
 
@@ -747,7 +920,7 @@ main (argc, argv)
 	  break;
 
 	case 1:
-	  /* This means that a filename has been seen.  Record it. */
+	  /* This means that a file name has been seen.  Record it. */
 	  argbuffer[current_arg].arg_type = at_filename;
 	  argbuffer[current_arg].what = optarg;
 	  ++current_arg;
@@ -755,24 +928,15 @@ main (argc, argv)
 	  break;
 
 	  /* Common options. */
-	case 'a':
-	  append_to_tagfile = TRUE;
-	  break;
-	case 'C':
-	  cplusplus = TRUE;
-	  break;
-	case 'd':
-	  constantypedefs = TRUE;
-	  break;
-	case 'D':
-	  constantypedefs = FALSE;
-	  break;
+	case 'a': append_to_tagfile = TRUE;	break;
+	case 'C': cplusplus = TRUE;		break;
+	case 'd': constantypedefs = TRUE;	break;
+	case 'D': constantypedefs = FALSE;	break;
 	case 'f':		/* for compatibility with old makefiles */
 	case 'o':
 	  if (tagfile)
 	    {
-	      fprintf (stderr, "%s: -%c option may only be given once.\n",
-		       progname, opt);
+	      error ("-o option may only be given once.", (char *)NULL);
 	      suggest_asking_for_help ();
 	    }
 	  tagfile = optarg;
@@ -782,9 +946,15 @@ main (argc, argv)
 	  noindentypedefs = TRUE;
 	  break;
 	case 'l':
-	  argbuffer[current_arg].function = get_language_from_name (optarg);
-	  argbuffer[current_arg].arg_type = at_language;
-	  ++current_arg;
+	  {
+	    language *lang = get_language_from_name (optarg);
+	    if (lang != NULL)
+	      {
+		argbuffer[current_arg].lang = lang;
+		argbuffer[current_arg].arg_type = at_language;
+		++current_arg;
+	      }
+	  }
 	  break;
 #ifdef ETAGS_REGEXPS
 	case 'r':
@@ -797,6 +967,11 @@ main (argc, argv)
 	  argbuffer[current_arg].what = NULL;
 	  ++current_arg;
 	  break;
+        case 'c':
+	  argbuffer[current_arg].arg_type = at_icregexp;
+	  argbuffer[current_arg].what = optarg;
+	  ++current_arg;
+	  break;
 #endif /* ETAGS_REGEXPS */
 	case 'V':
 	  print_version ();
@@ -818,21 +993,11 @@ main (argc, argv)
 	  break;
 #else /* CTAGS */
 	  /* Ctags options. */
-	case 'B':
-	  searchar = '?';
-	  break;
-	case 'u':
-	  update = TRUE;
-	  break;
-	case 'v':
-	  vgrind_style = TRUE;
-	  /*FALLTHRU*/
-	case 'x':
-	  cxref_style = TRUE;
-	  break;
-	case 'w':
-	  no_warnings = TRUE;
-	  break;
+	case 'B': searchar = '?';	break;
+	case 'u': update = TRUE;	break;
+	case 'v': vgrind_style = TRUE;	/*FALLTHRU*/
+	case 'x': cxref_style = TRUE;	break;
+	case 'w': no_warnings = TRUE;	break;
 #endif /* CTAGS */
 	default:
 	  suggest_asking_for_help ();
@@ -849,7 +1014,7 @@ main (argc, argv)
 
   if (nincluded_files == 0 && file_count == 0)
     {
-      fprintf (stderr, "%s: No input files specified.\n", progname);
+      error ("no input files specified.", (char *)NULL);
       suggest_asking_for_help ();
     }
 
@@ -857,7 +1022,11 @@ main (argc, argv)
     tagfile = CTAGS ? "tags" : "TAGS";
   cwd = etags_getcwd ();	/* the current working directory */
   if (cwd[strlen (cwd) - 1] != '/')
-    cwd = concat (cwd, "/", "");
+    {
+      char *oldcwd = cwd;
+      cwd = concat (oldcwd, "/", "");
+      free (oldcwd);
+    }
   if (streq (tagfile, "-"))
     tagfiledir = cwd;
   else
@@ -897,11 +1066,14 @@ main (argc, argv)
       switch (argbuffer[i].arg_type)
 	{
 	case at_language:
-	  lang_func = argbuffer[i].function;
+	  forced_lang = argbuffer[i].lang;
 	  break;
 #ifdef ETAGS_REGEXPS
 	case at_regexp:
-	  add_regex (argbuffer[i].what);
+	  analyse_regex (argbuffer[i].what, FALSE);
+	  break;
+	case at_icregexp:
+	  analyse_regex (argbuffer[i].what, TRUE);
 	  break;
 #endif
 	case at_filename:
@@ -910,7 +1082,7 @@ main (argc, argv)
 	    {
 	      if (got_err)
 		{
-		  error ("Can't find file %s\n", this_file);
+		  error ("can't find file %s\n", this_file);
 		  argc--, argv++;
 		}
 	      else
@@ -921,7 +1093,7 @@ main (argc, argv)
 	      this_file = argbuffer[i].what;
 #endif
 	      /* Input file named "-" means read file names from stdin
-		 and use them. */
+		 (one per line) and use them. */
 	      if (streq (this_file, "-"))
 		while (readline_internal (&filename_lb, stdin) > 0)
 		  process_file (filename_lb.buffer);
@@ -934,6 +1106,10 @@ main (argc, argv)
 	}
     }
 
+#ifdef ETAGS_REGEXPS
+  free_patterns ();
+#endif /* ETAGS_REGEXPS */
+
   if (!CTAGS)
     {
       while (nincluded_files-- > 0)
@@ -947,10 +1123,9 @@ main (argc, argv)
      because we want them ordered.  Let's do it now. */
   if (cxref_style)
     {
-      tagf = fopen (tagfile, append_to_tagfile ? "a" : "w");
-      if (tagf == NULL)
-	pfatal (tagfile);
       put_entries (head);
+      free_tree (head);
+      head = NULL;
       exit (GOOD);
     }
 
@@ -965,7 +1140,7 @@ main (argc, argv)
 		   "mv %s OTAGS;fgrep -v '\t%s\t' OTAGS >%s;rm OTAGS",
 		   tagfile, argbuffer[i].what, tagfile);
 	  if (system (cmd) != GOOD)
-	    fatal ("failed to execute shell command");
+	    fatal ("failed to execute shell command", (char *)NULL);
 	}
       append_to_tagfile = TRUE;
     }
@@ -974,6 +1149,8 @@ main (argc, argv)
   if (tagf == NULL)
     pfatal (tagfile);
   put_entries (head);
+  free_tree (head);
+  head = NULL;
   fclose (tagf);
 
   if (update)
@@ -982,46 +1159,85 @@ main (argc, argv)
       sprintf (cmd, "sort %s -o %s", tagfile, tagfile);
       exit (system (cmd));
     }
-  exit (GOOD);
+  return GOOD;
+}
+
+
+
+/*
+ * Return a compressor given the file name.  If EXTPTR is non-zero,
+ * return a pointer into FILE where the compressor-specific
+ * extension begins.  If no compressor is found, NULL is returned
+ * and EXTPTR is not significant.
+ * Idea by Vladimir Alexiev <vladimir@cs.ualberta.ca>
+ */
+compressor *
+get_compressor_from_suffix (file, extptr)
+     char *file;
+     char **extptr;
+{
+  compressor *compr;
+  char *slash, *suffix;
+
+  /* This relies on FN to be after canonicalize_filename,
+     so we don't need to consider backslashes on DOS_NT.  */
+  slash = etags_strrchr (file, '/');
+  suffix = etags_strrchr (file, '.');
+  if (suffix == NULL || suffix < slash)
+    return NULL;
+  if (extptr != NULL)
+    *extptr = suffix;
+  suffix += 1;
+  /* Let those poor souls who live with DOS 8+3 file name limits get
+     some solace by treating foo.cgz as if it were foo.c.gz, etc.
+     Only the first do loop is run if not MSDOS */
+  do
+    {
+      for (compr = compressors; compr->suffix != NULL; compr++)
+	if (streq (compr->suffix, suffix))
+	  return compr;
+#ifndef MSDOS
+      break;
+#endif
+      if (extptr != NULL)
+	*extptr = ++suffix;
+    } while (*suffix != '\0');
+  return NULL;
 }
 
 
+
 /*
- * Return a Lang_function given the name.
+ * Return a language given the name.
  */
-Lang_function *
+language *
 get_language_from_name (name)
      char *name;
 {
-  struct lang_entry *lang;
+  language *lang;
 
-  if (name != NULL)
-    for (lang = lang_names; lang->name != NULL; lang++)
-      {
+  if (name == NULL)
+    error ("empty language name", (char *)NULL);
+  else
+    {
+      for (lang = lang_names; lang->name != NULL; lang++)
 	if (streq (name, lang->name))
-	  return lang->function;
-      }
-
-  fprintf (stderr, "%s: language \"%s\" not recognized.\n",
-	   progname, optarg);
-  suggest_asking_for_help ();
+	  return lang;
+      error ("unknown language \"%s\"", name);
+    }
 
-  /* This point should never be reached.  The function should either
-     return a function pointer  or never return.  Note that a NULL
-     pointer cannot be considered as an error, as it means that the
-     language has not been explicitely imposed by the user ("auto"). */
-  return NULL;			/* avoid warnings from compiler */
+  return NULL;
 }
 
 
 /*
- * Return a Lang_function given the interpreter name.
+ * Return a language given the interpreter name.
  */
-Lang_function *
+language *
 get_language_from_interpreter (interpreter)
      char *interpreter;
 {
-  struct lang_entry *lang;
+  language *lang;
   char **iname;
 
   if (interpreter == NULL)
@@ -1030,7 +1246,7 @@ get_language_from_interpreter (interpreter)
     if (lang->interpreters != NULL)
       for (iname = lang->interpreters; *iname != NULL; iname++)
 	if (streq (*iname, interpreter))
-	    return lang->function;
+	    return lang;
 
   return NULL;
 }
@@ -1038,27 +1254,29 @@ get_language_from_interpreter (interpreter)
 
 
 /*
- * Return a Lang_function given the file suffix.
+ * Return a language given the file name.
  */
-Lang_function *
-get_language_from_suffix (suffix)
-     char *suffix;
+language *
+get_language_from_suffix (file)
+     char *file;
 {
-  struct lang_entry *lang;
-  char **ext;
+  language *lang;
+  char **ext, *suffix;
 
+  suffix = etags_strrchr (file, '.');
   if (suffix == NULL)
     return NULL;
+  suffix += 1;
   for (lang = lang_names; lang->name != NULL; lang++)
     if (lang->suffixes != NULL)
       for (ext = lang->suffixes; *ext != NULL; ext++)
 	if (streq (*ext, suffix))
-	    return lang->function;
-
+	  return lang;
   return NULL;
 }
 
 
+
 /*
  * This routine is called on each file argument.
  */
@@ -1068,47 +1286,136 @@ process_file (file)
 {
   struct stat stat_buf;
   FILE *inf;
-#ifdef DOS_NT
-  char *p;
+  compressor *compr;
+  char *compressed_name, *uncompressed_name;
+  char *ext, *real_name;
 
-  for (p = file; *p != '\0'; p++)
-    if (*p == '\\')
-      *p = '/';
-#endif
 
-  if (stat (file, &stat_buf) == 0 && !S_ISREG (stat_buf.st_mode))
+  canonicalize_filename (file);
+  if (streq (file, tagfile) && !streq (tagfile, "-"))
     {
-      fprintf (stderr, "Skipping %s: it is not a regular file.\n", file);
+      error ("skipping inclusion of %s in self.", file);
       return;
     }
-  if (streq (file, tagfile) && !streq (tagfile, "-"))
+  if ((compr = get_compressor_from_suffix (file, &ext)) == NULL)
     {
-      fprintf (stderr, "Skipping inclusion of %s in self.\n", file);
-      return;
+      compressed_name = NULL;
+      real_name = uncompressed_name = savestr (file);
+    }
+  else
+    {
+      real_name = compressed_name = savestr (file);
+      uncompressed_name = savenstr (file, ext - file);
     }
-  inf = fopen (file, "r");
+
+  /* If the canonicalised uncompressed name has already be dealt with,
+     skip it silently, else add it to the list. */
+  {
+    typedef struct processed_file
+    {
+      char *filename;
+      struct processed_file *next;
+    } processed_file;
+    static processed_file *pf_head = NULL;
+    register processed_file *fnp;
+
+    for (fnp = pf_head; fnp != NULL; fnp = fnp->next)
+      if (streq (uncompressed_name, fnp->filename))
+	goto exit;
+    fnp = pf_head;
+    pf_head = xnew (1, struct processed_file);
+    pf_head->filename = savestr (uncompressed_name);
+    pf_head->next = fnp;
+  }
+
+  if (stat (real_name, &stat_buf) != 0)
+    {
+      /* Reset real_name and try with a different name. */
+      real_name = NULL;
+      if (compressed_name != NULL) /* try with the given suffix */
+	{
+	  if (stat (uncompressed_name, &stat_buf) == 0)
+	    real_name = uncompressed_name;
+	}
+      else			/* try all possible suffixes */
+	{
+	  for (compr = compressors; compr->suffix != NULL; compr++)
+	    {
+	      compressed_name = concat (file, ".", compr->suffix);
+	      if (stat (compressed_name, &stat_buf) != 0)
+		{
+#ifdef MSDOS
+		  char *suf = compressed_name + strlen (file);
+		  size_t suflen = strlen (compr->suffix) + 1;
+		  for ( ; suf[1]; suf++, suflen--)
+		    {
+		      memmove (suf, suf + 1, suflen);
+		      if (stat (compressed_name, &stat_buf) == 0)
+			{
+			  real_name = compressed_name;
+			  break;
+			}
+		    }
+		  if (real_name != NULL)
+		    break;
+#endif
+		  free (compressed_name);
+		  compressed_name = NULL;
+		}
+	      else
+		{
+		  real_name = compressed_name;
+		  break;
+		}
+	    }
+	}
+      if (real_name == NULL)
+	{
+	  perror (file);
+	  goto exit;
+	}
+    } /* try with a different name */
+
+  if (!S_ISREG (stat_buf.st_mode))
+    {
+      error ("skipping %s: it is not a regular file.", real_name);
+      goto exit;
+    }
+  if (real_name == compressed_name)
+    {
+      char *cmd = concat (compr->command, " ", real_name);
+      inf = popen (cmd, "r");
+      free (cmd);
+    }
+  else
+    inf = fopen (real_name, "r");
   if (inf == NULL)
     {
-      perror (file);
-      return;
+      perror (real_name);
+      goto exit;
     }
 
-  find_entries (file, inf);
+  find_entries (uncompressed_name, inf);
+
+  if (real_name == compressed_name)
+    pclose (inf);
+  else
+    fclose (inf);
 
   if (!CTAGS)
     {
       char *filename;
 
-      if (absolutefn (file))
+      if (filename_is_absolute (uncompressed_name))
 	{
-	  /* file is an absolute filename.  Canonicalise it. */
-	  filename = absolute_filename (file, cwd);
+	  /* file is an absolute file name.  Canonicalise it. */
+	  filename = absolute_filename (uncompressed_name, cwd);
 	}
       else
 	{
-	  /* file is a filename relative to cwd.  Make it relative
+	  /* file is a file name relative to cwd.  Make it relative
 	     to the directory of the tags file. */
-	  filename = relative_filename (file, tagfiledir);
+	  filename = relative_filename (uncompressed_name, tagfiledir);
 	}
       fprintf (tagf, "\f\n%s,%d\n", filename, total_size_of_entries (head));
       free (filename);
@@ -1116,11 +1423,16 @@ process_file (file)
       free_tree (head);
       head = NULL;
     }
+
+ exit:
+  if (compressed_name) free(compressed_name);
+  if (uncompressed_name) free(uncompressed_name);
+  return;
 }
 
 /*
  * This routine sets up the boolean pseudo-functions which work
- * by setting boolean flags dependent upon the corresponding character
+ * by setting boolean flags dependent upon the corresponding character.
  * Every char which is NOT in that string is not a white char.  Therefore,
  * all of the array "_wht" is set to FALSE, and then the elements
  * subscripted by the chars in "white" are set to TRUE.  Thus "_wht"
@@ -1132,66 +1444,63 @@ init ()
   register char *sp;
   register int i;
 
-  for (i = 0; i < 0177; i++)
-    _wht[i] = _etk[i] = _itk[i] = _btk[i] = FALSE;
-  for (sp = white; *sp; sp++)
-    _wht[*sp] = TRUE;
-  for (sp = endtk; *sp; sp++)
-    _etk[*sp] = TRUE;
-  for (sp = intk; *sp; sp++)
-    _itk[*sp] = TRUE;
-  for (sp = begtk; *sp; sp++)
-    _btk[*sp] = TRUE;
-  _wht[0] = _wht['\n'];
-  _etk[0] = _etk['\n'];
-  _btk[0] = _btk['\n'];
-  _itk[0] = _itk['\n'];
+  for (i = 0; i < CHARS; i++)
+    iswhite(i) = notinname(i) = begtoken(i) = intoken(i) = endtoken(i) = FALSE;
+  for (sp = white; *sp != '\0'; sp++) iswhite (*sp) = TRUE;
+  for (sp = nonam; *sp != '\0'; sp++) notinname (*sp) = TRUE;
+  for (sp = begtk; *sp != '\0'; sp++) begtoken (*sp) = TRUE;
+  for (sp = midtk; *sp != '\0'; sp++) intoken (*sp) = TRUE;
+  for (sp = endtk; *sp != '\0'; sp++) endtoken (*sp) = TRUE;
+  iswhite('\0') = iswhite('\n');
+  notinname('\0') = notinname('\n');
+  begtoken('\0') = begtoken('\n');
+  intoken('\0') = intoken('\n');
+  endtoken('\0') = endtoken('\n');
 }
 
 /*
  * This routine opens the specified file and calls the function
  * which finds the function and type definitions.
  */
+node *last_node = NULL;
+
 void
 find_entries (file, inf)
      char *file;
      FILE *inf;
 {
   char *cp;
-  Lang_function *function;
-  NODE *old_last_node;
-  extern NODE *last_node;
-
-
-  /* Memory leakage here: the memory block pointed by curfile is never
-     released.  The amount of memory leaked here is the sum of the
-     lengths of the input file names. */
+  language *lang;
+  node *old_last_node;
+
+  /* Memory leakage here: the string pointed by curfile is
+     never released, because curfile is copied into np->file
+     for each node, to be used in CTAGS mode.  The amount of
+     memory leaked here is the sum of the lengths of the
+     file names. */
   curfile = savestr (file);
 
   /* If user specified a language, use it. */
-  function = lang_func;
-  if (function != NULL)
+  lang = forced_lang;
+  if (lang != NULL && lang->function != NULL)
     {
-      function (inf);
-      fclose (inf);
+      curlang = lang;
+      lang->function (inf);
       return;
     }
 
-  cp = etags_strrchr (file, '.');
-  if (cp != NULL)
+  /* Try to guess the language given the file name. */
+  lang = get_language_from_suffix (file);
+  if (lang != NULL && lang->function != NULL)
     {
-      cp += 1;
-      function = get_language_from_suffix (cp);
-      if (function != NULL)
-	{
-	  function (inf);
-	  fclose (inf);
-	  return;
-	}
+      curlang = lang;
+      lang->function (inf);
+      return;
     }
 
   /* Look for sharp-bang as the first two characters. */
-  if (readline_internal (&lb, inf) > 2
+  if (readline_internal (&lb, inf) > 0
+      && lb.len >= 2
       && lb.buffer[0] == '#'
       && lb.buffer[1] == '!')
     {
@@ -1204,36 +1513,40 @@ find_entries (file, inf)
       if (lp != NULL)
 	lp += 1;
       else
-	for (lp = lb.buffer+2; *lp != '\0' && isspace (*lp); lp++)
-	  continue;
-      for (cp = lp; *cp != '\0' && !isspace (*cp); cp++)
-	continue;
+	lp = skip_spaces (lb.buffer + 2);
+      cp = skip_non_spaces (lp);
       *cp = '\0';
 
       if (strlen (lp) > 0)
 	{
-	  function = get_language_from_interpreter (lp);
-	  if (function != NULL)
+	  lang = get_language_from_interpreter (lp);
+	  if (lang != NULL && lang->function != NULL)
 	    {
-	      function (inf);
-	      fclose (inf);
+	      curlang = lang;
+	      lang->function (inf);
 	      return;
 	    }
 	}
     }
+  /* We rewind here, even if inf may be a pipe.  We fail if the
+     length of the first line is longer than the pipe block size,
+     which is unlikely. */
   rewind (inf);
 
   /* Try Fortran. */
   old_last_node = last_node;
+  curlang = get_language_from_name ("fortran");
   Fortran_functions (inf);
 
   /* No Fortran entries found.  Try C. */
   if (old_last_node == last_node)
     {
+      /* We do not tag if rewind fails.
+	 Only the file name will be recorded in the tags file. */
       rewind (inf);
+      curlang = get_language_from_name (cplusplus ? "c++" : "c");
       default_C_entries (inf);
     }
-  fclose (inf);
   return;
 }
 
@@ -1241,27 +1554,27 @@ find_entries (file, inf)
 void
 pfnote (name, is_func, linestart, linelen, lno, cno)
      char *name;		/* tag name, or NULL if unnamed */
-     logical is_func;		/* tag is a function */
+     bool is_func;		/* tag is a function */
      char *linestart;		/* start of the line where tag is */
      int linelen;		/* length of the line where tag is */
      int lno;			/* line number */
      long cno;			/* character number */
 {
-  register NODE *np;
+  register node *np;
 
   if (CTAGS && name == NULL)
     return;
 
-  np = xnew (1, NODE);
+  np = xnew (1, node);
 
   /* If ctags mode, change name "main" to M<thisfilename>. */
   if (CTAGS && !cxref_style && streq (name, "main"))
     {
       register char *fp = etags_strrchr (curfile, '/');
-      np->name = concat ("M", fp == 0 ? curfile : fp + 1, "");
+      np->name = concat ("M", fp == NULL ? curfile : fp + 1, "");
       fp = etags_strrchr (np->name, '.');
-      if (fp && fp[1] != '\0' && fp[2] == '\0')
-	fp[0] = 0;
+      if (fp != NULL && fp[1] != '\0' && fp[2] == '\0')
+	fp[0] = '\0';
     }
   else
     np->name = name;
@@ -1289,23 +1602,80 @@ pfnote (name, is_func, linestart, linelen, lno, cno)
   add_node (np, &head);
 }
 
-/*
- * free_tree ()
- *	recurse on left children, iterate on right children.
+/* Date: Wed, 22 Jan 1997 02:56:31 -0500 [last amended 18 Sep 1997]
+ * From: Sam Kendall <kendall@mv.mv.com>
+ * Subject: Proposal for firming up the TAGS format specification
+ * To: F.Potorti@cnuce.cnr.it
+ *
+ * pfnote should emit the optimized form [unnamed tag] only if:
+ *  1. name does not contain any of the characters " \t\r\n(),;";
+ *  2. linestart contains name as either a rightmost, or rightmost but
+ *     one character, substring;
+ *  3. the character, if any, immediately before name in linestart must
+ *     be one of the characters " \t(),;";
+ *  4. the character, if any, immediately after name in linestart must
+ *     also be one of the characters " \t(),;".
+ *
+ * The real implementation uses the notinname() macro, which recognises
+ * characters slightly different form " \t\r\n(),;".  See the variable
+ * `nonam'.
  */
+#define traditional_tag_style TRUE
 void
-free_tree (node)
-     register NODE *node;
+new_pfnote (name, namelen, is_func, linestart, linelen, lno, cno)
+     char *name;		/* tag name, or NULL if unnamed */
+     int namelen;		/* tag length */
+     bool is_func;		/* tag is a function */
+     char *linestart;		/* start of the line where tag is */
+     int linelen;		/* length of the line where tag is */
+     int lno;			/* line number */
+     long cno;			/* character number */
 {
-  while (node)
+  register char *cp;
+  bool named;
+
+  named = TRUE;
+  if (!CTAGS)
     {
-      register NODE *node_right = node->right;
-      free_tree (node->left);
-      if (node->name != NULL)
-	free (node->name);
-      free (node->pat);
-      free ((char *) node);
-      node = node_right;
+      for (cp = name; !notinname (*cp); cp++)
+	continue;
+      if (*cp == '\0')				/* rule #1 */
+	{
+	  cp = linestart + linelen - namelen;
+	  if (notinname (linestart[linelen-1]))
+	    cp -= 1;				/* rule #4 */
+	  if (cp >= linestart			/* rule #2 */
+	      && (cp == linestart
+		  || notinname (cp[-1]))	/* rule #3 */
+	      && strneq (name, cp, namelen))	/* rule #2 */
+	    named = FALSE;	/* use unnamed tag */
+	}
+    }
+
+  if (named)
+    name = savenstr (name, namelen);
+  else
+    name = NULL;
+  pfnote (name, is_func, linestart, linelen, lno, cno);
+}
+
+/*
+ * free_tree ()
+ *	recurse on left children, iterate on right children.
+ */
+void
+free_tree (np)
+     register node *np;
+{
+  while (np)
+    {
+      register node *node_right = np->right;
+      free_tree (np->left);
+      if (np->name != NULL)
+	free (np->name);
+      free (np->pat);
+      free (np);
+      np = node_right;
     }
 }
 
@@ -1318,18 +1688,17 @@ free_tree (node)
  *	add_node is the only function allowed to add nodes, so it can
  *	maintain state.
  */
-NODE *last_node = NULL;
 void
-add_node (node, cur_node_p)
-     NODE *node, **cur_node_p;
+add_node (np, cur_node_p)
+     node *np, **cur_node_p;
 {
   register int dif;
-  register NODE *cur_node = *cur_node_p;
+  register node *cur_node = *cur_node_p;
 
   if (cur_node == NULL)
     {
-      *cur_node_p = node;
-      last_node = node;
+      *cur_node_p = np;
+      last_node = np;
       return;
     }
 
@@ -1337,14 +1706,14 @@ add_node (node, cur_node_p)
     {
       /* Etags Mode */
       if (last_node == NULL)
-	fatal ("internal error in add_node", 0);
-      last_node->right = node;
-      last_node = node;
+	fatal ("internal error in add_node", (char *)NULL);
+      last_node->right = np;
+      last_node = np;
     }
   else
     {
       /* Ctags Mode */
-      dif = strcmp (node->name, cur_node->name);
+      dif = strcmp (np->name, cur_node->name);
 
       /*
        * If this tag name matches an existing one, then
@@ -1352,12 +1721,12 @@ add_node (node, cur_node_p)
        */
       if (!dif)
 	{
-	  if (streq (node->file, cur_node->file))
+	  if (streq (np->file, cur_node->file))
 	    {
 	      if (!no_warnings)
 		{
 		  fprintf (stderr, "Duplicate entry in file %s, line %d: %s\n",
-			   node->file, lineno, node->name);
+			   np->file, lineno, np->name);
 		  fprintf (stderr, "Second entry ignored\n");
 		}
 	    }
@@ -1366,64 +1735,64 @@ add_node (node, cur_node_p)
 	      fprintf
 		(stderr,
 		 "Duplicate entry in files %s and %s: %s (Warning only)\n",
-		 node->file, cur_node->file, node->name);
+		 np->file, cur_node->file, np->name);
 	      cur_node->been_warned = TRUE;
 	    }
 	  return;
 	}
 
       /* Actually add the node */
-      add_node (node, dif < 0 ? &cur_node->left : &cur_node->right);
+      add_node (np, dif < 0 ? &cur_node->left : &cur_node->right);
     }
 }
 
 void
-put_entries (node)
-     register NODE *node;
+put_entries (np)
+     register node *np;
 {
   register char *sp;
 
-  if (node == NULL)
+  if (np == NULL)
     return;
 
   /* Output subentries that precede this one */
-  put_entries (node->left);
+  put_entries (np->left);
 
   /* Output this entry */
 
   if (!CTAGS)
     {
-      if (node->name != NULL)
-	fprintf (tagf, "%s\177%s\001%d,%d\n",
-		 node->pat, node->name, node->lno, node->cno);
+      if (np->name != NULL)
+	fprintf (tagf, "%s\177%s\001%d,%ld\n",
+		 np->pat, np->name, np->lno, np->cno);
       else
-	fprintf (tagf, "%s\177%d,%d\n",
-		 node->pat, node->lno, node->cno);
+	fprintf (tagf, "%s\177%d,%ld\n",
+		 np->pat, np->lno, np->cno);
     }
   else
     {
-      if (node->name == NULL)
-	error ("internal error: NULL name in ctags mode.", 0);
+      if (np->name == NULL)
+	error ("internal error: NULL name in ctags mode.", (char *)NULL);
 
       if (cxref_style)
 	{
 	  if (vgrind_style)
 	    fprintf (stdout, "%s %s %d\n",
-		     node->name, node->file, (node->lno + 63) / 64);
+		     np->name, np->file, (np->lno + 63) / 64);
 	  else
 	    fprintf (stdout, "%-16s %3d %-16s %s\n",
-		     node->name, node->lno, node->file, node->pat);
+		     np->name, np->lno, np->file, np->pat);
 	}
       else
 	{
-	  fprintf (tagf, "%s\t%s\t", node->name, node->file);
+	  fprintf (tagf, "%s\t%s\t", np->name, np->file);
 
-	  if (node->is_func)
+	  if (np->is_func)
 	    {			/* a function */
 	      putc (searchar, tagf);
 	      putc ('^', tagf);
 
-	      for (sp = node->pat; *sp; sp++)
+	      for (sp = np->pat; *sp; sp++)
 		{
 		  if (*sp == '\\' || *sp == searchar)
 		    putc ('\\', tagf);
@@ -1433,14 +1802,14 @@ put_entries (node)
 	    }
 	  else
 	    {			/* a typedef; text pattern inadequate */
-	      fprintf (tagf, "%d", node->lno);
+	      fprintf (tagf, "%d", np->lno);
 	    }
 	  putc ('\n', tagf);
 	}
     }
 
   /* Output subentries that follow this one */
-  put_entries (node->right);
+  put_entries (np->right);
 }
 
 /* Length of a number's decimal representation. */
@@ -1448,11 +1817,9 @@ int
 number_len (num)
      long num;
 {
-  int len = 0;
-  if (!num)
-    return 1;
-  for (; num; num /= 10)
-    ++len;
+  int len = 1;
+  while ((num /= 10) > 0)
+    len += 1;
   return len;
 }
 
@@ -1464,25 +1831,24 @@ number_len (num)
  * backward compatibility.
  */
 int
-total_size_of_entries (node)
-     register NODE *node;
+total_size_of_entries (np)
+     register node *np;
 {
   register int total;
 
-  if (node == NULL)
+  if (np == NULL)
     return 0;
 
-  total = 0;
-  for (; node; node = node->right)
+  for (total = 0; np != NULL; np = np->right)
     {
       /* Count left subentries. */
-      total += total_size_of_entries (node->left);
+      total += total_size_of_entries (np->left);
 
       /* Count this entry */
-      total += strlen (node->pat) + 1;
-      total += number_len ((long) node->lno) + 1 + number_len (node->cno) + 1;
-      if (node->name != NULL)
-	total += 1 + strlen (node->name);	/* \001name */
+      total += strlen (np->pat) + 1;
+      total += number_len ((long) np->lno) + 1 + number_len (np->cno) + 1;
+      if (np->name != NULL)
+	total += 1 + strlen (np->name);	/* \001name */
     }
 
   return total;
@@ -1493,8 +1859,13 @@ total_size_of_entries (node)
  */
 enum sym_type
 {
-  st_none, st_C_objprot, st_C_objimpl, st_C_objend, st_C_gnumacro,
-  st_C_struct, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
+  st_none,
+  st_C_objprot, st_C_objimpl, st_C_objend,
+  st_C_gnumacro,
+  st_C_ignore,
+  st_C_javastruct,
+  st_C_operator,
+  st_C_struct, st_C_extern, st_C_enum, st_C_define, st_C_typedef, st_C_typespec
 };
 
 /* Feed stuff between (but not including) %[ and %] lines to:
@@ -1502,18 +1873,31 @@ enum sym_type
 %[
 struct C_stab_entry { char *name; int c_ext; enum sym_type type; }
 %%
+if,		0,	st_C_ignore
+for,		0,	st_C_ignore
+while,		0,	st_C_ignore
+switch,		0,	st_C_ignore
+return,		0,	st_C_ignore
 @interface,	0,	st_C_objprot
 @protocol,	0,	st_C_objprot
 @implementation,0,	st_C_objimpl
 @end,		0,	st_C_objend
+import,		C_JAVA,	st_C_ignore
+package,	C_JAVA,	st_C_ignore
+friend,		C_PLPL,	st_C_ignore
+extends,  	C_JAVA,	st_C_javastruct
+implements,  	C_JAVA,	st_C_javastruct
+interface,	C_JAVA, st_C_struct
 class,  	C_PLPL,	st_C_struct
 namespace,	C_PLPL,	st_C_struct
 domain, 	C_STAR,	st_C_struct
 union,  	0,	st_C_struct
 struct, 	0,	st_C_struct
+extern,  	0,	st_C_extern
 enum,    	0,	st_C_enum
 typedef, 	0,	st_C_typedef
 define,  	0,	st_C_define
+operator,	C_PLPL, st_C_operator
 bool,		C_PLPL,	st_C_typespec
 long,    	0,	st_C_typespec
 short,   	0,	st_C_typespec
@@ -1525,7 +1909,6 @@ signed,  	0,	st_C_typespec
 unsigned,	0,	st_C_typespec
 auto,    	0,	st_C_typespec
 void,    	0,	st_C_typespec
-extern,  	0,	st_C_typespec
 static,  	0,	st_C_typespec
 const,   	0,	st_C_typespec
 volatile,	0,	st_C_typespec
@@ -1544,118 +1927,163 @@ PSEUDO,		0,	st_C_gnumacro
 %]
 and replace lines between %< and %> with its output. */
 /*%<*/
-/* C code produced by gperf version 2.1 (K&R C version) */
+/* C code produced by gperf version 2.7.1 (19981006 egcs) */
 /* Command-line: gperf -c -k 1,3 -o -p -r -t  */
-
-
 struct C_stab_entry { char *name; int c_ext; enum sym_type type; };
 
-#define MIN_WORD_LENGTH 3
+#define TOTAL_KEYWORDS 46
+#define MIN_WORD_LENGTH 2
 #define MAX_WORD_LENGTH 15
-#define MIN_HASH_VALUE 34
-#define MAX_HASH_VALUE 121
-/*
-   34 keywords
-   88 is the maximum key range
-*/
+#define MIN_HASH_VALUE 13
+#define MAX_HASH_VALUE 123
+/* maximum key range = 111, duplicates = 0 */
 
-static int
+#ifdef __GNUC__
+__inline
+#endif
+static unsigned int
 hash (str, len)
-     register char *str;
-     register unsigned int  len;
+     register const char *str;
+     register unsigned int len;
 {
-  static unsigned char hash_table[] =
+  static unsigned char asso_values[] =
+    {
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124,   3, 124, 124, 124,  43,   6,
+       11, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+       11, 124, 124,  58,   7, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124,  57,   7,  42,
+        4,  14,  52,   0, 124,  53, 124, 124,  29,  11,
+        6,  35,  32, 124,  29,  34,  59,  58,  51,  24,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124, 124, 124, 124, 124,
+      124, 124, 124, 124, 124, 124
+    };
+  register int hval = len;
+
+  switch (hval)
     {
-     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
-     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
-     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
-     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
-     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
-     121, 121, 121, 121, 121, 121, 121, 121, 121, 121,
-     121, 121, 121, 121,  45, 121, 121, 121,  16,  19,
-      61, 121, 121, 121, 121, 121, 121, 121, 121, 121,
-      10, 121, 121,  20,  53, 121, 121, 121, 121, 121,
-     121, 121, 121, 121, 121, 121, 121,  41,  45,  22,
-      60,  47,  37,  28, 121,  55, 121, 121,  20,  14,
-      29,  30,   5, 121,  50,  59,  30,  54,   6, 121,
-     121, 121, 121, 121, 121, 121, 121, 121,
-  };
-  return len + hash_table[str[2]] + hash_table[str[0]];
+      default:
+      case 3:
+        hval += asso_values[(unsigned char)str[2]];
+      case 2:
+      case 1:
+        hval += asso_values[(unsigned char)str[0]];
+        break;
+    }
+  return hval;
 }
 
+#ifdef __GNUC__
+__inline
+#endif
 struct C_stab_entry *
 in_word_set (str, len)
-     register char *str;
+     register const char *str;
      register unsigned int len;
 {
-
-  static struct C_stab_entry  wordlist[] =
+  static struct C_stab_entry wordlist[] =
     {
-      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, 
-      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, 
-      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, 
-      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, 
-      {"volatile", 	0,	st_C_typespec},
-      {"PSEUDO", 		0,	st_C_gnumacro},
-      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, 
-      {"typedef",  	0,	st_C_typedef},
-      {"typename", 	C_PLPL,	st_C_typespec},
-      {"",}, {"",}, {"",}, 
-      {"SYSCALL", 	0,	st_C_gnumacro},
-      {"",}, {"",}, {"",}, 
-      {"mutable", 	C_PLPL,	st_C_typespec},
-      {"namespace", 	C_PLPL,	st_C_struct},
-      {"long",     	0,	st_C_typespec},
-      {"",}, {"",}, 
-      {"const",    	0,	st_C_typespec},
-      {"",}, {"",}, {"",}, 
-      {"explicit", 	C_PLPL,	st_C_typespec},
-      {"",}, {"",}, {"",}, {"",}, 
-      {"void",     	0,	st_C_typespec},
-      {"",}, 
-      {"char",     	0,	st_C_typespec},
-      {"class",   	C_PLPL,	st_C_struct},
-      {"",}, {"",}, {"",}, 
-      {"float",    	0,	st_C_typespec},
-      {"",}, 
-      {"@implementation", 0,	st_C_objimpl},
-      {"auto",     	0,	st_C_typespec},
-      {"",}, 
-      {"ENTRY", 		0,	st_C_gnumacro},
-      {"@end", 		0,	st_C_objend},
-      {"bool", 		C_PLPL,	st_C_typespec},
-      {"domain",  	C_STAR,	st_C_struct},
-      {"",}, 
-      {"DEFUN", 		0,	st_C_gnumacro},
-      {"extern",   	0,	st_C_typespec},
-      {"@interface", 	0,	st_C_objprot},
-      {"",}, {"",}, {"",}, 
-      {"int",      	0,	st_C_typespec},
-      {"",}, {"",}, {"",}, {"",}, 
-      {"signed",   	0,	st_C_typespec},
-      {"short",    	0,	st_C_typespec},
-      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, 
-      {"define",   	0,	st_C_define},
-      {"@protocol", 	0,	st_C_objprot},
-      {"enum",     	0,	st_C_enum},
-      {"static",   	0,	st_C_typespec},
-      {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, {"",}, 
-      {"union",   	0,	st_C_struct},
-      {"struct",  	0,	st_C_struct},
-      {"",}, {"",}, {"",}, {"",}, 
-      {"double",   	0,	st_C_typespec},
-      {"unsigned", 	0,	st_C_typespec},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {""}, {""}, {""}, {""},
+      {"@end",		0,	st_C_objend},
+      {""}, {""}, {""}, {""},
+      {"ENTRY",		0,	st_C_gnumacro},
+      {"@interface",	0,	st_C_objprot},
+      {""},
+      {"domain", 	C_STAR,	st_C_struct},
+      {""},
+      {"PSEUDO",		0,	st_C_gnumacro},
+      {""}, {""},
+      {"namespace",	C_PLPL,	st_C_struct},
+      {""}, {""},
+      {"@implementation",0,	st_C_objimpl},
+      {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""}, {""},
+      {"long",    	0,	st_C_typespec},
+      {"signed",  	0,	st_C_typespec},
+      {"@protocol",	0,	st_C_objprot},
+      {""}, {""}, {""}, {""},
+      {"bool",		C_PLPL,	st_C_typespec},
+      {""}, {""}, {""}, {""}, {""}, {""},
+      {"const",   	0,	st_C_typespec},
+      {"explicit",	C_PLPL,	st_C_typespec},
+      {"if",		0,	st_C_ignore},
+      {""},
+      {"operator",	C_PLPL, st_C_operator},
+      {""},
+      {"DEFUN",		0,	st_C_gnumacro},
+      {""}, {""},
+      {"define",  	0,	st_C_define},
+      {""}, {""}, {""}, {""}, {""},
+      {"double",  	0,	st_C_typespec},
+      {"struct", 	0,	st_C_struct},
+      {""}, {""}, {""}, {""},
+      {"short",   	0,	st_C_typespec},
+      {""},
+      {"enum",    	0,	st_C_enum},
+      {"mutable",	C_PLPL,	st_C_typespec},
+      {""},
+      {"extern",  	0,	st_C_extern},
+      {"extends",  	C_JAVA,	st_C_javastruct},
+      {"package",	C_JAVA,	st_C_ignore},
+      {"while",		0,	st_C_ignore},
+      {""},
+      {"for",		0,	st_C_ignore},
+      {""}, {""}, {""},
+      {"volatile",	0,	st_C_typespec},
+      {""}, {""},
+      {"import",		C_JAVA,	st_C_ignore},
+      {"float",   	0,	st_C_typespec},
+      {"switch",		0,	st_C_ignore},
+      {"return",		0,	st_C_ignore},
+      {"implements",  	C_JAVA,	st_C_javastruct},
+      {""},
+      {"static",  	0,	st_C_typespec},
+      {"typedef", 	0,	st_C_typedef},
+      {"typename",	C_PLPL,	st_C_typespec},
+      {"unsigned",	0,	st_C_typespec},
+      {""}, {""},
+      {"char",    	0,	st_C_typespec},
+      {"class",  	C_PLPL,	st_C_struct},
+      {""}, {""}, {""},
+      {"void",    	0,	st_C_typespec},
+      {""}, {""},
+      {"friend",		C_PLPL,	st_C_ignore},
+      {""}, {""}, {""},
+      {"int",     	0,	st_C_typespec},
+      {"union",  	0,	st_C_struct},
+      {""}, {""}, {""},
+      {"auto",    	0,	st_C_typespec},
+      {"interface",	C_JAVA, st_C_struct},
+      {""},
+      {"SYSCALL",	0,	st_C_gnumacro}
     };
 
   if (len <= MAX_WORD_LENGTH && len >= MIN_WORD_LENGTH)
     {
       register int key = hash (str, len);
 
-      if (key <= MAX_HASH_VALUE && key >= MIN_HASH_VALUE)
+      if (key <= MAX_HASH_VALUE && key >= 0)
         {
-          register char *s = wordlist[key].name;
+          register const char *s = wordlist[key].name;
 
-          if (*s == *str && !strncmp (str + 1, s + 1, len - 1))
+          if (*str == *s && !strncmp (str + 1, s + 1, len - 1))
             return &wordlist[key];
         }
     }
@@ -1664,12 +2092,12 @@ in_word_set (str, len)
 /*%>*/
 
 enum sym_type
-C_symtype(str, len, c_ext)
+C_symtype (str, len, c_ext)
      char *str;
      int len;
      int c_ext;
 {
-  register struct C_stab_entry *se = in_word_set(str, len);
+  register struct C_stab_entry *se = in_word_set (str, len);
 
   if (se == NULL || (se->c_ext && !(c_ext & se->c_ext)))
     return st_none;
@@ -1677,19 +2105,22 @@ C_symtype(str, len, c_ext)
 }
 
  /*
-  * C functions are recognized using a simple finite automaton.
-  * funcdef is its state variable.
+  * C functions and variables are recognized using a simple
+  * finite automaton.  fvdef is its state variable.
   */
 enum
 {
-  fnone,			/* nothing seen */
-  ftagseen,			/* function-like tag seen */
-  fstartlist,			/* just after open parenthesis */
-  finlist,			/* in parameter list */
-  flistseen,			/* after parameter list */
-  fignore			/* before open brace */
-} funcdef;
-
+  fvnone,			/* nothing seen */
+  foperator,			/* func: operator keyword seen (cplpl) */
+  fvnameseen,			/* function or variable name seen */
+  fstartlist,			/* func: just after open parenthesis */
+  finlist,			/* func: in parameter list */
+  flistseen,			/* func: after parameter list */
+  fignore,			/* func: before open brace */
+  vignore			/* var-like: ignore until ';' */
+} fvdef;
+
+bool fvextern;			/* func or var: extern keyword seen; */
 
  /*
   * typedefs are recognized using a simple finite automaton.
@@ -1698,7 +2129,8 @@ enum
 enum
 {
   tnone,			/* nothing seen */
-  ttypedseen,			/* typedef keyword seen */
+  tkeyseen,			/* typedef keyword seen */
+  ttypeseen,			/* defined type seen */
   tinbody,			/* inside typedef body */
   tend,				/* just before typedef tag */
   tignore			/* junk after typedef tag */
@@ -1745,6 +2177,7 @@ enum
 
 /*
  * State machine for Objective C protocols and implementations.
+ * Tom R.Hageman <tom@basil.icce.rug.nl>
  */
 enum
 {
@@ -1762,16 +2195,34 @@ enum
   oignore			/* wait for @end */
 } objdef;
 
+
+/*
+ * Use this structure to keep info about the token read, and how it
+ * should be tagged.  Used by the make_C_tag function to build a tag.
+ */
+typedef struct
+{
+  bool valid;
+  char *str;
+  bool named;
+  int linelen;
+  int lineno;
+  long linepos;
+  char *buffer;
+} token;
+
+token tok;			/* latest token read */
+
 /*
  * Set this to TRUE, and the next token considered is called a function.
  * Used only for GNU emacs's function-defining macros.
  */
-logical next_token_is_func;
+bool next_token_is_func;
 
 /*
  * TRUE in the rules part of a yacc file, FALSE outside (parse as C).
  */
-logical yacc_rules;
+bool yacc_rules;
 
 /*
  * methodlen is the length of the method name stored in token_name.
@@ -1781,19 +2232,14 @@ int methodlen;
 /*
  * consider_token ()
  *	checks to see if the current token is at the start of a
- *	function, or corresponds to a typedef, or is a struct/union/enum
- *	tag.
- *
- *	*IS_FUNC gets TRUE iff the token is a function or macro with args.
- *	C_EXT is which language we are looking at.
+ *	function or variable, or corresponds to a typedef, or
+ * 	is a struct/union/enum tag, or #define, or an enum constant.
  *
- *	In the future we will need some way to adjust where the end of
- *	the token is; for instance, implementing the C++ keyword
- *	`operator' properly will adjust the end of the token to be after
- *	whatever follows `operator'.
+ *	*IS_FUNC gets TRUE iff the token is a function or #define macro
+ *	with args.  C_EXT is which language we are looking at.
  *
  * Globals
- *	funcdef			IN OUT
+ *	fvdef			IN OUT
  *	structdef		IN OUT
  *	definedef		IN OUT
  *	typdef			IN OUT
@@ -1801,15 +2247,15 @@ int methodlen;
  *	next_token_is_func	IN OUT
  */
 
-logical
-consider_token (str, len, c, c_ext, cblev, parlev, is_func)
+bool
+consider_token (str, len, c, c_ext, cblev, parlev, is_func_or_var)
      register char *str;	/* IN: token pointer */
      register int len;		/* IN: token length */
      register char c;		/* IN: first char after the token */
      int c_ext;			/* IN: C extensions mask */
      int cblev;			/* IN: curly brace level */
      int parlev;		/* IN: parenthesis level */
-     logical *is_func;		/* OUT: function found */
+     bool *is_func_or_var;	/* OUT: function or variable found */
 {
   enum sym_type toktype = C_symtype (str, len, c_ext);
 
@@ -1837,15 +2283,15 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
        * and constantypedefs is FALSE.
        */
       definedef = dignorerest;
-      *is_func = (c == '(');
-      if (!*is_func && !constantypedefs)
+      *is_func_or_var = (c == '(');
+      if (!*is_func_or_var && !constantypedefs)
 	return FALSE;
       else
 	return TRUE;
     case dignorerest:
       return FALSE;
     default:
-      error ("internal error: definedef value.", 0);
+      error ("internal error: definedef value.", (char *)NULL);
     }
 
   /*
@@ -1857,20 +2303,20 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
       if (toktype == st_C_typedef)
 	{
 	  if (typedefs)
-	    typdef = ttypedseen;
-	  funcdef = fnone;
+	    typdef = tkeyseen;
+	  fvextern = FALSE;
+	  fvdef = fvnone;
 	  return FALSE;
 	}
       break;
-    case ttypedseen:
+    case tkeyseen:
       switch (toktype)
 	{
 	case st_none:
 	case st_C_typespec:
-	  typdef = tend;
-	  break;
 	case st_C_struct:
 	case st_C_enum:
+	  typdef = ttypeseen;
 	  break;
 	}
       /* Do not return here, so the structdef stuff has a chance. */
@@ -1895,17 +2341,16 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
    * This structdef business is NOT invoked when we are ctags and the
    * file is plain C.  This is because a struct tag may have the same
    * name as another tag, and this loses with ctags.
-   *
-   * This if statement deals with the typdef state machine as
-   * follows: if typdef==ttypedseen and token is struct/union/class/enum,
-   * return FALSE.  All the other code here is for the structdef
-   * state machine.
    */
   switch (toktype)
     {
+    case st_C_javastruct:
+      if (structdef == stagseen)
+        structdef = scolonseen;
+      return FALSE;
     case st_C_struct:
     case st_C_enum:
-      if (typdef == ttypedseen
+      if (typdef == tkeyseen
 	  || (typedefs_and_cplusplus && cblev == 0 && structdef == snone))
 	{
 	  structdef = skeyseen;
@@ -1913,10 +2358,11 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
 	}
       return FALSE;
     }
+
   if (structdef == skeyseen)
     {
-      /* Save the tag for struct/union/class, for functions that may be
-         defined inside. */
+      /* Save the tag for struct/union/class, for functions and variables
+	 that may be defined inside. */
       if (structtype == st_C_struct)
 	structtag = savenstr (str, len);
       else
@@ -1925,14 +2371,24 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
       return TRUE;
     }
 
-  /* Avoid entering funcdef stuff if typdef is going on. */
   if (typdef != tnone)
-    {
-      definedef = dnone;
-      return FALSE;
-    }
-
-  /* Detect GNU macros. */
+    definedef = dnone;
+
+  /* Detect GNU macros.
+
+     Writers of emacs code are recommended to put the
+     first two args of a DEFUN on the same line.
+
+      The DEFUN macro, used in emacs C source code, has a first arg
+     that is a string (the lisp function name), and a second arg that
+     is a C function name.  Since etags skips strings, the second arg
+     is tagged.  This is unfortunate, as it would be better to tag the
+     first arg.  The simplest way to deal with this problem would be
+     to name the tag with a name built from the function name, by
+     removing the initial 'F' character and substituting '-' for '_'.
+     Anyway, this assumes that the conventions of naming lisp
+     functions will never change.  Currently, this method is not
+     implemented. */
   if (definedef == dnone && toktype == st_C_gnumacro)
     {
       next_token_is_func = TRUE;
@@ -1941,14 +2397,12 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
   if (next_token_is_func)
     {
       next_token_is_func = FALSE;
-      funcdef = fignore;
-      *is_func = TRUE;
+      fvdef = fignore;
+      *is_func_or_var = TRUE;
       return TRUE;
     }
 
-  /*
-   * Detecting Objective C constructs.
-   */
+  /* Detect Objective C constructs. */
   switch (objdef)
     {
     case onone:
@@ -1963,7 +2417,7 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
 	}
       break;
     case oimplementation:
-      /* Save the class tag for functions that may be defined inside. */
+      /* Save the class tag for functions or variables defined inside. */
       objtag = savenstr (str, len);
       objdef = oinbody;
       return FALSE;
@@ -1971,11 +2425,11 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
       /* Save the class tag for categories. */
       objtag = savenstr (str, len);
       objdef = otagseen;
-      *is_func = TRUE;
+      *is_func_or_var = TRUE;
       return TRUE;
     case oparenseen:
       objdef = ocatseen;
-      *is_func = TRUE;
+      *is_func_or_var = TRUE;
       return TRUE;
     case oinbody:
       break;
@@ -1984,9 +2438,10 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
 	{
 	  objdef = omethodtag;
 	  methodlen = len;
-	  GROW_LINEBUFFER (token_name, methodlen+1);
+	  grow_linebuffer (&token_name, methodlen + 1);
 	  strncpy (token_name.buffer, str, len);
 	  token_name.buffer[methodlen] = '\0';
+	  token_name.len = methodlen;
 	  return TRUE;
 	}
       return FALSE;
@@ -1999,8 +2454,9 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
 	{
 	  objdef = omethodtag;
 	  methodlen += len;
-	  GROW_LINEBUFFER (token_name, methodlen+1);
+	  grow_linebuffer (&token_name, methodlen + 1);
 	  strncat (token_name.buffer, str, len);
+	  token_name.len = methodlen;
 	  return TRUE;
 	}
       return FALSE;
@@ -2018,20 +2474,40 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
       return FALSE;
     }
 
-  /* A function? */
+  /* A function, variable or enum constant? */
   switch (toktype)
     {
+    case st_C_extern:
+      fvextern = TRUE;
+      /* FALLTHRU */
     case st_C_typespec:
-      if (funcdef != finlist && funcdef != fignore)
-        funcdef = fnone;		/* should be useless */
+      if (fvdef != finlist && fvdef != fignore && fvdef != vignore)
+	fvdef = fvnone;		/* should be useless */
       return FALSE;
-    default:
-      if (funcdef == fnone)
+    case st_C_ignore:
+      fvextern = FALSE;
+      fvdef = vignore;
+      return FALSE;
+    case st_C_operator:
+      fvdef = foperator;
+      *is_func_or_var = TRUE;
+      return TRUE;
+    case st_none:
+      if ((c_ext & C_PLPL) && strneq (str+len-10, "::operator", 10))
+	{
+	  fvdef = foperator;
+	  *is_func_or_var = TRUE;
+	  return TRUE;
+	}
+      if (constantypedefs && structdef == sinbody && structtype == st_C_enum)
+	return TRUE;
+      if (fvdef == fvnone)
 	{
-	  funcdef = ftagseen;
-	  *is_func = TRUE;
+	  fvdef = fvnameseen;	/* function or variable */
+	  *is_func_or_var = TRUE;
 	  return TRUE;
 	}
+      break;
     }
 
   return FALSE;
@@ -2039,21 +2515,10 @@ consider_token (str, len, c, c_ext, cblev, parlev, is_func)
 
 /*
  * C_entries ()
- *	This routine finds functions, typedefs, #define's and
- * 	struct/union/enum definitions in C syntax and adds them
- *	to the list.
+ *	This routine finds functions, variables, typedefs,
+ * 	#define's, enum constants and struct/union/enum definitions in
+ * 	C syntax and adds them to the list.
  */
-typedef struct
-{
-  logical valid;
-  char *str;
-  logical named;
-  int linelen;
-  int lineno;
-  long linepos;
-  char *buffer;
-} TOKEN;
-
 #define current_lb_is_new (newndx == curndx)
 #define switch_line_buffers() (curndx = 1 - curndx)
 
@@ -2064,7 +2529,7 @@ typedef struct
 #define othlinepos (lbs[1-curndx].linepos)
 #define newlinepos (lbs[newndx].linepos)
 
-#define CNL_SAVE_DEFINEDEF						\
+#define CNL_SAVE_DEFINEDEF()						\
 do {									\
   curlinepos = charno;							\
   lineno++;								\
@@ -2075,9 +2540,9 @@ do {									\
   newndx = curndx;							\
 } while (0)
 
-#define CNL								\
+#define CNL()								\
 do {									\
-  CNL_SAVE_DEFINEDEF;							\
+  CNL_SAVE_DEFINEDEF();							\
   if (savetok.valid)							\
     {									\
       tok = savetok;							\
@@ -2086,16 +2551,35 @@ do {									\
   definedef = dnone;							\
 } while (0)
 
-/* Ideally this macro should never be called wihen tok.valid is FALSE,
-   but this would mean that the state machines always guess right. */
-#define make_tag(isfun)  do \
-if (tok.valid) {							\
-  char *name = NULL;							\
-  if (CTAGS || tok.named)						\
-    name = savestr (token_name.buffer);					\
-  pfnote (name, isfun, tok.buffer, tok.linelen, tok.lineno, tok.linepos); \
-  tok.valid = FALSE;							\
-} while (0)
+
+void
+make_C_tag (isfun)
+     bool isfun;
+{
+  /* This function should never be called when tok.valid is FALSE, but
+     we must protect against invalid input or internal errors. */
+  if (tok.valid)
+    {
+      if (traditional_tag_style)
+	{
+	  /* This was the original code.  Now we call new_pfnote instead,
+	     which uses the new method for naming tags (see new_pfnote). */
+	  char *name = NULL;
+
+	  if (CTAGS || tok.named)
+	    name = savestr (token_name.buffer);
+	  pfnote (name, isfun,
+		  tok.buffer, tok.linelen, tok.lineno, tok.linepos);
+	}
+      else
+	new_pfnote (token_name.buffer, token_name.len, isfun,
+		    tok.buffer, tok.linelen, tok.lineno, tok.linepos);
+      tok.valid = FALSE;
+    }
+  else if (DEBUG)
+    abort ();
+}
+
 
 void
 C_entries (c_ext, inf)
@@ -2105,30 +2589,38 @@ C_entries (c_ext, inf)
   register char c;		/* latest char read; '\0' for end of line */
   register char *lp;		/* pointer one beyond the character `c' */
   int curndx, newndx;		/* indices for current and new lb */
-  TOKEN tok;			/* latest token read */
   register int tokoff;		/* offset in line of start of current token */
   register int toklen;		/* length of current token */
+  char *qualifier;		/* string used to qualify names */
+  int qlen;			/* length of qualifier */
   int cblev;			/* current curly brace level */
   int parlev;			/* current parenthesis level */
-  logical incomm, inquote, inchar, quotednl, midtoken;
-  logical cplpl;
-  TOKEN savetok;		/* token saved during preprocessor handling */
+  bool incomm, inquote, inchar, quotednl, midtoken;
+  bool purec, cplpl, cjava;
+  token savetok;		/* token saved during preprocessor handling */
 
 
+  tokoff = toklen = 0;		/* keep compiler quiet */
   curndx = newndx = 0;
   lineno = 0;
   charno = 0;
   lp = curlb.buffer;
   *lp = 0;
 
-  funcdef = fnone; typdef = tnone; structdef = snone;
-  definedef = dnone; objdef = onone;
+  fvdef = fvnone; fvextern = FALSE; typdef = tnone;
+  structdef = snone; definedef = dnone; objdef = onone;
   next_token_is_func = yacc_rules = FALSE;
   midtoken = inquote = inchar = incomm = quotednl = FALSE;
   tok.valid = savetok.valid = FALSE;
   cblev = 0;
   parlev = 0;
-  cplpl = c_ext & C_PLPL;
+  purec = !(c_ext & ~YACC);	/* no extensions (apart from possibly yacc) */
+  cplpl = (c_ext & C_PLPL) == C_PLPL;
+  cjava = (c_ext & C_JAVA) == C_JAVA;
+  if (cjava)
+    { qualifier = "."; qlen = 1; }
+  else
+    { qualifier = "::"; qlen = 2; }
 
   while (!feof (inf))
     {
@@ -2160,7 +2652,7 @@ C_entries (c_ext, inf)
 	    case '\0':
 	      /* Newlines inside comments do not end macro definitions in
 		 traditional cpp. */
-	      CNL_SAVE_DEFINEDEF;
+	      CNL_SAVE_DEFINEDEF ();
 	      break;
 	    }
 	  continue;
@@ -2176,7 +2668,7 @@ C_entries (c_ext, inf)
 	      /* Newlines inside strings do not end macro definitions
 		 in traditional cpp, even though compilers don't
 		 usually accept them. */
-	      CNL_SAVE_DEFINEDEF;
+	      CNL_SAVE_DEFINEDEF ();
 	      break;
 	    }
 	  continue;
@@ -2187,7 +2679,7 @@ C_entries (c_ext, inf)
 	    {
 	    case '\0':
 	      /* Hmmm, something went wrong. */
-	      CNL;
+	      CNL ();
 	      /* FALLTHRU */
 	    case '\'':
 	      inchar = FALSE;
@@ -2200,13 +2692,19 @@ C_entries (c_ext, inf)
 	  {
 	  case '"':
 	    inquote = TRUE;
-	    if (funcdef != finlist && funcdef != fignore)
-	      funcdef = fnone;
+	    if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
+	      {
+		fvextern = FALSE;
+		fvdef = fvnone;
+	      }
 	    continue;
 	  case '\'':
 	    inchar = TRUE;
-	    if (funcdef != finlist && funcdef != fignore)
-	      funcdef = fnone;
+	    if (fvdef != finlist && fvdef != fignore && fvdef !=vignore)
+	      {
+		fvextern = FALSE;
+		fvdef = fvnone;
+	      }
 	    continue;
 	  case '/':
 	    if (*lp == '*')
@@ -2227,7 +2725,7 @@ C_entries (c_ext, inf)
 	      {
 		/* entering or exiting rules section in yacc file */
 		lp++;
-		definedef = dnone; funcdef = fnone;
+		definedef = dnone; fvdef = fvnone; fvextern = FALSE;
 		typdef = tnone; structdef = snone;
 		next_token_is_func = FALSE;
 		midtoken = inquote = inchar = incomm = quotednl = FALSE;
@@ -2241,7 +2739,7 @@ C_entries (c_ext, inf)
 	    if (definedef == dnone)
 	      {
 		char *cp;
-		logical cpptoken = TRUE;
+		bool cpptoken = TRUE;
 
 		/* Look back on this line.  If all blanks, or nonblanks
 		   followed by an end of comment, this is a preprocessor
@@ -2268,55 +2766,75 @@ C_entries (c_ext, inf)
       /* Consider token only if some complicated conditions are satisfied. */
       if ((definedef != dnone
 	   || (cblev == 0 && structdef != scolonseen)
-	   || (cblev == 1 && cplpl && structdef == sinbody))
+	   || (cblev == 1 && cplpl && structdef == sinbody)
+	   || (structdef == sinbody && purec))
 	  && typdef != tignore
 	  && definedef != dignorerest
-	  && funcdef != finlist)
+	  && fvdef != finlist)
 	{
 	  if (midtoken)
 	    {
 	      if (endtoken (c))
 		{
-		  if (c == ':' && cplpl && *lp == ':' && begtoken(*(lp + 1)))
+		  bool funorvar = FALSE;
+
+		  if (c == ':' && cplpl && *lp == ':' && begtoken (lp[1]))
 		    {
 		      /*
 		       * This handles :: in the middle, but not at the
-		       * beginning of an identifier.
+		       * beginning of an identifier.  Also, space-separated
+		       * :: is not recognised.
 		       */
 		      lp += 2;
-		      toklen += 3;
+		      toklen += 2;
+		      c = lp[-1];
+		      goto intok;
 		    }
 		  else
 		    {
-		      logical is_func = FALSE;
-
 		      if (yacc_rules
 			  || consider_token (newlb.buffer + tokoff, toklen, c,
-					     c_ext, cblev, parlev, &is_func))
+					     c_ext, cblev, parlev, &funorvar))
 			{
-			  if (structdef == sinbody
+			  if (fvdef == foperator)
+			    {
+			      char *oldlp = lp;
+			      lp = skip_spaces (lp-1);
+			      if (*lp != '\0')
+				lp += 1;
+			      while (*lp != '\0'
+				     && !isspace (*lp) && *lp != '(')
+				lp += 1;
+			      c = *lp++;
+			      toklen += lp - oldlp;
+			    }
+			  tok.named = FALSE;
+			  if (!purec
+			      && funorvar
 			      && definedef == dnone
-			      && is_func)
-			    /* function defined in C++ class body */
+			      && structdef == sinbody)
+			    /* function or var defined in C++ class body */
 			    {
-			      GROW_LINEBUFFER (token_name,
-					       strlen(structtag)+2+toklen+1);
+			      int len = strlen (structtag) + qlen + toklen;
+			      grow_linebuffer (&token_name, len + 1);
 			      strcpy (token_name.buffer, structtag);
-			      strcat (token_name.buffer, "::");
+			      strcat (token_name.buffer, qualifier);
 			      strncat (token_name.buffer,
-				       newlb.buffer+tokoff, toklen);
+				       newlb.buffer + tokoff, toklen);
+			      token_name.len = len;
 			      tok.named = TRUE;
 			    }
 			  else if (objdef == ocatseen)
 			    /* Objective C category */
 			    {
-			      GROW_LINEBUFFER (token_name,
-					       strlen(objtag)+2+toklen+1);
+			      int len = strlen (objtag) + 2 + toklen;
+			      grow_linebuffer (&token_name, len + 1);
 			      strcpy (token_name.buffer, objtag);
 			      strcat (token_name.buffer, "(");
 			      strncat (token_name.buffer,
-				       newlb.buffer+tokoff, toklen);
+				       newlb.buffer + tokoff, toklen);
 			      strcat (token_name.buffer, ")");
+			      token_name.len = len;
 			      tok.named = TRUE;
 			    }
 			  else if (objdef == omethodtag
@@ -2327,17 +2845,20 @@ C_entries (c_ext, inf)
 			    }
 			  else
 			    {
-			      GROW_LINEBUFFER (token_name, toklen+1);
+			      grow_linebuffer (&token_name, toklen + 1);
 			      strncpy (token_name.buffer,
-				       newlb.buffer+tokoff, toklen);
+				       newlb.buffer + tokoff, toklen);
 			      token_name.buffer[toklen] = '\0';
-			      if (structdef == stagseen
-				  || typdef == tend
-				  || (is_func
-				      && definedef == dignorerest)) /* macro */
-				tok.named = TRUE;
-			      else
-				tok.named = FALSE;
+			      token_name.len = toklen;
+			      /* Name macros and members. */
+			      tok.named = (structdef == stagseen
+					   || typdef == ttypeseen
+					   || typdef == tend
+					   || (funorvar
+					       && definedef == dignorerest)
+					   || (funorvar
+					       && definedef == dnone
+					       && structdef == sinbody));
 			    }
 			  tok.lineno = lineno;
 			  tok.linelen = tokoff + toklen + 1;
@@ -2346,7 +2867,8 @@ C_entries (c_ext, inf)
 			  tok.valid = TRUE;
 
 			  if (definedef == dnone
-			      && (funcdef == ftagseen
+			      && (fvdef == fvnameseen
+				  || fvdef == foperator
 				  || structdef == stagseen
 				  || typdef == tend
 				  || objdef != onone))
@@ -2355,12 +2877,13 @@ C_entries (c_ext, inf)
 				switch_line_buffers ();
 			    }
 			  else
-			    make_tag (is_func);
+			    make_C_tag (funorvar);
 			}
 		      midtoken = FALSE;
 		    }
 		} /* if (endtoken (c)) */
 	      else if (intoken (c))
+		intok:
 		{
 		  toklen++;
 		  continue;
@@ -2371,20 +2894,20 @@ C_entries (c_ext, inf)
 	      switch (definedef)
 		{
 		case dnone:
-		  switch (funcdef)
+		  switch (fvdef)
 		    {
 		    case fstartlist:
-		      funcdef = finlist;
+		      fvdef = finlist;
 		      continue;
 		    case flistseen:
-		      make_tag (TRUE);
-		      funcdef = fignore;
+		      make_C_tag (TRUE); /* a function */
+		      fvdef = fignore;
 		      break;
-		    case ftagseen:
-		      funcdef = fnone;
+		    case fvnameseen:
+		      fvdef = fvnone;
 		      break;
 		    }
-		  if (structdef == stagseen)
+		  if (structdef == stagseen && !cjava)
 		    structdef = snone;
 		  break;
 		case dsharpseen:
@@ -2412,30 +2935,32 @@ C_entries (c_ext, inf)
 	    {
 	    case  otagseen:
 	      objdef = oignore;
-	      make_tag (TRUE);
+	      make_C_tag (TRUE); /* an Objective C class */
 	      break;
 	    case omethodtag:
 	    case omethodparm:
 	      objdef = omethodcolon;
 	      methodlen += 1;
-	      GROW_LINEBUFFER (token_name, methodlen+1);
+	      grow_linebuffer (&token_name, methodlen + 1);
 	      strcat (token_name.buffer, ":");
+	      token_name.len = methodlen;
 	      break;
 	    }
 	  if (structdef == stagseen)
 	    structdef = scolonseen;
 	  else
-	    switch (funcdef)
+	    switch (fvdef)
 	      {
-	      case ftagseen:
+	      case fvnameseen:
 		if (yacc_rules)
 		  {
-		    make_tag (FALSE);
-		    funcdef = fignore;
+		    make_C_tag (FALSE); /* a yacc function */
+		    fvdef = fignore;
 		  }
 		break;
 	      case fstartlist:
-		funcdef = fnone;
+		fvextern = FALSE;
+		fvdef = fvnone;
 		break;
 	      }
 	  break;
@@ -2446,14 +2971,30 @@ C_entries (c_ext, inf)
 	    switch (typdef)
 	      {
 	      case tend:
-		make_tag (FALSE);
+		make_C_tag (FALSE); /* a typedef */
 		/* FALLTHRU */
 	      default:
 		typdef = tnone;
 	      }
-	  if (funcdef != fignore)
+	  switch (fvdef)
 	    {
-	      funcdef = fnone;
+	    case fignore:
+	      break;
+	    case fvnameseen:
+	      if ((members && cblev == 1)
+		  || (globals && cblev == 0 && (!fvextern || declarations)))
+		make_C_tag (FALSE); /* a variable */
+	      fvextern = FALSE;
+	      fvdef = fvnone;
+	      tok.valid = FALSE;
+	      break;
+	    case flistseen:
+	      if (declarations && (cblev == 0 || cblev == 1))
+		make_C_tag (TRUE); /* a function declaration */
+	      /* FALLTHRU */
+	    default:
+	      fvextern = FALSE;
+	      fvdef = fvnone;
 	      /* The following instruction invalidates the token.
 		 Probably the token should be invalidated in all
 		 other cases  where some state machine is reset. */
@@ -2469,12 +3010,25 @@ C_entries (c_ext, inf)
 	    {
 	    case omethodtag:
 	    case omethodparm:
-	      make_tag (TRUE);
+	      make_C_tag (TRUE); /* an Objective C method */
 	      objdef = oinbody;
 	      break;
 	    }
-	  if (funcdef != finlist && funcdef != fignore)
-	    funcdef = fnone;
+	  switch (fvdef)
+	    {
+	    case foperator:
+	    case finlist:
+	    case fignore:
+	    case vignore:
+	      break;
+	    case fvnameseen:
+	      if ((members && cblev == 1)
+		  || (globals && cblev == 0 && (!fvextern || declarations)))
+		make_C_tag (FALSE); /* a variable */
+	      break;
+	    default:
+	      fvdef = fvnone;
+	    }
 	  if (structdef == stagseen)
 	    structdef = snone;
 	  break;
@@ -2484,11 +3038,24 @@ C_entries (c_ext, inf)
 	  if (cblev == 0 && typdef == tend)
 	    {
 	      typdef = tignore;
-	      make_tag (FALSE);
+	      make_C_tag (FALSE);	/* a typedef */
 	      break;
 	    }
-	  if (funcdef != finlist && funcdef != fignore)
-	    funcdef = fnone;
+	  switch (fvdef)
+	    {
+	    case foperator:
+	    case finlist:
+	    case fignore:
+	    case vignore:
+	      break;
+	    case fvnameseen:
+	      if ((members && cblev == 1)
+		  || (globals && cblev == 0 && (!fvextern || declarations)))
+		make_C_tag (FALSE); /* a variable */
+	      /* FALLTHRU */
+	    default:
+	      fvdef = fvnone;
+	    }
 	  if (structdef == stagseen)
 	    structdef = snone;
 	  break;
@@ -2497,29 +3064,25 @@ C_entries (c_ext, inf)
 	    break;
 	  if (objdef == otagseen && parlev == 0)
 	    objdef = oparenseen;
-	  switch (funcdef)
+	  switch (fvdef)
 	    {
-	    case fnone:
-	      switch (typdef)
+	    case fvnameseen:
+	      if (typdef == ttypeseen
+		  && tok.valid
+		  && *lp != '*'
+		  && structdef != sinbody)
 		{
-		case ttypedseen:
-		case tend:
-		  /* Make sure that the next char is not a '*'.
-		     This handles constructs like:
+		  /* This handles constructs like:
 		     typedef void OperatorFun (int fun); */
-		  if (*lp != '*')
-		    {
-		      typdef = tignore;
-		      make_tag (FALSE);
-		    }
-		  break;
-		} /* switch (typdef) */
-	      break;
-	    case ftagseen:
-	      funcdef = fstartlist;
+		  make_C_tag (FALSE);
+		  typdef = tignore;
+		}
+	      /* FALLTHRU */
+	    case foperator:
+	      fvdef = fstartlist;
 	      break;
 	    case flistseen:
-	      funcdef = finlist;
+	      fvdef = finlist;
 	      break;
 	    }
 	  parlev++;
@@ -2529,22 +3092,22 @@ C_entries (c_ext, inf)
 	    break;
 	  if (objdef == ocatseen && parlev == 1)
 	    {
-	      make_tag (TRUE);
+	      make_C_tag (TRUE); /* an Objective C category */
 	      objdef = oignore;
 	    }
 	  if (--parlev == 0)
 	    {
-	      switch (funcdef)
+	      switch (fvdef)
 		{
 		case fstartlist:
 		case finlist:
-		  funcdef = flistseen;
+		  fvdef = flistseen;
 		  break;
 		}
-	      if (cblev == 0 && typdef == tend)
+	      if (cblev == 0 && (typdef == tend))
 		{
 		  typdef = tignore;
-		  make_tag (FALSE);
+		  make_C_tag (FALSE); /* a typedef */
 		}
 	    }
 	  else if (parlev < 0)	/* can happen due to ill-conceived #if's. */
@@ -2553,42 +3116,42 @@ C_entries (c_ext, inf)
 	case '{':
 	  if (definedef != dnone)
 	    break;
-	  if (typdef == ttypedseen)
+	  if (typdef == ttypeseen)
 	    typdef = tinbody;
 	  switch (structdef)
 	    {
 	    case skeyseen:	/* unnamed struct */
-	      structtag = "_anonymous_";
 	      structdef = sinbody;
+	      structtag = "_anonymous_";
 	      break;
 	    case stagseen:
 	    case scolonseen:	/* named struct */
 	      structdef = sinbody;
-	      make_tag (FALSE);
+	      make_C_tag (FALSE);	/* a struct */
 	      break;
 	    }
-	  switch (funcdef)
+	  switch (fvdef)
 	    {
 	    case flistseen:
-	      make_tag (TRUE);
+	      make_C_tag (TRUE); /* a function */
 	      /* FALLTHRU */
 	    case fignore:
-	      funcdef = fnone;
+	      fvdef = fvnone;
 	      break;
-	    case fnone:
+	    case fvnone:
 	      switch (objdef)
 		{
 		case otagseen:
-		  make_tag (TRUE);
+		  make_C_tag (TRUE); /* an Objective C class */
 		  objdef = oignore;
 		  break;
 		case omethodtag:
 		case omethodparm:
-		  make_tag (TRUE);
+		  make_C_tag (TRUE); /* an Objective C method */
 		  objdef = oinbody;
 		  break;
 		default:
-		  /* Neutralize `extern "C" {' grot and look inside structs. */
+		  /* Neutralize `extern "C" {' grot. */
 		  if (cblev == 0 && structdef == snone && typdef == tnone)
 		    cblev = -1;
 		}
@@ -2598,8 +3161,8 @@ C_entries (c_ext, inf)
 	case '*':
 	  if (definedef != dnone)
 	    break;
-	  if (funcdef == fstartlist)
-	    funcdef = fnone;	/* avoid tagging `foo' in `foo (*bar()) ()' */
+	  if (fvdef == fstartlist)
+	    fvdef = fvnone;	/* avoid tagging `foo' in `foo (*bar()) ()' */
 	  break;
 	case '}':
 	  if (definedef != dnone)
@@ -2627,6 +3190,25 @@ C_entries (c_ext, inf)
 	      structtag = "<error>";
 	    }
 	  break;
+	case '=':
+	  if (definedef != dnone)
+	    break;
+	  switch (fvdef)
+	    {
+	    case foperator:
+	    case finlist:
+	    case fignore:
+	    case vignore:
+	      break;
+	    case fvnameseen:
+	      if ((members && cblev == 1)
+		  || (globals && cblev == 0 && (!fvextern || declarations)))
+		make_C_tag (FALSE); /* a variable */
+	      /* FALLTHRU */
+	    default:
+	      fvdef = vignore;
+	    }
+	  break;
 	case '+':
 	case '-':
 	  if (objdef == oinbody && cblev == 0)
@@ -2635,25 +3217,33 @@ C_entries (c_ext, inf)
 	      break;
 	    }
 	  /* FALLTHRU */
-	case '=': case '#': case '~': case '&': case '%': case '/':
-	case '|': case '^': case '!': case '<': case '>': case '.': case '?':
+	case '#': case '~': case '&': case '%': case '/': case '|':
+	case '^': case '!': case '<': case '>': case '.': case '?': case ']':
 	  if (definedef != dnone)
 	    break;
-	  /* These surely cannot follow a function tag. */
-	  if (funcdef != finlist && funcdef != fignore)
-	    funcdef = fnone;
+	  /* These surely cannot follow a function tag in C. */
+	  switch (fvdef)
+	    {
+	    case foperator:
+	    case finlist:
+	    case fignore:
+	    case vignore:
+	      break;
+	    default:
+	      fvdef = fvnone;
+	    }
 	  break;
 	case '\0':
 	  if (objdef == otagseen)
 	    {
-	      make_tag (TRUE);
+	      make_C_tag (TRUE); /* an Objective C class */
 	      objdef = oignore;
 	    }
 	  /* If a macro spans multiple lines don't reset its state. */
 	  if (quotednl)
-	    CNL_SAVE_DEFINEDEF;
+	    CNL_SAVE_DEFINEDEF ();
 	  else
-	    CNL;
+	    CNL ();
 	  break;
 	} /* switch (c) */
 
@@ -2687,6 +3277,14 @@ Cplusplus_entries (inf)
   C_entries (C_PLPL, inf);
 }
 
+/* Always do Java. */
+void
+Cjava_entries (inf)
+     FILE *inf;
+{
+  C_entries (C_JAVA, inf);
+}
+
 /* Always do C*. */
 void
 Cstar_entries (inf)
@@ -2703,19 +3301,43 @@ Yacc_entries (inf)
   C_entries (YACC, inf);
 }
 
-/* Fortran parsing */
+/* A useful macro. */
+#define LOOP_ON_INPUT_LINES(file_pointer, line_buffer, char_pointer)	\
+  for (lineno = charno = 0;	/* loop initialization */		\
+       !feof (file_pointer)	/* loop test */				\
+       && (lineno++,		/* instructions at start of loop */	\
+	   linecharno = charno,						\
+	   charno += readline (&line_buffer, file_pointer),		\
+	   char_pointer = lb.buffer,					\
+	   TRUE);							\
+      )
 
-char *dbp;
 
-logical
+/*
+ * Read a file, but do no processing.  This is used to do regexp
+ * matching on files that have no language defined.
+ */
+void
+just_read_file (inf)
+     FILE *inf;
+{
+  register char *dummy;
+
+  LOOP_ON_INPUT_LINES (inf, lb, dummy)
+    continue;
+}
+
+/* Fortran parsing */
+
+bool
 tail (cp)
      char *cp;
 {
   register int len = 0;
 
-  while (*cp && lowcase(*cp) == lowcase(dbp[len]))
+  while (*cp != '\0' && lowcase (*cp) == lowcase (dbp[len]))
     cp++, len++;
-  if (*cp == '\0' && !intoken(dbp[len]))
+  if (*cp == '\0' && !intoken (dbp[len]))
     {
       dbp += len;
       return TRUE;
@@ -2726,13 +3348,11 @@ tail (cp)
 void
 takeprec ()
 {
-  while (isspace (*dbp))
-    dbp++;
+  dbp = skip_spaces (dbp);
   if (*dbp != '*')
     return;
   dbp++;
-  while (isspace (*dbp))
-    dbp++;
+  dbp = skip_spaces (dbp);
   if (strneq (dbp, "(*)", 3))
     {
       dbp += 3;
@@ -2754,8 +3374,7 @@ getit (inf)
 {
   register char *cp;
 
-  while (isspace (*dbp))
-    dbp++;
+  dbp = skip_spaces (dbp);
   if (*dbp == '\0')
     {
       lineno++;
@@ -2765,39 +3384,26 @@ getit (inf)
       if (dbp[5] != '&')
 	return;
       dbp += 6;
-      while (isspace (*dbp))
-	dbp++;
+      dbp = skip_spaces (dbp);
     }
-  if (!isalpha (*dbp)
-      && *dbp != '_'
-      && *dbp != '$')
+  if (!isalpha (*dbp) && *dbp != '_' && *dbp != '$')
     return;
-  for (cp = dbp + 1;
-       (*cp
-	&& (isalpha (*cp) || isdigit (*cp) || (*cp == '_') || (*cp == '$')));
-       cp++)
+  for (cp = dbp + 1; *cp != '\0' && intoken (*cp); cp++)
     continue;
-  pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE,
+  pfnote (savenstr (dbp, cp-dbp), TRUE,
 	  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 }
 
+
 void
 Fortran_functions (inf)
      FILE *inf;
 {
-  lineno = 0;
-  charno = 0;
-
-  while (!feof (inf))
+  LOOP_ON_INPUT_LINES (inf, lb, dbp)
     {
-      lineno++;
-      linecharno = charno;
-      charno += readline (&lb, inf);
-      dbp = lb.buffer;
       if (*dbp == '%')
 	dbp++;			/* Ratfor escape to fortran */
-      while (isspace (*dbp))
-	dbp++;
+      dbp = skip_spaces (dbp);
       if (*dbp == '\0')
 	continue;
       switch (lowcase (*dbp))
@@ -2821,8 +3427,7 @@ Fortran_functions (inf)
 	case 'd':
 	  if (tail ("double"))
 	    {
-	      while (isspace (*dbp))
-		dbp++;
+	      dbp = skip_spaces (dbp);
 	      if (*dbp == '\0')
 		continue;
 	      if (tail ("precision"))
@@ -2831,8 +3436,7 @@ Fortran_functions (inf)
 	    }
 	  break;
 	}
-      while (isspace (*dbp))
-	dbp++;
+      dbp = skip_spaces (dbp);
       if (*dbp == '\0')
 	continue;
       switch (lowcase (*dbp))
@@ -2849,19 +3453,186 @@ Fortran_functions (inf)
 	  if (tail ("entry"))
 	    getit (inf);
 	  continue;
-	case 'p':
-	  if (tail ("program"))
+	case 'b':
+	  if (tail ("blockdata") || tail ("block data"))
 	    {
-	      getit (inf);
-	      continue;
+	      dbp = skip_spaces (dbp);
+	      if (*dbp == '\0')	/* assume un-named */
+		pfnote (savestr ("blockdata"), TRUE,
+			lb.buffer, dbp - lb.buffer, lineno, linecharno);
+	      else
+		getit (inf);	/* look for name */
 	    }
-	  if (tail ("procedure"))
-	    getit (inf);
 	  continue;
 	}
     }
 }
 
+/*
+ * Philippe Waroquiers <philippe.waroquiers@eurocontrol.be>, 1998-04-24
+ * Ada parsing
+ */
+/* Once we are positioned after an "interesting" keyword, let's get
+   the real tag value necessary. */
+void
+adagetit (inf, name_qualifier)
+     FILE *inf;
+     char *name_qualifier;
+{
+  register char *cp;
+  char *name;
+  char c;
+
+  while (!feof (inf))
+    {
+      dbp = skip_spaces (dbp);
+      if (*dbp == '\0'
+	  || (dbp[0] == '-' && dbp[1] == '-'))
+	{
+	  lineno++;
+	  linecharno = charno;
+	  charno += readline (&lb, inf);
+	  dbp = lb.buffer;
+	}
+      switch (*dbp)
+        {
+        case 'b':
+        case 'B':
+          if (tail ("body"))
+            {
+              /* Skipping body of   procedure body   or   package body or ....
+		 resetting qualifier to body instead of spec. */
+              name_qualifier = "/b";
+              continue;
+            }
+          break;
+        case 't':
+        case 'T':
+          /* Skipping type of   task type   or   protected type ... */
+          if (tail ("type"))
+            continue;
+          break;
+        }
+      if (*dbp == '"')
+	{
+	  dbp += 1;
+	  for (cp = dbp; *cp != '\0' && *cp != '"'; cp++)
+	    continue;
+	}
+      else
+	{
+	  dbp = skip_spaces (dbp);
+	  for (cp = dbp;
+	       (*cp != '\0'
+		&& (isalpha (*cp) || isdigit (*cp) || *cp == '_' || *cp == '.'));
+	       cp++)
+	    continue;
+	  if (cp == dbp)
+	    return;
+	}
+      c = *cp;
+      *cp = '\0';
+      name = concat (dbp, name_qualifier, "");
+      *cp = c;
+      pfnote (name, TRUE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+      if (c == '"')
+	dbp = cp + 1;
+      return;
+    }
+}
+
+void
+Ada_funcs (inf)
+     FILE *inf;
+{
+  bool inquote = FALSE;
+
+  LOOP_ON_INPUT_LINES (inf, lb, dbp)
+    {
+      while (*dbp != '\0')
+	{
+	  /* Skip a string i.e. "abcd". */
+	  if (inquote || (*dbp == '"'))
+	    {
+	      dbp = etags_strchr ((inquote) ? dbp : dbp+1, '"');
+	      if (dbp != NULL)
+		{
+		  inquote = FALSE;
+		  dbp += 1;
+		  continue;	/* advance char */
+		}
+	      else
+		{
+		  inquote = TRUE;
+		  break;	/* advance line */
+		}
+	    }
+
+	  /* Skip comments. */
+	  if (dbp[0] == '-' && dbp[1] == '-')
+	    break;		/* advance line */
+
+	  /* Skip character enclosed in single quote i.e. 'a'
+	     and skip single quote starting an attribute i.e. 'Image. */
+	  if (*dbp == '\'')
+	    {
+	      dbp++ ;
+	      if (*dbp != '\0')
+		dbp++;
+	      continue;
+	    }
+
+	  /* Search for beginning of a token.  */
+	  if (!begtoken (*dbp))
+	    {
+	      dbp++;
+	      continue;		/* advance char */
+	    }
+
+	  /* We are at the beginning of a token. */
+	  switch (*dbp)
+	    {
+	    case 'f':
+	    case 'F':
+	      if (!packages_only && tail ("function"))
+		adagetit (inf, "/f");
+	      else
+		break;		/* from switch */
+	      continue;		/* advance char */
+	    case 'p':
+	    case 'P':
+	      if (!packages_only && tail ("procedure"))
+		adagetit (inf, "/p");
+	      else if (tail ("package"))
+		adagetit (inf, "/s");
+	      else if (tail ("protected")) /* protected type */
+		adagetit (inf, "/t");
+	      else
+		break;		/* from switch */
+	      continue;		/* advance char */
+	    case 't':
+	    case 'T':
+	      if (!packages_only && tail ("task"))
+		adagetit (inf, "/k");
+	      else if (typedefs && !packages_only && tail ("type"))
+		{
+		  adagetit (inf, "/t");
+		  while (*dbp != '\0')
+		    dbp += 1;
+		}
+	      else
+		break;		/* from switch */
+	      continue;		/* advance char */
+	    }
+
+	  /* Look for the end of the token. */
+	  while (!endtoken (*dbp))
+	    dbp++;
+
+	} /* advance char */
+    } /* advance line */
+}
+
 /*
  * Bob Weiner, Motorola Inc., 4/3/94
  * Unix and microcontroller assembly tag handling
@@ -2873,16 +3644,8 @@ Asm_labels (inf)
 {
   register char *cp;
 
-  lineno = 0;
-  charno = 0;
-
-  while (!feof (inf))
+  LOOP_ON_INPUT_LINES (inf, lb, cp)
     {
-      lineno++;
-      linecharno = charno;
-      charno += readline (&lb, inf);
-      cp = lb.buffer;
-
       /* If first char is alphabetic or one of [_.$], test for colon
 	 following identifier. */
       if (isalpha (*cp) || *cp == '_' || *cp == '.' || *cp == '$')
@@ -2894,7 +3657,7 @@ Asm_labels (inf)
  	  if (*cp == ':' || isspace (*cp))
  	    {
  	      /* Found end of label, so copy it and add it to the table. */
- 	      pfnote ((CTAGS) ? savenstr(lb.buffer, cp-lb.buffer) : NULL, TRUE,
+ 	      pfnote (savenstr(lb.buffer, cp-lb.buffer), TRUE,
 		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
  	    }
  	}
@@ -2903,7 +3666,9 @@ Asm_labels (inf)
 
 /*
  * Perl support by Bart Robinson <lomew@cs.utah.edu>
+ *              enhanced by Michael Ernst <mernst@alum.mit.edu>
  * Perl sub names: look for /^sub[ \t\n]+[^ \t\n{]+/
+ * Perl variable names: /^(my|local).../
  */
 void
 Perl_functions (inf)
@@ -2911,32 +3676,134 @@ Perl_functions (inf)
 {
   register char *cp;
 
-  lineno = 0;
-  charno = 0;
-
-  while (!feof (inf))
+  LOOP_ON_INPUT_LINES (inf, lb, cp)
     {
-      lineno++;
-      linecharno = charno;
-      charno += readline (&lb, inf);
-      cp = lb.buffer;
-
-      if (*cp++ == 's' && *cp++ == 'u' && *cp++ == 'b' && isspace(*cp++))
+      if (*cp++ == 's'
+	  && *cp++ == 'u'
+	  && *cp++ == 'b' && isspace (*cp++))
 	{
-	  while (*cp && isspace(*cp))
-	    cp++;
-	  while (*cp && ! isspace(*cp) && *cp != '{')
-	    cp++;
-	  pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : NULL, TRUE,
-		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
-	}
-    }
-}
-
-/* Added by Mosur Mohan, 4/22/88 */
-/* Pascal parsing                */
-
-/*
+	  cp = skip_spaces (cp);
+ 	  if (*cp != '\0')
+ 	    {
+	      char *sp = cp;
+ 	      while (*cp != '\0'
+		     && !isspace (*cp) && *cp != '{' && *cp != '(')
+		cp++;
+	      pfnote (savenstr (sp, cp-sp), TRUE,
+ 		      lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+ 	    }
+ 	}
+       else if (globals		/* only if tagging global vars is enabled */
+		&& ((cp = lb.buffer,
+		     *cp++ == 'm'
+		     && *cp++ == 'y')
+		    || (cp = lb.buffer,
+			*cp++ == 'l'
+			&& *cp++ == 'o'
+			&& *cp++ == 'c'
+			&& *cp++ == 'a'
+			&& *cp++ == 'l'))
+		&& (*cp == '(' || isspace (*cp)))
+ 	{
+ 	  /* After "my" or "local", but before any following paren or space. */
+ 	  char *varname = NULL;
+
+ 	  cp = skip_spaces (cp);
+ 	  if (*cp == '$' || *cp == '@' || *cp == '%')
+ 	    {
+ 	      char* varstart = ++cp;
+ 	      while (isalnum (*cp) || *cp == '_')
+ 		cp++;
+ 	      varname = savenstr (varstart, cp-varstart);
+ 	    }
+ 	  else
+ 	    {
+ 	      /* Should be examining a variable list at this point;
+ 		 could insist on seeing an open parenthesis. */
+ 	      while (*cp != '\0' && *cp != ';' && *cp != '=' &&  *cp != ')')
+ 		cp++;
+ 	    }
+
+ 	  /* Perhaps I should back cp up one character, so the TAGS table
+ 	     doesn't mention (and so depend upon) the following char. */
+ 	  pfnote ((CTAGS) ? savenstr (lb.buffer, cp-lb.buffer) : varname,
+ 		  FALSE, lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	}
+    }
+}
+
+/*
+ * Python support by Eric S. Raymond <esr@thyrsus.com>
+ * Look for /^def[ \t\n]+[^ \t\n(:]+/ or /^class[ \t\n]+[^ \t\n(:]+/
+ */
+void
+Python_functions (inf)
+     FILE *inf;
+{
+  register char *cp;
+
+  LOOP_ON_INPUT_LINES (inf, lb, cp)
+    {
+      if (*cp++ == 'd'
+	  && *cp++ == 'e'
+	  && *cp++ == 'f' && isspace (*cp++))
+	{
+	  cp = skip_spaces (cp);
+	  while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':')
+	    cp++;
+	  pfnote (NULL, TRUE,
+		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	}
+
+      cp = lb.buffer;
+      if (*cp++ == 'c'
+	  && *cp++ == 'l'
+	  && *cp++ == 'a'
+	  && *cp++ == 's'
+	  && *cp++ == 's' && isspace (*cp++))
+	{
+	  cp = skip_spaces (cp);
+	  while (*cp != '\0' && !isspace (*cp) && *cp != '(' && *cp != ':')
+	    cp++;
+	  pfnote (NULL, TRUE,
+		  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+	}
+    }
+}
+
+/* Idea by Corny de Souza
+ * Cobol tag functions
+ * We could look for anything that could be a paragraph name.
+ * i.e. anything that starts in column 8 is one word and ends in a full stop.
+ */
+void
+Cobol_paragraphs (inf)
+     FILE *inf;
+{
+  register char *bp, *ep;
+
+  LOOP_ON_INPUT_LINES (inf, lb, bp)
+    {
+      if (lb.len < 9)
+	continue;
+      bp += 8;
+
+      /* If eoln, compiler option or comment ignore whole line. */
+      if (bp[-1] != ' ' || !isalnum (bp[0]))
+        continue;
+
+      for (ep = bp; isalnum (*ep) || *ep == '-'; ep++)
+	continue;
+      if (*ep++ == '.')
+	pfnote (savenstr (bp, ep-bp), TRUE,
+		lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+    }
+}
+
+/* Added by Mosur Mohan, 4/22/88 */
+/* Pascal parsing                */
+
+/*
  *  Locates tags for procedures & functions.  Doesn't do any type- or
  *  var-definitions.  It does look for the keyword "extern" or
  *  "forward" immediately following the procedure statement; if found,
@@ -2946,12 +3813,12 @@ void
 Pascal_functions (inf)
      FILE *inf;
 {
-  struct linebuffer tline;	/* mostly copied from C_entries */
+  linebuffer tline;		/* mostly copied from C_entries */
   long save_lcno;
   int save_lineno, save_len;
   char c, *cp, *namebuf;
 
-  logical			/* each of these flags is TRUE iff: */
+  bool				/* each of these flags is TRUE iff: */
     incomment,			/* point is inside a comment */
     inquote,			/* point is inside '..' string */
     get_tagname,		/* point is after PROCEDURE/FUNCTION
@@ -2963,11 +3830,12 @@ Pascal_functions (inf)
 				   is a FORWARD/EXTERN to be ignored, or
 				   whether it is a real tag */
 
+  save_lcno = save_lineno = save_len = 0; /* keep compiler quiet */
+  namebuf = NULL;		/* keep compiler quiet */
   lineno = 0;
   charno = 0;
   dbp = lb.buffer;
   *dbp = '\0';
-  save_len = 0;
   initbuffer (&tline);
 
   incomment = inquote = FALSE;
@@ -2976,8 +3844,8 @@ Pascal_functions (inf)
   inparms = FALSE;		/* found '(' after "proc"            */
   verify_tag = FALSE;		/* check if "extern" is ahead        */
 
-  /* long main loop to get next char */
-  while (!feof (inf))
+
+  while (!feof (inf))		/* long main loop to get next char */
     {
       c = *dbp++;
       if (c == '\0')		/* if end of line */
@@ -2988,8 +3856,8 @@ Pascal_functions (inf)
 	  dbp = lb.buffer;
 	  if (*dbp == '\0')
 	    continue;
-	  if (!((found_tag && verify_tag) ||
-		get_tagname))
+	  if (!((found_tag && verify_tag)
+		|| get_tagname))
 	    c = *dbp++;		/* only if don't need *dbp pointing
 				   to the beginning of the name of
 				   the procedure or function */
@@ -3077,15 +3945,15 @@ Pascal_functions (inf)
 	    continue;
 
 	  /* save all values for later tagging */
-	  GROW_LINEBUFFER (tline, strlen (lb.buffer) + 1);
+	  grow_linebuffer (&tline, lb.len + 1);
 	  strcpy (tline.buffer, lb.buffer);
 	  save_lineno = lineno;
 	  save_lcno = linecharno;
 
 	  /* grab block name */
-	  for (cp = dbp + 1; *cp && (!endtoken (*cp)); cp++)
+	  for (cp = dbp + 1; *cp != '\0' && !endtoken (*cp); cp++)
 	    continue;
-	  namebuf = (CTAGS) ? savenstr (dbp, cp-dbp) : NULL;
+	  namebuf = savenstr (dbp, cp-dbp);
 	  dbp = cp;		/* set dbp to e-o-token */
 	  save_len = dbp - lb.buffer + 1;
 	  get_tagname = FALSE;
@@ -3131,12 +3999,12 @@ int
 L_isquote (strp)
      register char *strp;
 {
-  return ((*(++strp) == 'q' || *strp == 'Q')
-	  && (*(++strp) == 'u' || *strp == 'U')
-	  && (*(++strp) == 'o' || *strp == 'O')
-	  && (*(++strp) == 't' || *strp == 'T')
-	  && (*(++strp) == 'e' || *strp == 'E')
-	  && isspace(*(++strp)));
+  return ((*++strp == 'q' || *strp == 'Q')
+	  && (*++strp == 'u' || *strp == 'U')
+	  && (*++strp == 'o' || *strp == 'O')
+	  && (*++strp == 't' || *strp == 'T')
+	  && (*++strp == 'e' || *strp == 'E')
+	  && isspace (*++strp));
 }
 
 void
@@ -3146,20 +4014,23 @@ L_getit ()
 
   if (*dbp == '\'')		/* Skip prefix quote */
     dbp++;
-  else if (*dbp == '(' && L_isquote (dbp)) /* Skip "(quote " */
+  else if (*dbp == '(')
   {
-    dbp += 7;
-    while (isspace(*dbp))
-      dbp++;
+    if (L_isquote (dbp))
+      dbp += 7;			/* Skip "(quote " */
+    else
+      dbp += 1;			/* Skip "(" before name in (defstruct (foo)) */
+    dbp = skip_spaces (dbp);
   }
+
   for (cp = dbp /*+1*/;
-       *cp && *cp != '(' && *cp != ' ' && *cp != ')';
+       *cp != '\0' && *cp != '(' && !isspace(*cp) && *cp != ')';
        cp++)
     continue;
   if (cp == dbp)
     return;
 
-  pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE,
+  pfnote (savenstr (dbp, cp-dbp), TRUE,
 	  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
 }
 
@@ -3167,23 +4038,14 @@ void
 Lisp_functions (inf)
      FILE *inf;
 {
-  lineno = 0;
-  charno = 0;
-
-  while (!feof (inf))
+  LOOP_ON_INPUT_LINES (inf, lb, dbp)
     {
-      lineno++;
-      linecharno = charno;
-      charno += readline (&lb, inf);
-      dbp = lb.buffer;
       if (dbp[0] == '(')
 	{
 	  if (L_isdef (dbp))
 	    {
-	      while (!isspace (*dbp))
-		dbp++;
-	      while (isspace (*dbp))
-		dbp++;
+	      dbp = skip_non_spaces (dbp);
+	      dbp = skip_spaces (dbp);
 	      L_getit ();
 	    }
 	  else
@@ -3191,7 +4053,7 @@ Lisp_functions (inf)
 	      /* Check for (foo::defmumble name-defined ... */
 	      do
 		dbp++;
-	      while (*dbp && !isspace (*dbp)
+	      while (*dbp != '\0' && !isspace (*dbp)
 		     && *dbp != ':' && *dbp != '(' && *dbp != ')');
 	      if (*dbp == ':')
 		{
@@ -3201,10 +4063,8 @@ Lisp_functions (inf)
 
 		  if (L_isdef (dbp - 1))
 		    {
-		      while (!isspace (*dbp))
-			dbp++;
-		      while (isspace (*dbp))
-			dbp++;
+		      dbp = skip_non_spaces (dbp);
+		      dbp = skip_spaces (dbp);
 		      L_getit ();
 		    }
 		}
@@ -3213,6 +4073,40 @@ Lisp_functions (inf)
     }
 }
 
+/*
+ * Postscript tag functions
+ * Just look for lines where the first character is '/'
+ * Richard Mlynarik <mly@adoc.xerox.com>
+ * Also look at "defineps" for PSWrap
+ * suggested by Masatake YAMATO <masata-y@is.aist-nara.ac.jp>
+ */
+void
+Postscript_functions (inf)
+     FILE *inf;
+{
+  register char *bp, *ep;
+
+  LOOP_ON_INPUT_LINES (inf, lb, bp)
+    {
+      if (bp[0] == '/')
+	{
+	  for (ep = bp+1;
+	       *ep != '\0' && *ep != ' ' && *ep != '{';
+	       ep++)
+	    continue;
+	  pfnote (savenstr (bp, ep-bp), TRUE,
+		  lb.buffer, ep - lb.buffer + 1, lineno, linecharno);
+	}
+      else if (strneq (bp, "defineps", 8))
+	{
+	  bp = skip_non_spaces (bp);
+ 	  bp = skip_spaces (bp);
+	  get_tag (bp);
+	}
+    }
+}
+
+
 /*
  * Scheme tag functions
  * look for (def... xyzzy
@@ -3221,65 +4115,38 @@ Lisp_functions (inf)
  * look for (set! xyzzy
  */
 
-void get_scheme ();
-
 void
 Scheme_functions (inf)
      FILE *inf;
 {
-  lineno = 0;
-  charno = 0;
+  register char *bp;
 
-  while (!feof (inf))
+  LOOP_ON_INPUT_LINES (inf, lb, bp)
     {
-      lineno++;
-      linecharno = charno;
-      charno += readline (&lb, inf);
-      dbp = lb.buffer;
-      if (dbp[0] == '(' &&
-	  (dbp[1] == 'D' || dbp[1] == 'd') &&
-	  (dbp[2] == 'E' || dbp[2] == 'e') &&
-	  (dbp[3] == 'F' || dbp[3] == 'f'))
+      if (bp[0] == '('
+	  && (bp[1] == 'D' || bp[1] == 'd')
+	  && (bp[2] == 'E' || bp[2] == 'e')
+	  && (bp[3] == 'F' || bp[3] == 'f'))
 	{
-	  while (!isspace (*dbp))
-	    dbp++;
+	  bp = skip_non_spaces (bp);
 	  /* Skip over open parens and white space */
-	  while (*dbp && (isspace (*dbp) || *dbp == '('))
-	    dbp++;
-	  get_scheme ();
+	  while (isspace (*bp) || *bp == '(')
+	    bp++;
+	  get_tag (bp);
 	}
-      if (dbp[0] == '(' &&
-	  (dbp[1] == 'S' || dbp[1] == 's') &&
-	  (dbp[2] == 'E' || dbp[2] == 'e') &&
-	  (dbp[3] == 'T' || dbp[3] == 't') &&
-	  (dbp[4] == '!' || dbp[4] == '!') &&
-	  (isspace (dbp[5])))
+      if (bp[0] == '('
+	  && (bp[1] == 'S' || bp[1] == 's')
+	  && (bp[2] == 'E' || bp[2] == 'e')
+	  && (bp[3] == 'T' || bp[3] == 't')
+	  && (bp[4] == '!' || bp[4] == '!')
+	  && (isspace (bp[5])))
 	{
-	  while (!isspace (*dbp))
-	    dbp++;
-	  /* Skip over white space */
-	  while (isspace (*dbp))
-	    dbp++;
-	  get_scheme ();
+	  bp = skip_non_spaces (bp);
+	  bp = skip_spaces (bp);
+	  get_tag (bp);
 	}
     }
 }
-
-void
-get_scheme ()
-{
-  register char *cp;
-
-  if (*dbp == '\0')
-    return;
-  /* Go till you get to white space or a syntactic break */
-  for (cp = dbp + 1;
-       *cp && *cp != '(' && *cp != ')' && !isspace (*cp);
-       cp++)
-    continue;
-  pfnote ((CTAGS) ? savenstr (dbp, cp-dbp) : NULL, TRUE,
-	  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
-}
 
 /* Find tags in TeX and LaTeX input files.  */
 
@@ -3304,9 +4171,6 @@ char *TEX_defenv = "\
 void TEX_mode ();
 struct TEX_tabent *TEX_decode_env ();
 int TEX_Token ();
-#if TeX_named_tokens
-void TEX_getit ();
-#endif
 
 char TEX_esc = '\\';
 char TEX_opgrp = '{';
@@ -3319,10 +4183,8 @@ void
 TeX_functions (inf)
      FILE *inf;
 {
-  char *lasthit;
-
-  lineno = 0;
-  charno = 0;
+  char *cp, *lasthit;
+  register int i;
 
   /* Select either \ or ! as escape character.  */
   TEX_mode (inf);
@@ -3331,30 +4193,28 @@ TeX_functions (inf)
   if (!TEX_toktab)
     TEX_toktab = TEX_decode_env ("TEXTAGS", TEX_defenv);
 
-  while (!feof (inf))
-    {				/* Scan each line in file */
-      lineno++;
-      linecharno = charno;
-      charno += readline (&lb, inf);
-      dbp = lb.buffer;
-      lasthit = dbp;
-      while (dbp = etags_strchr (dbp, TEX_esc)) /* Look at each esc in line */
+  LOOP_ON_INPUT_LINES (inf, lb, cp)
+    {
+      lasthit = cp;
+      /* Look at each esc in line. */
+      while ((cp = etags_strchr (cp, TEX_esc)) != NULL)
 	{
-	  register int i;
-
-	  if (!*(++dbp))
+	  if (*++cp == '\0')
 	    break;
-	  linecharno += dbp - lasthit;
-	  lasthit = dbp;
+	  linecharno += cp - lasthit;
+	  lasthit = cp;
 	  i = TEX_Token (lasthit);
-	  if (0 <= i)
+	  if (i >= 0)
 	    {
-	      pfnote (NULL, TRUE,
-		      lb.buffer, strlen (lb.buffer), lineno, linecharno);
-#if TeX_named_tokens
-	      TEX_getit (lasthit, TEX_toktab[i].len);
-#endif
-	      break;		/* We only save a line once */
+	      /* We seem to include the TeX command in the tag name.
+	      register char *p;
+	      for (p = lasthit + TEX_toktab[i].len;
+		   *p != '\0' && *p != TEX_clgrp;
+		   p++)
+		continue; */
+	      pfnote (/*savenstr (lasthit, p-lasthit)*/ (char *)NULL, TRUE,
+		      lb.buffer, lb.len, lineno, linecharno);
+	      break;		/* We only tag a line once */
 	    }
 	}
     }
@@ -3394,6 +4254,8 @@ TEX_mode (inf)
       TEX_opgrp = '<';
       TEX_clgrp = '>';
     }
+  /* If the input file is compressed, inf is a pipe, and rewind may fail.
+     No attempt is made to correct the situation. */
   rewind (inf);
 }
 
@@ -3414,11 +4276,14 @@ TEX_decode_env (evarname, defenv)
   if (!env)
     env = defenv;
   else
-    env = concat (env, defenv, "");
+    {
+      char *oldenv = env;
+      env = concat (oldenv, defenv, "");
+    }
 
   /* Allocate a token table */
   for (size = 1, p = env; p;)
-    if ((p = etags_strchr (p, ':')) && *(++p))
+    if ((p = etags_strchr (p, ':')) && *++p != '\0')
       size++;
   /* Add 1 to leave room for null terminator.  */
   tab = xnew (size + 1, struct TEX_tabent);
@@ -3448,32 +4313,10 @@ TEX_decode_env (evarname, defenv)
   return tab;
 }
 
-#if TeX_named_tokens
-/* Record a tag defined by a TeX command of length LEN and starting at NAME.
-   The name being defined actually starts at (NAME + LEN + 1).
-   But we seem to include the TeX command in the tag name.  */
-void
-TEX_getit (name, len)
-     char *name;
-     int len;
-{
-  char *p = name + len;
-
-  if (*name == '\0')
-    return;
-
-  /* Let tag name extend to next group close (or end of line) */
-  while (*p && *p != TEX_clgrp)
-    p++;
-  pfnote (savenstr (name, p-name), TRUE,
-	  lb.buffer, strlen (lb.buffer), lineno, linecharno);
-}
-#endif
-
 /* If the text at CP matches one of the tag-defining TeX command names,
    return the pointer to the first occurrence of that command in TEX_toktab.
    Otherwise return -1.
-   Keep the capital `T' in `Token' for dumb truncating compilers
+   Keep the capital `T' in `token' for dumb truncating compilers
    (this distinguishes it from `TEX_toktab' */
 int
 TEX_Token (cp)
@@ -3491,16 +4334,17 @@ TEX_Token (cp)
  * Prolog support (rewritten) by Anders Lindgren, Mar. 96
  *
  * Assumes that the predicate starts at column 0.
- * Only the first clause of a predicate is added. 
+ * Only the first clause of a predicate is added.
  */
+int prolog_pred ();
+void prolog_skip_comment ();
+int prolog_atom ();
+
 void
 Prolog_functions (inf)
      FILE *inf;
 {
-  int prolog_pred ();
-  void prolog_skip_comment ();
-
-  char * last;
+  char *cp, *last;
   int len;
   int allocated;
 
@@ -3508,32 +4352,24 @@ Prolog_functions (inf)
   len = 0;
   last = NULL;
 
-  lineno = 0;
-  linecharno = 0;
-  charno = 0;
-
-  while (!feof (inf))
+  LOOP_ON_INPUT_LINES (inf, lb, cp)
     {
-      lineno++;
-      linecharno += charno;
-      charno = readline (&lb, inf);
-      dbp = lb.buffer;
-      if (dbp[0] == '\0')	/* Empty line */
+      if (cp[0] == '\0')	/* Empty line */
 	continue;
-      else if (isspace (dbp[0])) /* Not a predicate */
+      else if (isspace (cp[0])) /* Not a predicate */
 	continue;
-      else if (dbp[0] == '/' && dbp[1] == '*')	/* comment. */
-	prolog_skip_comment (&lb, inf, &lineno, &linecharno);
-      else if (len = prolog_pred (dbp, last)) 
+      else if (cp[0] == '/' && cp[1] == '*')	/* comment. */
+	prolog_skip_comment (&lb, inf);
+      else if ((len = prolog_pred (cp, last)) > 0)
 	{
 	  /* Predicate.  Store the function name so that we only
-	   * generates a tag for the first clause.  */
+	     generate a tag for the first clause.  */
 	  if (last == NULL)
 	    last = xnew(len + 1, char);
 	  else if (len + 1 > allocated)
-	    last = (char *) xrealloc(last, len + 1);
+	    last = xrnew (last, len + 1, char);
 	  allocated = len + 1;
-	  strncpy (last, dbp, len);
+	  strncpy (last, cp, len);
 	  last[len] = '\0';
 	}
     }
@@ -3542,7 +4378,7 @@ Prolog_functions (inf)
 
 void
 prolog_skip_comment (plb, inf)
-     struct linebuffer *plb;
+     linebuffer *plb;
      FILE *inf;
 {
   char *cp;
@@ -3573,18 +4409,15 @@ prolog_pred (s, last)
      char *s;
      char *last;		/* Name of last clause. */
 {
-  int prolog_atom();
-  int prolog_white();
-
   int pos;
   int len;
 
-  pos = prolog_atom(s, 0);
+  pos = prolog_atom (s, 0);
   if (pos < 1)
     return 0;
 
   len = pos;
-  pos += prolog_white(s, pos);
+  pos = skip_spaces (s + pos) - s;
 
   if ((s[pos] == '(') || (s[pos] == '.'))
     {
@@ -3592,12 +4425,11 @@ prolog_pred (s, last)
 	pos++;
 
       /* Save only the first clause. */
-      if ((last == NULL) ||
-	  (len != strlen(last)) ||
-	  (strncmp(s, last, len) != 0))
+      if (last == NULL
+	  || len != (int)strlen (last)
+	  || !strneq (s, last, len))
 	{
-	  pfnote ((CTAGS) ? savenstr (s, len) : NULL, TRUE,
-		  s, pos, lineno, linecharno);
+	  pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
 	  return len;
 	}
     }
@@ -3636,7 +4468,7 @@ prolog_atom (s, pos)
     {
       pos++;
 
-      while (1) 
+      while (1)
 	{
 	  if (s[pos] == '\'')
 	    {
@@ -3662,38 +4494,23 @@ prolog_atom (s, pos)
   else
     return -1;
 }
-
-/* Consume whitespace.  Return the number of bytes eaten. */
-int
-prolog_white (s, pos)
-     char *s;
-     int pos;
-{
-  int origpos;
-
-  origpos = pos;
-
-  while (isspace(s[pos]))
-    pos++;
-
-  return pos - origpos;
-}
 
-/* 
+/*
  * Support for Erlang  --  Anders Lindgren, Feb 1996.
  *
  * Generates tags for functions, defines, and records.
  *
  * Assumes that Erlang functions start at column 0.
  */
+int erlang_func ();
+void erlang_attribute ();
+int erlang_atom ();
+
 void
 Erlang_functions (inf)
      FILE *inf;
 {
-  int erlang_func ();
-  void erlang_attribute ();
-
-  char * last;
+  char *cp, *last;
   int len;
   int allocated;
 
@@ -3701,41 +4518,33 @@ Erlang_functions (inf)
   len = 0;
   last = NULL;
 
-  lineno = 0;
-  linecharno = 0;
-  charno = 0;
-
-  while (!feof (inf))
+  LOOP_ON_INPUT_LINES (inf, lb, cp)
     {
-      lineno++;
-      linecharno += charno;
-      charno = readline (&lb, inf);
-      dbp = lb.buffer;
-      if (dbp[0] == '\0')	/* Empty line */
+      if (cp[0] == '\0')	/* Empty line */
 	continue;
-      else if (isspace (dbp[0])) /* Not function nor attribute */
+      else if (isspace (cp[0])) /* Not function nor attribute */
 	continue;
-      else if (dbp[0] == '%')	/* comment */
+      else if (cp[0] == '%')	/* comment */
 	continue;
-      else if (dbp[0] == '"')	/* Sometimes, strings start in column one */
+      else if (cp[0] == '"')	/* Sometimes, strings start in column one */
 	continue;
-      else if (dbp[0] == '-') 	/* attribute, e.g. "-define" */
+      else if (cp[0] == '-') 	/* attribute, e.g. "-define" */
 	{
-	  erlang_attribute(dbp);
+	  erlang_attribute (cp);
 	  last = NULL;
 	}
-      else if (len = erlang_func (dbp, last)) 
+      else if ((len = erlang_func (cp, last)) > 0)
 	{
-	  /* 
+	  /*
 	   * Function.  Store the function name so that we only
 	   * generates a tag for the first clause.
 	   */
 	  if (last == NULL)
-	    last = xnew(len + 1, char);
+	    last = xnew (len + 1, char);
 	  else if (len + 1 > allocated)
-	    last = (char *) xrealloc(last, len + 1);
+	    last = xrnew (last, len + 1, char);
 	  allocated = len + 1;
-	  strncpy (last, dbp, len);
+	  strncpy (last, cp, len);
 	  last[len] = '\0';
 	}
     }
@@ -3757,37 +4566,32 @@ erlang_func (s, last)
      char *s;
      char *last;		/* Name of last clause. */
 {
-  int erlang_atom ();
-  int erlang_white ();
-
   int pos;
   int len;
 
-  pos = erlang_atom(s, 0);
+  pos = erlang_atom (s, 0);
   if (pos < 1)
     return 0;
 
   len = pos;
-  pos += erlang_white(s, pos);
+  pos = skip_spaces (s + pos) - s;
 
-  if (s[pos++] == '(')
-    {
-      /* Save only the first clause. */
-      if ((last == NULL) ||
-	  (len != strlen(last)) ||
-	  (strncmp(s, last, len) != 0))
+  /* Save only the first clause. */
+  if (s[pos++] == '('
+      && (last == NULL
+	  || len != (int)strlen (last)
+	  || !strneq (s, last, len)))
 	{
-	  pfnote ((CTAGS) ? savenstr (s, len) : NULL, TRUE,
-		  s, pos, lineno, linecharno);
+	  pfnote (savenstr (s, len), TRUE, s, pos, lineno, linecharno);
 	  return len;
 	}
-    }
+
   return 0;
 }
 
 
 /*
- * Handle attributes.  Currently, tags are generated for defines 
+ * Handle attributes.  Currently, tags are generated for defines
  * and records.
  *
  * They are on the form:
@@ -3799,27 +4603,19 @@ void
 erlang_attribute (s)
      char *s;
 {
-  int erlang_atom ();
-  int erlang_white ();
-
   int pos;
   int len;
 
-  if ((strncmp(s, "-define", 7) == 0) ||
-      (strncmp(s, "-record", 7) == 0))
+  if (strneq (s, "-define", 7) || strneq (s, "-record", 7))
     {
-      pos = 7;
-      pos += erlang_white(s, pos);
-
-      if (s[pos++] == '(') 
+      pos = skip_spaces (s + 7) - s;
+      if (s[pos++] == '(')
 	{
-	  pos += erlang_white(s, pos);
-	
-	  if (len = erlang_atom(s, pos))
-	    {
-	      pfnote ((CTAGS) ? savenstr (& s[pos], len) : NULL, TRUE,
-		      s, pos + len, lineno, linecharno);
-	    }
+	  pos = skip_spaces (s + pos) - s;
+	  len = erlang_atom (s, pos);
+	  if (len != 0)
+	    pfnote (savenstr (& s[pos], len), TRUE,
+		    s, pos + len, lineno, linecharno);
 	}
     }
   return;
@@ -3851,7 +4647,7 @@ erlang_atom (s, pos)
     {
       pos++;
 
-      while (1) 
+      while (1)
 	{
 	  if (s[pos] == '\'')
 	    {
@@ -3875,24 +4671,9 @@ erlang_atom (s, pos)
   else
     return -1;
 }
-
-/* Consume whitespace.  Return the number of bytes eaten */
-int
-erlang_white (s, pos)
-     char *s;
-     int pos;
-{
-  int origpos;
-
-  origpos = pos;
-
-  while (isspace (s[pos]))
-    pos++;
-
-  return pos - origpos;
-}
 
 #ifdef ETAGS_REGEXPS
+
 /* Take a string like "/blah/" and turn it into "blah", making sure
    that the first and last characters are the same, and handling
    quoted separator characters.  Actually, stops on the occurrence of
@@ -3905,7 +4686,7 @@ scan_separators (name)
 {
   char sep = name[0];
   char *copyto = name;
-  logical quoted = FALSE;
+  bool quoted = FALSE;
 
   for (++name; *name != '\0'; ++name)
     {
@@ -3936,29 +4717,90 @@ scan_separators (name)
   return name;
 }
 
+/* Look at the argument of --regex or --no-regex and do the right
+   thing.  Same for each line of a regexp file. */
+void
+analyse_regex (regex_arg, ignore_case)
+     char *regex_arg;
+     bool ignore_case;
+{
+  if (regex_arg == NULL)
+    free_patterns ();		/* --no-regex: remove existing regexps */
+
+  /* A real --regexp option or a line in a regexp file. */
+  switch (regex_arg[0])
+    {
+      /* Comments in regexp file or null arg to --regex. */
+    case '\0':
+    case ' ':
+    case '\t':
+      break;
+
+      /* Read a regex file.  This is recursive and may result in a
+	 loop, which will stop when the file descriptors are exhausted. */
+    case '@':
+      {
+	FILE *regexfp;
+	linebuffer regexbuf;
+	char *regexfile = regex_arg + 1;
+
+	/* regexfile is a file containing regexps, one per line. */
+	regexfp = fopen (regexfile, "r");
+	if (regexfp == NULL)
+	  {
+	    pfatal (regexfile);
+	    return;
+	  }
+	initbuffer (&regexbuf);
+	while (readline_internal (&regexbuf, regexfp) > 0)
+	  analyse_regex (regexbuf.buffer, ignore_case);
+	free (regexbuf.buffer);
+	fclose (regexfp);
+      }
+      break;
+
+      /* Regexp to be used for a specific language only. */
+    case '{':
+      {
+	language *lang;
+	char *lang_name = regex_arg + 1;
+	char *cp;
+
+	for (cp = lang_name; *cp != '}'; cp++)
+	  if (*cp == '\0')
+	    {
+	      error ("unterminated language name in regex: %s", regex_arg);
+	      return;
+	    }
+	*cp = '\0';
+	lang = get_language_from_name (lang_name);
+	if (lang == NULL)
+	  return;
+	add_regex (cp + 1, ignore_case, lang);
+      }
+      break;
+
+      /* Regexp to be used for any language. */
+    default:
+      add_regex (regex_arg, ignore_case, NULL);
+      break;
+    }
+}
+
 /* Turn a name, which is an ed-style (but Emacs syntax) regular
    expression, into a real regular expression by compiling it. */
 void
-add_regex (regexp_pattern)
+add_regex (regexp_pattern, ignore_case, lang)
      char *regexp_pattern;
+     bool ignore_case;
+     language *lang;
 {
   char *name;
   const char *err;
   struct re_pattern_buffer *patbuf;
+  pattern *pp;
 
-  if (regexp_pattern == NULL)
-    {
-      /* Remove existing regexps. */
-      num_patterns = 0;
-      patterns = NULL;
-      return;
-    }
 
-  if (regexp_pattern[0] == '\0')
-    {
-      error ("missing regexp", 0);
-      return;
-    }
   if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
     {
       error ("%s: unterminated regexp", regexp_pattern);
@@ -3967,13 +4809,14 @@ add_regex (regexp_pattern)
   name = scan_separators (regexp_pattern);
   if (regexp_pattern[0] == '\0')
     {
-      error ("null regexp", 0);
+      error ("null regexp", (char *)NULL);
       return;
     }
   (void) scan_separators (name);
 
   patbuf = xnew (1, struct re_pattern_buffer);
-  patbuf->translate = NULL;
+  /* Translation table to fold case if appropriate. */
+  patbuf->translate = (ignore_case) ? lc_trans : NULL;
   patbuf->fastmap = NULL;
   patbuf->buffer = NULL;
   patbuf->allocated = 0;
@@ -3985,16 +4828,14 @@ add_regex (regexp_pattern)
       return;
     }
 
-  num_patterns += 1;
-  if (num_patterns == 1)
-    patterns = xnew (1, struct pattern);
-  else
-    patterns = ((struct pattern *)
-		xrealloc (patterns,
-			  (num_patterns * sizeof (struct pattern))));
-  patterns[num_patterns - 1].pattern = patbuf;
-  patterns[num_patterns - 1].name_pattern = savestr (name);
-  patterns[num_patterns - 1].error_signaled = FALSE;
+  pp = p_head;
+  p_head = xnew (1, pattern);
+  p_head->regex = savestr (regexp_pattern);
+  p_head->p_next = pp;
+  p_head->language = lang;
+  p_head->pattern = patbuf;
+  p_head->name_pattern = savestr (name);
+  p_head->error_signaled = FALSE;
 }
 
 /*
@@ -4006,93 +4847,125 @@ substitute (in, out, regs)
      char *in, *out;
      struct re_registers *regs;
 {
-  char *result = NULL, *t;
-  int size = 0;
-
-  /* Pass 1: figure out how much size to allocate. */
-  for (t = out; *t; ++t)
-    {
-      if (*t == '\\')
-	{
-	  ++t;
-	  if (!*t)
-	    {
-	      fprintf (stderr, "%s: pattern substitution ends prematurely\n",
-		       progname);
-	      return NULL;
-	    }
-	  if (isdigit (*t))
-	    {
-	      int dig = *t - '0';
-	      size += regs->end[dig] - regs->start[dig];
-	    }
-	}
-    }
+  char *result, *t;
+  int size, dig, diglen;
+
+  result = NULL;
+  size = strlen (out);
+
+  /* Pass 1: figure out how much to allocate by finding all \N strings. */
+  if (out[size - 1] == '\\')
+    fatal ("pattern error in \"%s\"", out);
+  for (t = etags_strchr (out, '\\');
+       t != NULL;
+       t = etags_strchr (t + 2, '\\'))
+    if (isdigit (t[1]))
+      {
+	dig = t[1] - '0';
+	diglen = regs->end[dig] - regs->start[dig];
+	size += diglen - 2;
+      }
+    else
+      size -= 1;
 
   /* Allocate space and do the substitutions. */
   result = xnew (size + 1, char);
-  size = 0;
-  for (; *out; ++out)
-    {
-      if (*out == '\\')
-	{
-	  ++out;
-	  if (isdigit (*out))
-	    {
-	      /* Using "dig2" satisfies my debugger.  Bleah. */
-	      int dig2 = *out - '0';
-	      strncpy (result + size, in + regs->start[dig2],
-		       regs->end[dig2] - regs->start[dig2]);
-	      size += regs->end[dig2] - regs->start[dig2];
-	    }
-	  else
-	    result[size++] = *out;
-	}
-      else
-	result[size++] = *out;
-    }
-  result[size] = '\0';
+
+  for (t = result; *out != '\0'; out++)
+    if (*out == '\\' && isdigit (*++out))
+      {
+	/* Using "dig2" satisfies my debugger.  Bleah. */
+	dig = *out - '0';
+	diglen = regs->end[dig] - regs->start[dig];
+	strncpy (t, in + regs->start[dig], diglen);
+	t += diglen;
+      }
+    else
+      *t++ = *out;
+  *t = '\0';
+
+  if (DEBUG && (t > result + size || t - result != (int)strlen (result)))
+    abort ();
 
   return result;
 }
+
+/* Deallocate all patterns. */
+void
+free_patterns ()
+{
+  pattern *pp;
+  while (p_head != NULL)
+    {
+      pp = p_head->p_next;
+      free (p_head->regex);
+      free (p_head->name_pattern);
+      free (p_head);
+      p_head = pp;
+    }
+  return;
+}
 
+void
+get_tag (bp)
+     register char *bp;
+{
+  register char *cp;
+
+  if (*bp == '\0')
+    return;
+  /* Go till you get to white space or a syntactic break */
+  for (cp = bp + 1;
+       *cp != '\0' && *cp != '(' && *cp != ')' && !isspace (*cp);
+       cp++)
+    continue;
+  pfnote (savenstr (bp, cp-bp), TRUE,
+	  lb.buffer, cp - lb.buffer + 1, lineno, linecharno);
+}
+
 #endif /* ETAGS_REGEXPS */
 /* Initialize a linebuffer for use */
 void
-initbuffer (linebuffer)
-     struct linebuffer *linebuffer;
+initbuffer (lbp)
+     linebuffer *lbp;
 {
-  linebuffer->size = 200;
-  linebuffer->buffer = xnew (200, char);
+  lbp->size = 200;
+  lbp->buffer = xnew (200, char);
 }
 
 /*
- * Read a line of text from `stream' into `linebuffer'.
- * Return the number of characters read from `stream',
- * which is the length of the line including the newline, if any.
+ * Read a line of text from `stream' into `lbp', excluding the
+ * newline or CR-NL, if any.  Return the number of characters read from
+ * `stream', which is the length of the line including the newline.
+ *
+ * On DOS or Windows we do not count the CR character, if any, before the
+ * NL, in the returned length; this mirrors the behavior of emacs on those
+ * platforms (for text files, it translates CR-NL to NL as it reads in the
+ * file).
  */
 long
-readline_internal (linebuffer, stream)
-     struct linebuffer *linebuffer;
+readline_internal (lbp, stream)
+     linebuffer *lbp;
      register FILE *stream;
 {
-  char *buffer = linebuffer->buffer;
-  register char *p = linebuffer->buffer;
+  char *buffer = lbp->buffer;
+  register char *p = lbp->buffer;
   register char *pend;
   int chars_deleted;
 
-  pend = p + linebuffer->size;	/* Separate to avoid 386/IX compiler bug.  */
+  pend = p + lbp->size;		/* Separate to avoid 386/IX compiler bug.  */
 
   while (1)
     {
       register int c = getc (stream);
       if (p == pend)
 	{
-	  linebuffer->size *= 2;
-	  buffer = (char *) xrealloc (buffer, linebuffer->size);
-	  p += buffer - linebuffer->buffer;
-	  pend = buffer + linebuffer->size;
-	  linebuffer->buffer = buffer;
+	  /* We're at the end of linebuffer: expand it. */
+	  lbp->size *= 2;
+	  buffer = xrnew (buffer, lbp->size, char);
+	  p += buffer - lbp->buffer;
+	  pend = buffer + lbp->size;
+	  lbp->buffer = buffer;
 	}
       if (c == EOF)
 	{
@@ -4104,99 +4977,92 @@ readline_internal (linebuffer, stream)
 	{
 	  if (p > buffer && p[-1] == '\r')
 	    {
-	      *--p = '\0';
+	      p -= 1;
+#ifdef DOS_NT
+	     /* Assume CRLF->LF translation will be performed by Emacs
+		when loading this file, so CRs won't appear in the buffer.
+		It would be cleaner to compensate within Emacs;
+		however, Emacs does not know how many CRs were deleted
+		before any given point in the file.  */
+	      chars_deleted = 1;
+#else
 	      chars_deleted = 2;
+#endif
 	    }
 	  else
 	    {
-	      *p = '\0';
 	      chars_deleted = 1;
 	    }
+	  *p = '\0';
 	  break;
 	}
       *p++ = c;
     }
+  lbp->len = p - buffer;
 
-  return p - buffer + chars_deleted;
+  return lbp->len + chars_deleted;
 }
 
 /*
- * Like readline_internal, above, but try to match the input
- * line against any existing regular expressions.
+ * Like readline_internal, above, but in addition try to match the
+ * input line against relevant regular expressions.
  */
 long
-readline (linebuffer, stream)
-     struct linebuffer *linebuffer;
+readline (lbp, stream)
+     linebuffer *lbp;
      FILE *stream;
 {
   /* Read new line. */
-  long result = readline_internal (linebuffer, stream);
+  long result = readline_internal (lbp, stream);
 #ifdef ETAGS_REGEXPS
-  int i;
+  int match;
+  pattern *pp;
 
-  /* Match against all listed patterns. */
-  for (i = 0; i < num_patterns; ++i)
-    {
-      int match = re_match (patterns[i].pattern, linebuffer->buffer,
-			    (int)result, 0, &patterns[i].regs);
-      switch (match)
-	{
-	case -2:
-	  /* Some error. */
-	  if (!patterns[i].error_signaled)
-	    {
-	      error ("error while matching pattern %d", i);
-	      patterns[i].error_signaled = TRUE;
-	    }
-	  break;
-	case -1:
-	  /* No match. */
-	  break;
-	default:
-	  /* Match occurred.  Construct a tag. */
-	  if (patterns[i].name_pattern[0] != '\0')
-	    {
-	      /* Make a named tag. */
-	      char *name = substitute (linebuffer->buffer,
-				       patterns[i].name_pattern,
-				       &patterns[i].regs);
-	      if (name != NULL)
-		pfnote (name, TRUE,
-			linebuffer->buffer, match, lineno, linecharno);
-	    }
-	  else
-	    {
-	      /* Make an unnamed tag. */
-	      pfnote (NULL, TRUE,
-		      linebuffer->buffer, match, lineno, linecharno);
-	    }
-	  break;
-	}
-    }
+  /* Match against relevant patterns. */
+  if (lbp->len > 0)
+    for (pp = p_head; pp != NULL; pp = pp->p_next)
+      {
+	/* Only use generic regexps or those for the current language. */
+	if (pp->language != NULL && pp->language != curlang)
+	  continue;
+
+	match = re_match (pp->pattern, lbp->buffer, lbp->len, 0, &pp->regs);
+	switch (match)
+	  {
+	  case -2:
+	    /* Some error. */
+	    if (!pp->error_signaled)
+	      {
+		error ("error while matching \"%s\"", pp->regex);
+		pp->error_signaled = TRUE;
+	      }
+	    break;
+	  case -1:
+	    /* No match. */
+	    break;
+	  default:
+	    /* Match occurred.  Construct a tag. */
+	    if (pp->name_pattern[0] != '\0')
+	      {
+		/* Make a named tag. */
+		char *name = substitute (lbp->buffer,
+					 pp->name_pattern, &pp->regs);
+		if (name != NULL)
+		  pfnote (name, TRUE, lbp->buffer, match, lineno, linecharno);
+	      }
+	    else
+	      {
+		/* Make an unnamed tag. */
+		pfnote ((char *)NULL, TRUE,
+			lbp->buffer, match, lineno, linecharno);
+	      }
+	    break;
+	  }
+      }
 #endif /* ETAGS_REGEXPS */
 
   return result;
 }
-
-/*
- * Read a file, but do no processing.  This is used to do regexp
- * matching on files that have no language defined.
- */
-void
-just_read_file (inf)
-     FILE *inf;
-{
-  lineno = 0;
-  charno = 0;
-
-  while (!feof (inf))
-    {
-      ++lineno;
-      linecharno = charno;
-      charno += readline (&lb, inf) + 1;
-    }
-}
-
 
 /*
  * Return a pointer to a space of size strlen(cp)+1 allocated
@@ -4266,6 +5132,26 @@ etags_strchr (sp, c)
   return NULL;
 }
 
+/* Skip spaces, return new pointer. */
+char *
+skip_spaces (cp)
+     char *cp;
+{
+  while (isspace (*cp))		/* isspace('\0')==FALSE */
+    cp++;
+  return cp;
+}
+
+/* Skip non spaces, return new pointer. */
+char *
+skip_non_spaces (cp)
+     char *cp;
+{
+  while (!iswhite (*cp))	/* iswhite('\0')==TRUE */
+    cp++;
+  return cp;
+}
+
 /* Print error message and exit.  */
 void
 fatal (s1, s2)
@@ -4286,8 +5172,14 @@ pfatal (s1)
 void
 suggest_asking_for_help ()
 {
-  fprintf (stderr, "\tTry `%s --help' for a complete list of options.\n",
-	   progname);
+  fprintf (stderr, "\tTry `%s %s' for a complete list of options.\n",
+	   progname,
+#ifdef LONG_OPTIONS
+	   "--help"
+#else
+	   "-h"
+#endif
+	   );
   exit (BAD);
 }
 
@@ -4323,19 +5215,7 @@ concat (s1, s2, s3)
 char *
 etags_getcwd ()
 {
-#ifdef MSDOS
-  char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
-
-  getwd (path);
-  for (p = path; *p != '\0'; p++)
-    if (*p == '\\')
-      *p = '/';
-    else
-      *p = lowcase (*p);
-
-  return strdup (path);
-#else /* not MSDOS */
-#if HAVE_GETCWD
+#ifdef HAVE_GETCWD
   int bufsize = 200;
   char *path = xnew (bufsize, char);
 
@@ -4344,12 +5224,28 @@ etags_getcwd ()
       if (errno != ERANGE)
 	pfatal ("getcwd");
       bufsize *= 2;
+      free (path);
       path = xnew (bufsize, char);
     }
 
+  canonicalize_filename (path);
   return path;
-#else /* not MSDOS and not HAVE_GETCWD */
-  struct linebuffer path;
+
+#else /* not HAVE_GETCWD */
+#ifdef MSDOS
+  char *p, path[MAXPATHLEN + 1]; /* Fixed size is safe on MSDOS.  */
+
+  getwd (path);
+
+  for (p = path; *p != '\0'; p++)
+    if (*p == '\\')
+      *p = '/';
+    else
+      *p = lowcase (*p);
+
+  return strdup (path);
+#else /* not MSDOS */
+  linebuffer path;
   FILE *pipe;
 
   initbuffer (&path);
@@ -4359,66 +5255,68 @@ etags_getcwd ()
   pclose (pipe);
 
   return path.buffer;
-#endif /* not HAVE_GETCWD */
 #endif /* not MSDOS */
+#endif /* not HAVE_GETCWD */
 }
 
-/* Return a newly allocated string containing the filename
-   of FILE relative to the absolute directory DIR (which
-   should end with a slash). */
+/* Return a newly allocated string containing the file name of FILE
+   relative to the absolute directory DIR (which should end with a slash). */
 char *
 relative_filename (file, dir)
      char *file, *dir;
 {
-  char *fp, *dp, *abs, *res;
+  char *fp, *dp, *afn, *res;
+  int i;
 
-  /* Find the common root of file and dir. */
-  abs = absolute_filename (file, cwd);
-  fp = abs;
+  /* Find the common root of file and dir (with a trailing slash). */
+  afn = absolute_filename (file, cwd);
+  fp = afn;
   dp = dir;
   while (*fp++ == *dp++)
     continue;
-  do
-    {
-      fp--;
-      dp--;
-    }
+  fp--, dp--;			/* back to the first differing char */
+#ifdef DOS_NT
+  if (fp == afn && afn[0] != '/') /* cannot build a relative name */
+    return afn;
+#endif
+  do				/* look at the equal chars until '/' */
+    fp--, dp--;
   while (*fp != '/');
 
-  /* Build a sequence of "../" strings for the resulting relative filename. */
-  for (dp = etags_strchr (dp + 1, '/'), res = "";
-       dp != NULL;
-       dp = etags_strchr (dp + 1, '/'))
-    {
-      res = concat (res, "../", "");
-    }
+  /* Build a sequence of "../" strings for the resulting relative file name. */
+  i = 0;
+  while ((dp = etags_strchr (dp + 1, '/')) != NULL)
+    i += 1;
+  res = xnew (3*i + strlen (fp + 1) + 1, char);
+  res[0] = '\0';
+  while (i-- > 0)
+    strcat (res, "../");
 
-  /* Add the filename relative to the common root of file and dir. */
-  res = concat (res, fp + 1, "");
-  free (abs);
+  /* Add the file name relative to the common root of file and dir. */
+  strcat (res, fp + 1);
+  free (afn);
 
   return res;
 }
 
-/* Return a newly allocated string containing the
-   absolute filename of FILE given CWD (which should
-   end with a slash). */
+/* Return a newly allocated string containing the absolute file name
+   of FILE given DIR (which should end with a slash). */
 char *
-absolute_filename (file, cwd)
-     char *file, *cwd;
+absolute_filename (file, dir)
+     char *file, *dir;
 {
   char *slashp, *cp, *res;
 
-  if (absolutefn (file))
-    res = concat (file, "", "");
+  if (filename_is_absolute (file))
+    res = savestr (file);
 #ifdef DOS_NT
-  /* We don't support non-absolute filenames with a drive
+  /* We don't support non-absolute file names with a drive
      letter, like `d:NAME' (it's too much hassle).  */
   else if (file[1] == ':')
-    fatal ("%s: relative filenames with drive letters not supported", file);
+    fatal ("%s: relative file names with drive letters not supported", file);
 #endif
   else
-    res = concat (cwd, file, "");
+    res = concat (dir, file, "");
 
   /* Delete the "/dirname/.." and "/." substrings. */
   slashp = etags_strchr (res, '/');
@@ -4432,25 +5330,17 @@ absolute_filename (file, cwd)
 	      cp = slashp;
 	      do
 		cp--;
-	      while (cp >= res && !absolutefn (cp));
-	      if (*cp == '/')
-		{
-		  strcpy (cp, slashp + 3);
-		}
+	      while (cp >= res && !filename_is_absolute (cp));
+	      if (cp < res)
+		cp = slashp;	/* the absolute name begins with "/.." */
 #ifdef DOS_NT
 	      /* Under MSDOS and NT we get `d:/NAME' as absolute
-		 filename, so the luser could say `d:/../NAME'.
+		 file name, so the luser could say `d:/../NAME'.
 		 We silently treat this as `d:/NAME'.  */
-	      else if (cp[1] == ':')
-		strcpy (cp + 3, slashp + 4);
+	      else if (cp[0] != '/')
+		cp = slashp;
 #endif
-	      else		/* else (cp == res) */
-		{
-		  if (slashp[3] != '\0')
-		    strcpy (cp, slashp + 4);
-		  else
-		    return ".";
-		}
+	      strcpy (cp, slashp + 3);
 	      slashp = cp;
 	      continue;
 	    }
@@ -4464,37 +5354,77 @@ absolute_filename (file, cwd)
       slashp = etags_strchr (slashp + 1, '/');
     }
 
-  return res;
+  if (res[0] == '\0')
+    return savestr ("/");
+  else
+    return res;
 }
 
 /* Return a newly allocated string containing the absolute
-   filename of dir where FILE resides given CWD (which should
+   file name of dir where FILE resides given DIR (which should
    end with a slash). */
 char *
-absolute_dirname (file, cwd)
-     char *file, *cwd;
+absolute_dirname (file, dir)
+     char *file, *dir;
 {
   char *slashp, *res;
   char save;
-#ifdef DOS_NT
-  char *p;
-
-  for (p = file; *p != '\0'; p++)
-    if (*p == '\\')
-      *p = '/';
-#endif
 
+  canonicalize_filename (file);
   slashp = etags_strrchr (file, '/');
   if (slashp == NULL)
-    return cwd;
+    return savestr (dir);
   save = slashp[1];
   slashp[1] = '\0';
-  res = absolute_filename (file, cwd);
+  res = absolute_filename (file, dir);
   slashp[1] = save;
 
   return res;
 }
 
+/* Whether the argument string is an absolute file name.  The argument
+   string must have been canonicalized with canonicalize_filename. */
+bool
+filename_is_absolute (fn)
+     char *fn;
+{
+  return (fn[0] == '/'
+#ifdef DOS_NT
+	  || (isalpha(fn[0]) && fn[1] == ':' && fn[2] == '/')
+#endif
+	  );
+}
+
+/* Translate backslashes into slashes.  Works in place. */
+void
+canonicalize_filename (fn)
+     register char *fn;
+{
+#ifdef DOS_NT
+  /* Canonicalize drive letter case.  */
+  if (islower (fn[0]))
+    fn[0] = toupper (fn[0]);
+  /* Convert backslashes to slashes.  */
+  for (; *fn != '\0'; fn++)
+    if (*fn == '\\')
+      *fn = '/';
+#else
+  /* No action. */
+  fn = NULL;			/* shut up the compiler */
+#endif
+}
+
+/* Increase the size of a linebuffer. */
+void
+grow_linebuffer (lbp, toksize)
+     linebuffer *lbp;
+     int toksize;
+{
+  while (lbp->size < toksize)
+    lbp->size *= 2;
+  lbp->buffer = xrnew (lbp->buffer, lbp->size, char);
+}
+
 /* Like malloc but get fatal error if memory is exhausted.  */
 long *
 xmalloc (size)
@@ -4502,7 +5432,7 @@ xmalloc (size)
 {
   long *result = (long *) malloc (size);
   if (result == NULL)
-    fatal ("virtual memory exhausted", 0);
+    fatal ("virtual memory exhausted", (char *)NULL);
   return result;
 }
 
@@ -4513,6 +5443,6 @@ xrealloc (ptr, size)
 {
   long *result =  (long *) realloc (ptr, size);
   if (result == NULL)
-    fatal ("virtual memory exhausted");
+    fatal ("virtual memory exhausted", (char *)NULL);
   return result;
 }