From 24dbe96a108cfb899cf1f54ea4b42aceb159776f Mon Sep 17 00:00:00 2001
From: =?utf8?q?Francesco=20Potort=C3=AC?= <pot@gnu.org>
Date: Thu, 13 Jun 2002 10:44:15 +0000
Subject: [PATCH] New multi-line regexp and new regexp syntax. (arg_type):
 at_icregexp label removed (obsolete). (pattern): New member multi_line for
 multi-line regexps. (filebuf): A global buffer containing the whole file as a
 string for multi-line regexp matching. (need_filebuf): Global flag raised if
 multi-line regexps used. (print_help): Document new regexp modifiers, remove
 references to obsolete option --ignore-case-regex. (main): Do not set regexp
 syntax and translation table here. (main): Treat -c option as a backward
 compatibility hack. (main, find_entries): Init and free filebuf.
 (find_entries): Call regex_tag_multiline after the regular parser.
 (scan_separators): Check for unterminated regexp and return NULL.
 (analyse_regex, add_regex): Remove the ignore_case argument, which is now a
 modifier to the regexp.  All callers changed. (add_regex): Manage the regexp
 modifiers. (regex_tag_multiline): New function.  Reads from filebuf.
 (readline_internal): If necessary, copy the whole file into filebuf.
 (readline): Skip multi-line regexps, leave them to regex_tag_multiline.

(add_regex): Better check for null regexps.
(readline): Check for regex matching null string.
(find_entries): Reorganisation.
---
 lib-src/etags.c | 406 ++++++++++++++++++++++++++++++++++--------------
 1 file changed, 292 insertions(+), 114 deletions(-)

diff --git a/lib-src/etags.c b/lib-src/etags.c
index 6b4b379f13..be60b47697 100644
--- a/lib-src/etags.c
+++ b/lib-src/etags.c
@@ -2,21 +2,21 @@
    Copyright (C) 1984, 1987-1989, 1993-1995, 1998-2001, 2002
    Free Software Foundation, Inc. and Ken Arnold
 
-This file is not considered part of GNU Emacs.
+ This file is not considered part of GNU Emacs.
 
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2 of the License, or
-(at your option) any later version.
+ This program is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 2 of the License, or
+ (at your option) any later version.
 
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ GNU General Public License for more details.
 
-You should have received a copy of the GNU General Public License
-along with this program; if not, write to the Free Software Foundation,
-Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
+ You should have received a copy of the GNU General Public License
+ along with this program; if not, write to the Free Software Foundation,
+ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
 
 /*
  * Authors:
@@ -34,7 +34,7 @@ Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */
  *	Francesco Potortì <pot@gnu.org> has maintained it since 1993.
  */
 
-char pot_etags_version[] = "@(#) pot revision number is 16.10";
+char pot_etags_version[] = "@(#) pot revision number is 16.19";
 
 #define	TRUE	1
 #define	FALSE	0
@@ -288,7 +288,6 @@ typedef struct
   enum {
     at_language,		/* a language specification */
     at_regexp,			/* a regular expression */
-    at_icregexp,		/* same, but with case ignored */
     at_filename,		/* a file name */
     at_stdin			/* read from stdin here */
   } arg_type;			/* argument type */
@@ -308,6 +307,7 @@ typedef struct pattern
   char *name_pattern;
   bool error_signaled;
   bool ignore_case;
+  bool multi_line;
 } pattern;
 #endif /* ETAGS_REGEXPS */
 
@@ -355,9 +355,9 @@ static bool nocase_tail __P((char *));
 static char *get_tag __P((char *));
 
 #ifdef ETAGS_REGEXPS
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
+static void analyse_regex __P((char *));
 static void free_patterns __P((void));
+static void regex_tag_multiline __P((void));
 #endif /* ETAGS_REGEXPS */
 static void error __P((const char *, const char *));
 static void suggest_asking_for_help __P((void));
@@ -417,6 +417,7 @@ static node *nodehead;		/* the head of the binary tree of tags */
 static node *last_node;		/* the last node created */
 
 static linebuffer lb;		/* the current line */
+static linebuffer filebuf;	/* a buffer containing the whole file */
 
 /* boolean "functions" (see init)	*/
 static bool _wht[CHARS], _nin[CHARS], _itk[CHARS], _btk[CHARS], _etk[CHARS];
@@ -457,13 +458,10 @@ static bool packages_only;	/* --packages-only: in Ada, only tag packages*/
 static bool parsing_stdin;	/* --parse-stdin used */
 
 #ifdef ETAGS_REGEXPS
-/* List of all regexps. */
-static pattern *p_head;
-
-/* How many characters in the character set.  (From regex.c.)  */
-#define CHAR_SET_SIZE 256
-/* Translation table for case-insensitive matching. */
-static char lc_trans[CHAR_SET_SIZE];
+static pattern *p_head;		/* list of all regexps */
+static bool need_filebuf;	/* some regexes are multi-line */
+#else
+# define need_filebuf FALSE
 #endif /* ETAGS_REGEXPS */
 
 #ifdef LONG_OPTIONS
@@ -680,10 +678,10 @@ Compressed files are supported using gzip and bzip2.");
 }
 
 #ifndef EMACS_NAME
-# define EMACS_NAME "GNU Emacs"
+# define EMACS_NAME "standalone"
 #endif
 #ifndef VERSION
-# define VERSION "21"
+# define VERSION "version"
 #endif
 static void
 print_version ()
@@ -775,9 +773,11 @@ Relative ones are stored relative to the output file's directory.\n");
 	REGEXP is anchored (as if preceded by ^).\n\
 	The form /REGEXP/NAME/ creates a named tag.\n\
 	For example Tcl named tags can be created with:\n\
-	--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\"");
-  puts ("-c /REGEXP/, --ignore-case-regex=/REGEXP/ or --ignore-case-regex=@regexfile\n\
-        Like -r, --regex but ignore case when matching expressions.");
+	--regex=\"/proc[ \\t]+\\([^ \\t]+\\)/\\1/.\".");
+  puts ("In the form /REGEXP/MODS or /REGEXP/NAME/MODS, MODS are\n\
+	one-letter modifiers: `i' means to ignore case, `m' means\n\
+	allow multi-line matches, `s' implies `m' and additionally\n\
+	causes dot to match the newline character.");
   puts ("-R, --no-regex\n\
         Don't create tags from regexps for the following files.");
 #endif /* ETAGS_REGEXPS */
@@ -996,14 +996,6 @@ main (argc, argv)
      is small. */
   argbuffer = xnew (argc, argument);
 
-#ifdef ETAGS_REGEXPS
-  /* Set syntax for regular expression routines. */
-  re_set_syntax (RE_SYNTAX_EMACS | RE_INTERVALS);
-  /* Translation table for case-insensitive search. */
-  for (i = 0; i < CHAR_SET_SIZE; i++)
-    lc_trans[i] = lowcase (i);
-#endif /* ETAGS_REGEXPS */
-
   /*
    * If etags, always find typedefs and structure tags.  Why not?
    * Also default to find macro constants, enum constants and
@@ -1079,6 +1071,10 @@ main (argc, argv)
 	    }
 	}
 	break;
+      case 'c':
+	/* Backward compatibility: support obsolete --ignore-case-regex. */
+	optarg = concat (optarg, "i", ""); /* memory leak here */
+	/* FALLTHRU */
       case 'r':
 	argbuffer[current_arg].arg_type = at_regexp;
 	argbuffer[current_arg].what = optarg;
@@ -1089,11 +1085,6 @@ main (argc, argv)
 	argbuffer[current_arg].what = NULL;
 	++current_arg;
 	break;
-      case 'c':
-	argbuffer[current_arg].arg_type = at_icregexp;
-	argbuffer[current_arg].what = optarg;
-	++current_arg;
-	break;
       case 'V':
 	print_version ();
 	break;
@@ -1152,6 +1143,7 @@ main (argc, argv)
 
   initbuffer (&lb);
   initbuffer (&filename_lb);
+  initbuffer (&filebuf);
 
   if (!CTAGS)
     {
@@ -1186,10 +1178,7 @@ main (argc, argv)
 	  break;
 #ifdef ETAGS_REGEXPS
 	case at_regexp:
-	  analyse_regex (argbuffer[i].what, FALSE);
-	  break;
-	case at_icregexp:
-	  analyse_regex (argbuffer[i].what, TRUE);
+	  analyse_regex (argbuffer[i].what);
 	  break;
 #endif
 	case at_filename:
@@ -1234,6 +1223,7 @@ main (argc, argv)
 #ifdef ETAGS_REGEXPS
   free_patterns ();
 #endif /* ETAGS_REGEXPS */
+  free (filebuf.buffer);
 
   if (!CTAGS || cxref_style)
     {
@@ -1648,7 +1638,6 @@ find_entries (inf)
      FILE *inf;
 {
   char *cp;
-  node *old_last_node;
   language *lang = curfdp->lang;
   Lang_function *parser = NULL;
 
@@ -1703,7 +1692,7 @@ find_entries (inf)
   /* We rewind here, even if inf may be a pipe.  We fail if the
      length of the first line is longer than the pipe block size,
      which is unlikely. */
-    rewind (inf);
+  rewind (inf);
 
   /* Else try to guess the language given the case insensitive file name. */
   if (parser == NULL)
@@ -1716,6 +1705,26 @@ find_entries (inf)
 	}
     }
 
+  /* Else try Fortran or C. */
+  if (parser == NULL)
+    {
+      node *old_last_node = last_node;
+
+      curfdp->lang = get_language_from_langname ("fortran");
+      find_entries (inf);
+
+      if (old_last_node == last_node)
+	/* No Fortran entries found.  Try C. */
+	{
+	  /* We do not tag if rewind fails.
+	     Only the file name will be recorded in the tags file. */
+	  rewind (inf);
+	  curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
+	  find_entries (inf);
+	}
+      return;
+    }
+
   if (!no_line_directive
       && curfdp->lang != NULL && curfdp->lang->metasource)
     /* It may be that this is a bingo.y file, and we already parsed a bingo.c
@@ -1748,32 +1757,21 @@ find_entries (inf)
 	  fdpp = &(*fdpp)->next; /* advance the list pointer */
     }
 
-  if (parser != NULL)
-    {
-      /* Generic initialisations before reading from file. */
-      lineno = 0;		/* reset global line number */
-      charno = 0;		/* reset global char number */
-      linecharno = 0;		/* reset global char number of line start */
+  assert (parser != NULL);
 
-      parser (inf);
-      return;
-    }
+  /* Generic initialisations before reading from file. */
+  filebuf.len = 0;		/* reset the file buffer */
 
-  /* Else try Fortran. */
-  old_last_node = last_node;
-  curfdp->lang = get_language_from_langname ("fortran");
-  find_entries (inf);
+  /* Generic initialisations before parsing file with readline. */
+  lineno = 0;		       /* reset global line number */
+  charno = 0;		       /* reset global char number */
+  linecharno = 0;	       /* reset global char number of line start */
 
-  if (old_last_node == last_node)
-    /* No Fortran entries found.  Try C. */
-    {
-      /* We do not tag if rewind fails.
-	 Only the file name will be recorded in the tags file. */
-      rewind (inf);
-      curfdp->lang = get_language_from_langname (cplusplus ? "c++" : "c");
-      find_entries (inf);
-    }
-  return;
+  parser (inf);
+
+#ifdef ETAGS_REGEXPS
+  regex_tag_multiline ();
+#endif /* ETAGS_REGEXPS */
 }
 
 
@@ -2014,6 +2012,11 @@ add_node (np, cur_node_p)
  * invalidate_nodes ()
  *	Scan the node tree and invalidate all nodes pointing to the
  *	given file description (CTAGS case) or free them (ETAGS case).
+ *
+ * This function most likely contains a bug, but I cannot tell where.
+ * I have a case of a binary that crashes inside this function with a bus
+ * error.  Unfortunately, the binary does not contain debug information, and
+ * compiling with debugging information makes the bug disappear.
  */
 static void
 invalidate_nodes (badfdp, npp)
@@ -2030,7 +2033,7 @@ invalidate_nodes (badfdp, npp)
       if (np->left != NULL)
 	invalidate_nodes (badfdp, &np->left);
       if (np->fdp == badfdp)
-	np-> valid = FALSE;
+	np->valid = FALSE;
       if (np->right != NULL)
 	invalidate_nodes (badfdp, &np->right);
     }
@@ -5263,17 +5266,18 @@ erlang_atom (s, pos)
 #ifdef ETAGS_REGEXPS
 
 static char *scan_separators __P((char *));
-static void analyse_regex __P((char *, bool));
-static void add_regex __P((char *, bool, language *));
+static void add_regex __P((char *, language *));
 static char *substitute __P((char *, char *, struct re_registers *));
 
-/* Take a string like "/blah/" and turn it into "blah", making sure
-   that the first and last characters are the same, and handling
-   quoted separator characters.  Actually, stops on the occurrence of
-   an unquoted separator.  Also turns "\t" into a Tab character, and
-   similarly for all character escape sequences supported by Gcc.
-   Returns pointer to terminating separator.  Works in place.  Null
-   terminates name string. */
+/*
+ * Take a string like "/blah/" and turn it into "blah", verifying
+ * that the first and last characters are the same, and handling
+ * quoted separator characters.  Actually, stops on the occurrence of
+ * an unquoted separator.  Also process \t, \n, etc. and turn into
+ * appropriate characters. Works in place.  Null terminates name string.
+ * Returns pointer to terminating separator, or NULL for
+ * unterminated regexps.
+ */
 static char *
 scan_separators (name)
      char *name;
@@ -5288,15 +5292,15 @@ scan_separators (name)
 	{
 	  switch (*name)
 	    {
-	    case 'a': *copyto++ = '\007'; break;
-	    case 'b': *copyto++ = '\b'; break;
-	    case 'd': *copyto++ = 0177; break;
-	    case 'e': *copyto++ = 033; break;
-	    case 'f': *copyto++ = '\f'; break;
-	    case 'n': *copyto++ = '\n'; break;
-	    case 'r': *copyto++ = '\r'; break;
-	    case 't': *copyto++ = '\t'; break;
-	    case 'v': *copyto++ = '\v'; break;
+	    case 'a': *copyto++ = '\007'; break; /* BEL (bell)		 */
+	    case 'b': *copyto++ = '\b'; break;	 /* BS (back space)	 */
+	    case 'd': *copyto++ = 0177; break;	 /* DEL (delete)	 */
+	    case 'e': *copyto++ = 033; break;	 /* ESC (escape)	 */
+	    case 'f': *copyto++ = '\f'; break;	 /* FF (form feed)	 */
+	    case 'n': *copyto++ = '\n'; break;	 /* NL (new line)	 */
+	    case 'r': *copyto++ = '\r'; break;	 /* CR (carriage return) */
+	    case 't': *copyto++ = '\t'; break;	 /* TAB (horizontal tab) */
+	    case 'v': *copyto++ = '\v'; break;	 /* VT (vertical tab)    */
 	    default:
 	      if (*name == sep)
 		*copyto++ = sep;
@@ -5317,6 +5321,8 @@ scan_separators (name)
       else
 	*copyto++ = *name;
     }
+  if (*name != sep)
+    name = NULL;		/* signal unterminated regexp */
 
   /* Terminate copied string. */
   *copyto = '\0';
@@ -5326,9 +5332,8 @@ scan_separators (name)
 /* Look at the argument of --regex or --no-regex and do the right
    thing.  Same for each line of a regexp file. */
 static void
-analyse_regex (regex_arg, ignore_case)
+analyse_regex (regex_arg)
      char *regex_arg;
-     bool ignore_case;
 {
   if (regex_arg == NULL)
     {
@@ -5362,7 +5367,7 @@ analyse_regex (regex_arg, ignore_case)
 	  }
 	initbuffer (&regexbuf);
 	while (readline_internal (&regexbuf, regexfp) > 0)
-	  analyse_regex (regexbuf.buffer, ignore_case);
+	  analyse_regex (regexbuf.buffer);
 	free (regexbuf.buffer);
 	fclose (regexfp);
       }
@@ -5381,17 +5386,17 @@ analyse_regex (regex_arg, ignore_case)
 	      error ("unterminated language name in regex: %s", regex_arg);
 	      return;
 	    }
-	*cp = '\0';
+	*cp++ = '\0';
 	lang = get_language_from_langname (lang_name);
 	if (lang == NULL)
 	  return;
-	add_regex (cp + 1, ignore_case, lang);
+	add_regex (cp, lang);
       }
       break;
 
       /* Regexp to be used for any language. */
     default:
-      add_regex (regex_arg, ignore_case, NULL);
+      add_regex (regex_arg, NULL);
       break;
     }
 }
@@ -5399,37 +5404,91 @@ analyse_regex (regex_arg, ignore_case)
 /* Turn a name, which is an ed-style (but Emacs syntax) regular
    expression, into a real regular expression by compiling it. */
 static void
-add_regex (regexp_pattern, ignore_case, lang)
+add_regex (regexp_pattern, lang)
      char *regexp_pattern;
-     bool ignore_case;
      language *lang;
 {
   static struct re_pattern_buffer zeropattern;
-  char *name;
+  char sep, *pat, *name, *modifiers;
   const char *err;
   struct re_pattern_buffer *patbuf;
   pattern *pp;
+  bool ignore_case, multi_line, single_line; /* set from the MODS letters */
 
 
-  if (regexp_pattern[strlen(regexp_pattern)-1] != regexp_pattern[0])
+  if (strlen(regexp_pattern) < 3)
     {
-      error ("%s: unterminated regexp", regexp_pattern);
+      error ("null regexp", (char *)NULL);
       return;
     }
+  sep = regexp_pattern[0];	/* saved now: scan_separators works in place */
   name = scan_separators (regexp_pattern);
-  if (regexp_pattern[0] == '\0')
+  if (name == NULL)
     {
-      error ("null regexp", (char *)NULL);
+      error ("%s: unterminated regexp", regexp_pattern);
+      return;
+    }
+  if (name[1] == sep)
+    {
+      error ("null name for regexp \"%s\"", regexp_pattern);
       return;
     }
-  (void) scan_separators (name);
+  modifiers = scan_separators (name);
+  if (modifiers == NULL)	/* no terminating separator --> no name */
+    {
+      modifiers = name;
+      name = "";
+    }
+  else
+    modifiers += 1;		/* skip separator */
+
+  /* Parse regex modifiers. */
+  ignore_case = FALSE;		/* case is significant */
+  multi_line = FALSE;		/* matches are done one line at a time */
+  single_line = FALSE;		/* dot does not match newline */
+  for (; modifiers[0] != '\0'; modifiers++)
+    switch (modifiers[0])
+      {
+      case 'i':
+	ignore_case = TRUE;
+	break;
+      case 's':
+	single_line = TRUE;
+	/* FALLTHRU */
+      case 'm':
+	multi_line = TRUE;
+	need_filebuf = TRUE;
+	break;
+      default:
+	modifiers[1] = '\0';	/* cut the string so only the bad letter prints */
+	error ("invalid regexp modifier `%s'", modifiers);
+	return;
+      }
 
   patbuf = xnew (1, struct re_pattern_buffer);
   *patbuf = zeropattern;
   if (ignore_case)
-    patbuf->translate = lc_trans;	/* translation table to fold case  */
+    {
+      static char lc_trans[CHARS];
+      int i;
+      for (i = 0; i < CHARS; i++)
+	lc_trans[i] = lowcase (i);
+      patbuf->translate = lc_trans;	/* translation table to fold case  */
+    }
+
+  if (multi_line)
+    pat = concat ("^", regexp_pattern, ""); /* anchor to beginning of line */
+  else
+    pat = regexp_pattern;
 
-  err = re_compile_pattern (regexp_pattern, strlen (regexp_pattern), patbuf);
+  if (single_line)
+    re_set_syntax (RE_SYNTAX_EMACS | RE_DOT_NEWLINE);
+  else
+    re_set_syntax (RE_SYNTAX_EMACS);
+  /* Measure PAT itself: the "^" prefix makes it longer than the input. */
+  err = re_compile_pattern (pat, strlen (pat), patbuf);
+  if (multi_line)
+    free (pat);
   if (err != NULL)
     {
       error ("%s while compiling pattern", err);
@@ -5445,6 +5504,7 @@ add_regex (regexp_pattern, ignore_case, lang)
   p_head->name_pattern = savestr (name);
   p_head->error_signaled = FALSE;
   p_head->ignore_case = ignore_case;
+  p_head->multi_line = multi_line;
 }
 
 /*
@@ -5512,6 +5572,92 @@ free_patterns ()
     }
   return;
 }
+
+/*
+ * Reads the whole file as a single string from `filebuf' and looks for
+ * multi-line regular expressions, creating tags on matches.
+ * readline already dealt with normal regexps.  Idea by Ben Wing
+ * <ben@666.com> (2002).  NOTE(review): each re_search starts at charno
+ * but takes its range from the previous value of match -- confirm intended.
+ */
+static void
+regex_tag_multiline ()
+{
+  char *buffer = filebuf.buffer;	/* text to search, taken from filebuf */
+  pattern *pp;
+
+  for (pp = p_head; pp != NULL; pp = pp->p_next)
+    {
+      int match = 0;		/* latest re_search result; < 0 ends the loop */
+
+      if (!pp->multi_line)
+	continue;		/* skip normal regexps */
+
+      /* Generic initialisations before parsing file from memory. */
+      lineno = 1;		/* reset global line number */
+      charno = 0;		/* reset global char number */
+      linecharno = 0;		/* reset global char number of line start */
+
+      /* Only use generic regexps or those for the current language. */
+      if (pp->lang != NULL && pp->lang != curfdp->lang)
+	continue;
+
+      while (match >= 0 && match < filebuf.len)
+	{
+	  match = re_search (pp->pat, buffer, filebuf.len, charno,
+			     filebuf.len - match, &pp->regs);
+	  switch (match)
+	    {
+	    case -2:
+	      /* Some error. */
+	      if (!pp->error_signaled)
+		{
+		  error ("regexp stack overflow while matching \"%s\"",
+			 pp->regex);
+		  pp->error_signaled = TRUE;
+		}
+	      break;
+	    case -1:
+	      /* No match. */
+	      break;
+	    default:
+	      if (match == pp->regs.end[0]) /* match ends where it starts */
+		{
+		  if (!pp->error_signaled)
+		    {
+		      error ("regexp matches the empty string: \"%s\"",
+			     pp->regex);
+		      pp->error_signaled = TRUE;
+		    }
+		  match = -3;	/* exit from while loop */
+		  break;
+		}
+
+	      /* Match occurred.  Construct a tag. */
+	      while (charno < pp->regs.end[0]) /* advance to end of match */
+		if (buffer[charno++] == '\n')
+		  lineno++, linecharno = charno; /* a new line starts here */
+	      if (pp->name_pattern[0] != '\0')
+		{
+		  /* Make a named tag. */
+		  char *name = substitute (buffer,
+					   pp->name_pattern, &pp->regs);
+		  if (name != NULL)
+		    pfnote (name, TRUE, buffer + linecharno,
+			    charno - linecharno + 1, lineno, linecharno);
+		}
+	      else
+		{
+		  /* Make an unnamed tag. */
+		  pfnote ((char *)NULL, TRUE, buffer + linecharno,
+			  charno - linecharno + 1, lineno, linecharno);
+		}
+	      break;
+	    }
+	}
+    }
+}
+
 #endif /* ETAGS_REGEXPS */
 
 
@@ -5564,10 +5710,13 @@ initbuffer (lbp)
  * newline or CR-NL, if any.  Return the number of characters read from
  * `stream', which is the length of the line including the newline.
  *
- * On DOS or Windows we do not count the CR character, if any, before the
- * NL, in the returned length; this mirrors the behavior of emacs on those
+ * On DOS or Windows we do not count the CR character, if any, before the
+ * NL, in the returned length; this mirrors the behavior of Emacs on those
  * platforms (for text files, it translates CR-NL to NL as it reads in the
  * file).
+ *
+ * If multi-line regular expressions are requested, each line read is
+ * appended to `filebuf'.
  */
 static long
 readline_internal (lbp, stream)
@@ -5626,12 +5775,28 @@ readline_internal (lbp, stream)
     }
   lbp->len = p - buffer;
 
+  if (need_filebuf		/* we need filebuf for multi-line regexps */
+      && chars_deleted > 0)	/* not at EOF */
+    {
+      while (filebuf.size <= filebuf.len + lbp->len + 1) /* +1 for \n */
+	{
+	  /* Expand filebuf. */
+	  filebuf.size *= 2;
+	  xrnew (filebuf.buffer, filebuf.size, char);
+	}
+      strncpy (filebuf.buffer + filebuf.len, lbp->buffer, lbp->len);
+      filebuf.len += lbp->len;
+      filebuf.buffer[filebuf.len++] = '\n';
+      filebuf.buffer[filebuf.len] = '\0';
+    }
+
   return lbp->len + chars_deleted;
 }
 
 /*
  * Like readline_internal, above, but in addition try to match the
- * input line against relevant regular expressions.
+ * input line against relevant regular expressions and manage #line
+ * directives.
  */
 static void
 readline (lbp, stream)
@@ -5752,8 +5917,8 @@ readline (lbp, stream)
 	{
 	  if (result > 0)
 	    {
-	    /* Do a tail recursion on ourselves, thus discarding the contents
-	       of the line buffer. */
+	      /* Do a tail recursion on ourselves, thus discarding the contents
+		 of the line buffer. */
 	      readline (lbp, stream);
 	      return;
 	    }
@@ -5772,8 +5937,11 @@ readline (lbp, stream)
     if (lbp->len > 0)
       for (pp = p_head; pp != NULL; pp = pp->p_next)
 	{
-	  /* Only use generic regexps or those for the current language. */
-	  if (pp->lang != NULL && pp->lang != fdhead->lang)
+	  /* Only use generic regexps or those for the current language.
+	     Also do not use multiline regexps, which is the job of
+	     regex_tag_multiline. */
+	  if ((pp->lang != NULL && pp->lang != fdhead->lang)
+	      || pp->multi_line)
 	    continue;
 
 	  match = re_match (pp->pat, lbp->buffer, lbp->len, 0, &pp->regs);
@@ -5783,13 +5951,23 @@ readline (lbp, stream)
 	      /* Some error. */
 	      if (!pp->error_signaled)
 		{
-		  error ("error while matching \"%s\"", pp->regex);
+		  error ("regexp stack overflow while matching \"%s\"",
+			 pp->regex);
 		  pp->error_signaled = TRUE;
 		}
 	      break;
 	    case -1:
 	      /* No match. */
 	      break;
+	    case 0:
+	      /* Empty string matched. */
+	      if (!pp->error_signaled)
+		{
+		  error ("regexp matches the empty string: \"%s\"",
+			 pp->regex);
+		  pp->error_signaled = TRUE;
+		}
+	      break;
 	    default:
 	      /* Match occurred.  Construct a tag. */
 	      if (pp->name_pattern[0] != '\0')
@@ -6229,6 +6407,6 @@ xrealloc (ptr, size)
  * indent-tabs-mode: t
  * tab-width: 8
  * fill-column: 79
- * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node")
+ * c-font-lock-extra-types: ("FILE" "bool" "language" "linebuffer" "fdesc" "node" "pattern")
  * End:
  */
-- 
2.20.1