src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "category.h"
  26 #include "character.h"
  27 #include "buffer.h"
  28 #include "syntax.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
   38 #define REGEXP_CACHE_SIZE 20
   39
   40 /* If the regexp is non-nil, then the buffer contains the compiled form
   41    of that regexp, suitable for searching.  */
   42 struct regexp_cache
   43 {
        /* Link to the next entry of the list headed by searchbuf_head.
           compile_pattern walks the list through this field and moves
           the entry it uses to the front.  */
   44   struct regexp_cache *next;
        /* REGEXP is a copy of the pattern string this entry was compiled
           from (compile_pattern_1 stores it only after a successful
           compilation), or nil when the entry holds no valid pattern.
           WHITESPACE_REGEXP is the value Vsearch_spaces_regexp had at
           compile time if that was a string, and nil otherwise.  */
   45   Lisp_Object regexp, whitespace_regexp;
   46   /* Syntax table for which the regexp applies.  We need this because
   47      of character classes.  If this is t, then the compiled pattern is valid
   48      for any syntax-table.  */
   49   Lisp_Object syntax_table;
        /* The compiled pattern itself, filled in by re_compile_pattern.  */
   50   struct re_pattern_buffer buf;
        /* 0400 is octal for 256.
           NOTE(review): presumably one slot per byte value, serving as
           BUF's fastmap; the code that connects the two is not visible
           near this definition -- confirm.  */
   51   char fastmap[0400];
   52   /* True means regexp was compiled to do full POSIX backtracking.  */
   53   bool posix;
   54 };
   55
   56 /* The instances of that struct.  */
      /* There are REGEXP_CACHE_SIZE (20) of them; clear_regexp_cache
         visits them by index rather than through the linked list.  */
   57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
   58
   59 /* The head of the linked list; points to the most recently used buffer.  */
   60 static struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
   80 static struct re_registers search_regs;
   81
   82 /* The buffer in which the last search was performed, or
   83    Qt if the last search was done in a string;
   84    Qnil if no searching has been done yet.  */
   85 static Lisp_Object last_thing_searched;
   86
   87 /* Error condition signaled when regexp compile_pattern fails.  */
   88 static Lisp_Object Qinvalid_regexp;
   89
   90 /* Error condition used for failing searches.  */
   91 static Lisp_Object Qsearch_failed;
   92
      /* Forward declarations of file-local helpers that are defined later
         in this file.  The prototypes omit parameter names, so the
         meaning of each argument has to be read off the definitions.  */
   93 static void set_search_regs (ptrdiff_t, ptrdiff_t);
   94 static void save_search_regs (void);
   95 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
   96                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
   97                                 ptrdiff_t, ptrdiff_t);
   98 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
   99                               Lisp_Object, Lisp_Object, ptrdiff_t,
  100                               ptrdiff_t, int);
  101 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
  102                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
  103                                 Lisp_Object, Lisp_Object, bool);
 104
 105 static _Noreturn void
 106 matcher_overflow (void)
 107 {
 108   error ("Stack overflow in regexp matcher");
 109 }
 110
 111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 112    PATTERN is the pattern to compile.
 113    CP is the place to put the result.
 114    TRANSLATE is a translation table for ignoring case, or nil for none.
 115    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 116    False means backtrack only enough to get a valid match.
 117
 118    The behavior also depends on Vsearch_spaces_regexp.  */
 119
 120 static void
 121 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 122                    Lisp_Object translate, bool posix)
 123 {
 124   char *val;
 125   reg_syntax_t old;
 126
 127   cp->regexp = Qnil;
 128   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 129   cp->posix = posix;
 130   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 131   cp->buf.charset_unibyte = charset_unibyte;
 132   if (STRINGP (Vsearch_spaces_regexp))
 133     cp->whitespace_regexp = Vsearch_spaces_regexp;
 134   else
 135     cp->whitespace_regexp = Qnil;
 136
 137   /* rms: I think BLOCK_INPUT is not needed here any more,
 138      because regex.c defines malloc to call xmalloc.
 139      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 140      So let's turn it off.  */
 141   /*  BLOCK_INPUT;  */
 142   old = re_set_syntax (RE_SYNTAX_EMACS
 143                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 144
 145   if (STRINGP (Vsearch_spaces_regexp))
 146     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 147   else
 148     re_set_whitespace_regexp (NULL);
 149
 150   val = (char *) re_compile_pattern (SSDATA (pattern),
 151                                      SBYTES (pattern), &cp->buf);
 152
 153   /* If the compiled pattern hard codes some of the contents of the
 154      syntax-table, it can only be reused with *this* syntax table.  */
 155   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 156
 157   re_set_whitespace_regexp (NULL);
 158
 159   re_set_syntax (old);
 160   /* unblock_input ();  */
 161   if (val)
 162     xsignal1 (Qinvalid_regexp, build_string (val));
 163
 164   cp->regexp = Fcopy_sequence (pattern);
 165 }
 166
 167 /* Shrink each compiled regexp buffer in the cache
 168    to the size actually used right now.
 169    This is called from garbage collection.  */
 170
 171 void
 172 shrink_regexp_cache (void)
 173 {
 174   struct regexp_cache *cp;
 175
 176   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 177     {
 178       cp->buf.allocated = cp->buf.used;
 179       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 180     }
 181 }
 182
 183 /* Clear the regexp cache w.r.t. a particular syntax table,
 184    because it was changed.
 185    There is no danger of memory leak here because re_compile_pattern
 186    automagically manages the memory in each re_pattern_buffer struct,
 187    based on its `allocated' and `buffer' values.  */
 188 void
 189 clear_regexp_cache (void)
 190 {
 191   int i;
 192
 193   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 194     /* It's tempting to compare with the syntax-table we've actually changed,
 195        but it's not sufficient because char-table inheritance means that
 196        modifying one syntax-table can change others at the same time.  */
 197     if (!EQ (searchbufs[i].syntax_table, Qt))
 198       searchbufs[i].regexp = Qnil;
 199 }
 200
 201 /* Compile a regexp if necessary, but first check to see if there's one in
 202    the cache.
 203    PATTERN is the pattern to compile.
 204    TRANSLATE is a translation table for ignoring case, or nil for none.
 205    REGP is the structure that says where to store the "register"
 206    values that will result from matching this pattern.
 207    If it is 0, we should compile the pattern not to record any
 208    subexpression bounds.
 209    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 210    False means backtrack only enough to get a valid match.  */
 211
 212 struct re_pattern_buffer *
 213 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 214                  Lisp_Object translate, bool posix, bool multibyte)
 215 {
 216   struct regexp_cache *cp, **cpp;
 217
 218   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 219     {
 220       cp = *cpp;
 221       /* Entries are initialized to nil, and may be set to nil by
 222          compile_pattern_1 if the pattern isn't valid.  Don't apply
 223          string accessors in those cases.  However, compile_pattern_1
 224          is only applied to the cache entry we pick here to reuse.  So
 225          nil should never appear before a non-nil entry.  */
 226       if (NILP (cp->regexp))
 227         goto compile_it;
 228       if (SCHARS (cp->regexp) == SCHARS (pattern)
 229           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 230           && !NILP (Fstring_equal (cp->regexp, pattern))
 231           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 232           && cp->posix == posix
 233           && (EQ (cp->syntax_table, Qt)
 234               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 235           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 236           && cp->buf.charset_unibyte == charset_unibyte)
 237         break;
 238
 239       /* If we're at the end of the cache, compile into the nil cell
 240          we found, or the last (least recently used) cell with a
 241          string value.  */
 242       if (cp->next == 0)
 243         {
 244         compile_it:
 245           compile_pattern_1 (cp, pattern, translate, posix);
 246           break;
 247         }
 248     }
 249
 250   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 251      either because we found it in the cache or because we just compiled it.
 252      Move it to the front of the queue to mark it as most recently used.  */
 253   *cpp = cp->next;
 254   cp->next = searchbuf_head;
 255   searchbuf_head = cp;
 256
 257   /* Advise the searching functions about the space we have allocated
 258      for register data.  */
 259   if (regp)
 260     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 261
 262   /* The compiled pattern can be used both for multibyte and unibyte
 263      target.  But, we have to tell which the pattern is used for. */
 264   cp->buf.target_multibyte = multibyte;
 265
 266   return &cp->buf;
 267 }
 268
 269 \f
 270 static Lisp_Object
 271 looking_at_1 (Lisp_Object string, bool posix)
 272 {
 273   Lisp_Object val;
 274   unsigned char *p1, *p2;
 275   ptrdiff_t s1, s2;
 276   register ptrdiff_t i;
 277   struct re_pattern_buffer *bufp;
 278
 279   if (running_asynch_code)
 280     save_search_regs ();
 281
 282   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 283   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 284                          BVAR (current_buffer, case_eqv_table));
 285
 286   CHECK_STRING (string);
 287   bufp = compile_pattern (string,
 288                           (NILP (Vinhibit_changing_match_data)
 289                            ? &search_regs : NULL),
 290                           (!NILP (BVAR (current_buffer, case_fold_search))
 291                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 292                           posix,
 293                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 294
 295   immediate_quit = 1;
 296   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 297
 298   /* Get pointers and sizes of the two strings
 299      that make up the visible portion of the buffer. */
 300
 301   p1 = BEGV_ADDR;
 302   s1 = GPT_BYTE - BEGV_BYTE;
 303   p2 = GAP_END_ADDR;
 304   s2 = ZV_BYTE - GPT_BYTE;
 305   if (s1 < 0)
 306     {
 307       p2 = p1;
 308       s2 = ZV_BYTE - BEGV_BYTE;
 309       s1 = 0;
 310     }
 311   if (s2 < 0)
 312     {
 313       s1 = ZV_BYTE - BEGV_BYTE;
 314       s2 = 0;
 315     }
 316
 317   re_match_object = Qnil;
 318
 319   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 320                   PT_BYTE - BEGV_BYTE,
 321                   (NILP (Vinhibit_changing_match_data)
 322                    ? &search_regs : NULL),
 323                   ZV_BYTE - BEGV_BYTE);
 324   immediate_quit = 0;
 325
 326   if (i == -2)
 327     matcher_overflow ();
 328
 329   val = (i >= 0 ? Qt : Qnil);
 330   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 331   {
 332     for (i = 0; i < search_regs.num_regs; i++)
 333       if (search_regs.start[i] >= 0)
 334         {
 335           search_regs.start[i]
 336             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 337          search_regs.end[i]
 338            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 339        }
 340     /* Set last_thing_searched only when match data is changed.  */
 341     XSETBUFFER (last_thing_searched, current_buffer);
 342   }
 343
 344   return val;
 345 }
 346
 347 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 348        doc: /* Return t if text after point matches regular expression REGEXP.
 349 This function modifies the match data that `match-beginning',
 350 `match-end' and `match-data' access; save and restore the match
 351 data if you want to preserve them.  */)
 352   (Lisp_Object regexp)
 353 {
 354   return looking_at_1 (regexp, 0);
 355 }
 356
 357 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 358        doc: /* Return t if text after point matches regular expression REGEXP.
 359 Find the longest match, in accord with Posix regular expression rules.
 360 This function modifies the match data that `match-beginning',
 361 `match-end' and `match-data' access; save and restore the match
 362 data if you want to preserve them.  */)
 363   (Lisp_Object regexp)
 364 {
 365   return looking_at_1 (regexp, 1);
 366 }
 367 \f
 368 static Lisp_Object
 369 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 370                 bool posix)
 371 {
 372   ptrdiff_t val;
 373   struct re_pattern_buffer *bufp;
 374   EMACS_INT pos;
 375   ptrdiff_t pos_byte, i;
 376
 377   if (running_asynch_code)
 378     save_search_regs ();
 379
 380   CHECK_STRING (regexp);
 381   CHECK_STRING (string);
 382
 383   if (NILP (start))
 384     pos = 0, pos_byte = 0;
 385   else
 386     {
 387       ptrdiff_t len = SCHARS (string);
 388
 389       CHECK_NUMBER (start);
 390       pos = XINT (start);
 391       if (pos < 0 && -pos <= len)
 392         pos = len + pos;
 393       else if (0 > pos || pos > len)
 394         args_out_of_range (string, start);
 395       pos_byte = string_char_to_byte (string, pos);
 396     }
 397
 398   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 399   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 400                          BVAR (current_buffer, case_eqv_table));
 401
 402   bufp = compile_pattern (regexp,
 403                           (NILP (Vinhibit_changing_match_data)
 404                            ? &search_regs : NULL),
 405                           (!NILP (BVAR (current_buffer, case_fold_search))
 406                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 407                           posix,
 408                           STRING_MULTIBYTE (string));
 409   immediate_quit = 1;
 410   re_match_object = string;
 411
 412   val = re_search (bufp, SSDATA (string),
 413                    SBYTES (string), pos_byte,
 414                    SBYTES (string) - pos_byte,
 415                    (NILP (Vinhibit_changing_match_data)
 416                     ? &search_regs : NULL));
 417   immediate_quit = 0;
 418
 419   /* Set last_thing_searched only when match data is changed.  */
 420   if (NILP (Vinhibit_changing_match_data))
 421     last_thing_searched = Qt;
 422
 423   if (val == -2)
 424     matcher_overflow ();
 425   if (val < 0) return Qnil;
 426
 427   if (NILP (Vinhibit_changing_match_data))
 428     for (i = 0; i < search_regs.num_regs; i++)
 429       if (search_regs.start[i] >= 0)
 430         {
 431           search_regs.start[i]
 432             = string_byte_to_char (string, search_regs.start[i]);
 433           search_regs.end[i]
 434             = string_byte_to_char (string, search_regs.end[i]);
 435         }
 436
 437   return make_number (string_byte_to_char (string, val));
 438 }
 439
 440 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 441        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 442 Matching ignores case if `case-fold-search' is non-nil.
 443 If third arg START is non-nil, start search at that index in STRING.
 444 For index of first char beyond the match, do (match-end 0).
 445 `match-end' and `match-beginning' also give indices of substrings
 446 matched by parenthesis constructs in the pattern.
 447
 448 You can use the function `match-string' to extract the substrings
 449 matched by the parenthesis constructions in REGEXP. */)
 450   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 451 {
 452   return string_match_1 (regexp, string, start, 0);
 453 }
 454
 455 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 456        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 457 Find the longest match, in accord with Posix regular expression rules.
 458 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 459 If third arg START is non-nil, start search at that index in STRING.
 460 For index of first char beyond the match, do (match-end 0).
 461 `match-end' and `match-beginning' also give indices of substrings
 462 matched by parenthesis constructs in the pattern.  */)
 463   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 464 {
 465   return string_match_1 (regexp, string, start, 1);
 466 }
 467
 468 /* Match REGEXP against STRING, searching all of STRING,
 469    and return the index of the match, or negative on failure.
 470    This does not clobber the match data.  */
 471
 472 ptrdiff_t
 473 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 474 {
 475   ptrdiff_t val;
 476   struct re_pattern_buffer *bufp;
 477
 478   bufp = compile_pattern (regexp, 0, Qnil,
 479                           0, STRING_MULTIBYTE (string));
 480   immediate_quit = 1;
 481   re_match_object = string;
 482
 483   val = re_search (bufp, SSDATA (string),
 484                    SBYTES (string), 0,
 485                    SBYTES (string), 0);
 486   immediate_quit = 0;
 487   return val;
 488 }
 489
 490 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 491    and return the index of the match, or negative on failure.
 492    This does not clobber the match data.
 493    We assume that STRING contains single-byte characters.  */
 494
 495 ptrdiff_t
 496 fast_c_string_match_ignore_case (Lisp_Object regexp,
 497                                  const char *string, ptrdiff_t len)
 498 {
 499   ptrdiff_t val;
 500   struct re_pattern_buffer *bufp;
 501
 502   regexp = string_make_unibyte (regexp);
 503   re_match_object = Qt;
 504   bufp = compile_pattern (regexp, 0,
 505                           Vascii_canon_table, 0,
 506                           0);
 507   immediate_quit = 1;
 508   val = re_search (bufp, string, len, 0, len, 0);
 509   immediate_quit = 0;
 510   return val;
 511 }
 512
 513 /* Like fast_string_match but ignore case.  */
 514
 515 ptrdiff_t
 516 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 517 {
 518   ptrdiff_t val;
 519   struct re_pattern_buffer *bufp;
 520
 521   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 522                           0, STRING_MULTIBYTE (string));
 523   immediate_quit = 1;
 524   re_match_object = string;
 525
 526   val = re_search (bufp, SSDATA (string),
 527                    SBYTES (string), 0,
 528                    SBYTES (string), 0);
 529   immediate_quit = 0;
 530   return val;
 531 }
 532 \f
 533 /* Match REGEXP against the characters after POS to LIMIT, and return
 534    the number of matched characters.  If STRING is non-nil, match
 535    against the characters in it.  In that case, POS and LIMIT are
 536    indices into the string.  This function doesn't modify the match
 537    data.  */
 538
 539 ptrdiff_t
 540 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
 541                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 542 {
 543   bool multibyte;
 544   struct re_pattern_buffer *buf;
 545   unsigned char *p1, *p2;
 546   ptrdiff_t s1, s2;
 547   ptrdiff_t len;
 548
 549   if (STRINGP (string))
 550     {
 551       if (pos_byte < 0)
 552         pos_byte = string_char_to_byte (string, pos);
 553       if (limit_byte < 0)
 554         limit_byte = string_char_to_byte (string, limit);
 555       p1 = NULL;
 556       s1 = 0;
 557       p2 = SDATA (string);
 558       s2 = SBYTES (string);
 559       re_match_object = string;
 560       multibyte = STRING_MULTIBYTE (string);
 561     }
 562   else
 563     {
 564       if (pos_byte < 0)
 565         pos_byte = CHAR_TO_BYTE (pos);
 566       if (limit_byte < 0)
 567         limit_byte = CHAR_TO_BYTE (limit);
 568       pos_byte -= BEGV_BYTE;
 569       limit_byte -= BEGV_BYTE;
 570       p1 = BEGV_ADDR;
 571       s1 = GPT_BYTE - BEGV_BYTE;
 572       p2 = GAP_END_ADDR;
 573       s2 = ZV_BYTE - GPT_BYTE;
 574       if (s1 < 0)
 575         {
 576           p2 = p1;
 577           s2 = ZV_BYTE - BEGV_BYTE;
 578           s1 = 0;
 579         }
 580       if (s2 < 0)
 581         {
 582           s1 = ZV_BYTE - BEGV_BYTE;
 583           s2 = 0;
 584         }
 585       re_match_object = Qnil;
 586       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 587     }
 588
 589   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 590   immediate_quit = 1;
 591   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 592                     pos_byte, NULL, limit_byte);
 593   immediate_quit = 0;
 594
 595   return len;
 596 }
 597
 598 \f
 599 /* The newline cache: remembering which sections of text have no newlines.  */
 600
 601 /* If the user has requested the long scans caching, make sure it's on.
 602    Otherwise, make sure it's off.
 603    This is our cheezy way of associating an action with the change of
 604    state of a buffer-local variable.  */
 605 static void
 606 newline_cache_on_off (struct buffer *buf)
 607 {
 608   if (NILP (BVAR (buf, cache_long_scans)))
 609     {
 610       /* It should be off.  */
 611       if (buf->newline_cache)
 612         {
 613           free_region_cache (buf->newline_cache);
 614           buf->newline_cache = 0;
 615         }
 616     }
 617   else
 618     {
 619       /* It should be on.  */
 620       if (buf->newline_cache == 0)
 621         buf->newline_cache = new_region_cache ();
 622     }
 623 }
 624
 625 \f
 626 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
 627
 628    If COUNT is positive, search forwards; END must be >= START.
 629    If COUNT is negative, search backwards for the -COUNTth instance;
 630       END must be <= START.
 631    If COUNT is zero, do anything you please; run rogue, for all I care.
 632
 633    If END is zero, use BEGV or ZV instead, as appropriate for the
 634    direction indicated by COUNT.
 635
 636    If we find COUNT instances, set *SHORTAGE to zero, and return the
 637    position past the COUNTth match.  Note that for reverse motion
 638    this is not the same as the usual convention for Emacs motion commands.
 639
 640    If we don't find COUNT instances before reaching END, set *SHORTAGE
 641    to the number of newlines left unfound, and return END.
 642
 643    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
 644    to the returned character position.
 645
 646    If ALLOW_QUIT, set immediate_quit.  That's good to do
 647    except when inside redisplay.  */
 648
  649 ptrdiff_t
  650 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
  651               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
  652               ptrdiff_t *bytepos, bool allow_quit)
  653 {
  654   struct region_cache *newline_cache;
  655   int direction;
  656
        /* Derive the scan direction from the sign of COUNT.  A zero END
           selects the default limit for that direction: ZV when going
           forward, BEGV when going backward.  */
  657   if (count > 0)
  658     {
  659       direction = 1;
  660       if (!end)
  661         end = ZV, end_byte = ZV_BYTE;
  662     }
  663   else
  664     {
  665       direction = -1;
  666       if (!end)
  667         end = BEGV, end_byte = BEGV_BYTE;
  668     }
        /* An END_BYTE of -1 means the byte position was not supplied;
           compute it from END.  */
  669   if (end_byte == -1)
  670     end_byte = CHAR_TO_BYTE (end);
  671
        /* Create or discard the buffer's newline cache according to its
           cache_long_scans setting, then pick it up.  It is null when
           caching is off.  */
  672   newline_cache_on_off (current_buffer);
  673   newline_cache = current_buffer->newline_cache;
  674
        /* Start from "nothing missing"; this is overwritten at the
           bottom if the scan runs out of text first.  */
  675   if (shortage != 0)
  676     *shortage = 0;
  677
  678   immediate_quit = allow_quit;
  679
  680   if (count > 0)
  681     while (start != end)
  682       {
  683         /* Our innermost scanning loop is very simple; it doesn't know
  684            about gaps, buffer ends, or the newline cache.  ceiling is
  685            the position of the last character before the next such
  686            obstacle --- the last character the dumb search loop should
  687            examine.  */
  688         ptrdiff_t tem, ceiling_byte = end_byte - 1;
  689
  690         /* If we're looking for a newline, consult the newline cache
  691            to see where we can avoid some scanning.  */
  692         if (newline_cache)
  693           {
  694             ptrdiff_t next_change;
                  /* immediate_quit is switched off around the cache
                     lookup and restored to ALLOW_QUIT afterwards.  Each
                     successful lookup advances START to the position it
                     reports in next_change.  */
  695             immediate_quit = 0;
  696             while (region_cache_forward
  697                    (current_buffer, newline_cache, start, &next_change))
  698               start = next_change;
  699             immediate_quit = allow_quit;
  700
  701             start_byte = CHAR_TO_BYTE (start);
  702
  703             /* START should never be after END.  */
  704             if (start_byte > ceiling_byte)
  705               start_byte = ceiling_byte;
  706
  707             /* Now the text after start is an unknown region, and
  708                next_change is the position of the next known region. */
  709             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
  710           }
              /* Without a cache, a START_BYTE of -1 means it was not
                 supplied; compute it from START.  */
  711         else if (start_byte == -1)
  712           start_byte = CHAR_TO_BYTE (start);
  713
  714         /* The dumb loop can only scan text stored in contiguous
  715            bytes. BUFFER_CEILING_OF returns the last character
  716            position that is contiguous, so the ceiling is the
  717            position after that.  */
  718         tem = BUFFER_CEILING_OF (start_byte);
  719         ceiling_byte = min (tem, ceiling_byte);
  720
  721         {
  722           /* The termination address of the dumb loop.  */
  723           register unsigned char *ceiling_addr
  724             = BYTE_POS_ADDR (ceiling_byte) + 1;
  725           register unsigned char *cursor
  726             = BYTE_POS_ADDR (start_byte);
                /* BASE is the address of byte position START_BYTE, so an
                   address P in this stretch has byte position
                   P - base + start_byte.  */
  727           unsigned char *base = cursor;
  728
  729           while (cursor < ceiling_addr)
  730             {
  731               /* The dumb loop.  */
  732               unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
  733
  734               /* If we're looking for newlines, cache the fact that
  735                  the region from start to cursor is free of them. */
  736               if (newline_cache)
  737                 {
  738                   unsigned char *low = cursor;
  739                   unsigned char *lim = nl ? nl : ceiling_addr;
  740                   know_region_cache (current_buffer, newline_cache,
  741                                      BYTE_TO_CHAR (low - base + start_byte),
  742                                      BYTE_TO_CHAR (lim - base + start_byte));
  743                 }
  744
  745               if (! nl)
  746                 break;
  747
                    /* Found the last newline wanted: report the position
                       just past it (nl + 1).  */
  748               if (--count == 0)
  749                 {
  750                   immediate_quit = 0;
  751                   if (bytepos)
  752                     *bytepos = nl + 1 - base + start_byte;
  753                   return BYTE_TO_CHAR (nl + 1 - base + start_byte);
  754                 }
  755               cursor = nl + 1;
  756             }
  757
                /* This stretch is finished; continue from the byte
                   position that corresponds to ceiling_addr.  */
  758           start_byte += ceiling_addr - base;
  759           start = BYTE_TO_CHAR (start_byte);
  760         }
  761       }
  762   else
  763     while (start > end)
  764       {
  765         /* The last character to check before the next obstacle.  */
  766         ptrdiff_t tem, ceiling_byte = end_byte;
  767
  768         /* Consult the newline cache, if appropriate.  */
  769         if (newline_cache)
  770           {
  771             ptrdiff_t next_change;
                  /* Same pattern as in the forward scan: immediate_quit
                     is off during the lookup and restored afterwards.  */
  772             immediate_quit = 0;
  773             while (region_cache_backward
  774                    (current_buffer, newline_cache, start, &next_change))
  775               start = next_change;
  776             immediate_quit = allow_quit;
  777
  778             start_byte = CHAR_TO_BYTE (start);
  779
  780             /* Start should never be at or before end.  */
  781             if (start_byte <= ceiling_byte)
  782               start_byte = ceiling_byte + 1;
  783
  784             /* Now the text before start is an unknown region, and
  785                next_change is the position of the next known region. */
  786             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
  787           }
              /* Without a cache, a START_BYTE of -1 means it was not
                 supplied; compute it from START.  */
  788         else if (start_byte == -1)
  789           start_byte = CHAR_TO_BYTE (start);
  790
  791         /* Stop scanning before the gap.  */
  792         tem = BUFFER_FLOOR_OF (start_byte - 1);
  793         ceiling_byte = max (tem, ceiling_byte);
  794
  795         {
  796           /* The termination address of the dumb loop.  */
  797           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
  798           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
                /* BASE is the address of byte position START_BYTE - 1, so
                   for an address P in this stretch, P - base + start_byte
                   is the byte position just after P.  */
  799           unsigned char *base = cursor;
  800
  801           while (cursor >= ceiling_addr)
  802             {
  803               unsigned char *nl = memrchr (ceiling_addr, '\n',
  804                                            cursor + 1 - ceiling_addr);
  805
  806               /* If we're looking for newlines, cache the fact that
  807                  the region from after the cursor to start is free of them.  */
  808               if (newline_cache)
  809                 {
  810                   unsigned char *low = nl ? nl : ceiling_addr - 1;
  811                   unsigned char *lim = cursor;
  812                   know_region_cache (current_buffer, newline_cache,
  813                                      BYTE_TO_CHAR (low - base + start_byte),
  814                                      BYTE_TO_CHAR (lim - base + start_byte));
  815                 }
  816
  817               if (! nl)
  818                 break;
  819
                    /* COUNT is negative here and climbs toward zero.  Once
                       it gets there, report the position just after the
                       newline that was found.  */
  820               if (++count >= 0)
  821                 {
  822                   immediate_quit = 0;
  823                   if (bytepos)
  824                     *bytepos = nl - base + start_byte;
  825                   return BYTE_TO_CHAR (nl - base + start_byte);
  826                 }
  827               cursor = nl - 1;
  828             }
  829
                /* This stretch is finished; the adjustment below leaves
                   START_BYTE equal to ceiling_byte.  */
  830           start_byte += ceiling_addr - 1 - base;
  831           start = BYTE_TO_CHAR (start_byte);
  832         }
  833       }
  834
        /* The scan reached END without finding enough newlines.  COUNT
           still holds what is missing (negative for a backward scan), so
           multiplying by DIRECTION yields a non-negative shortage.  */
  835   immediate_quit = 0;
  836   if (shortage)
  837     *shortage = count * direction;
  838   if (bytepos)
  839     {
  840       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
  841       eassert (*bytepos == CHAR_TO_BYTE (start));
  842     }
  843   return start;
  844 }
 845 \f
 846 /* Search for COUNT instances of a line boundary.
 847    Start at START.  If COUNT is negative, search backwards.
 848
 849    We report the resulting position by calling TEMP_SET_PT_BOTH.
 850
 851    If we find COUNT instances. we position after (always after,
 852    even if scanning backwards) the COUNTth match, and return 0.
 853
 854    If we don't find COUNT instances before reaching the end of the
 855    buffer (or the beginning, if scanning backwards), we return
 856    the number of line boundaries left unfound, and position at
 857    the limit we bumped up against.
 858
 859    If ALLOW_QUIT, set immediate_quit.  That's good to do
 860    except in special cases.  */
 861
 862 ptrdiff_t
 863 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 864               ptrdiff_t limit, ptrdiff_t limit_byte,
 865               ptrdiff_t count, bool allow_quit)
 866 {
 867   ptrdiff_t charpos, bytepos, shortage;
 868
 869   charpos = find_newline (start, start_byte, limit, limit_byte,
 870                           count, &shortage, &bytepos, allow_quit);
 871   if (shortage)
 872     TEMP_SET_PT_BOTH (limit, limit_byte);
 873   else
 874     TEMP_SET_PT_BOTH (charpos, bytepos);
 875   return shortage;
 876 }
 877
 878 /* Like find_newline, but doesn't allow QUITting and doesn't return
 879    SHORTAGE.  */
 880 ptrdiff_t
 881 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
 882                       ptrdiff_t cnt, ptrdiff_t *bytepos)
 883 {
 884   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
 885 }
 886
 887 /* Like find_newline, but returns position before the newline, not
 888    after, and only search up to TO.
 889    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
 890
 891 ptrdiff_t
 892 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
 893                           ptrdiff_t cnt, ptrdiff_t *bytepos)
 894 {
 895   ptrdiff_t shortage;
 896   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
 897
 898   if (shortage == 0)
 899     {
 900       if (bytepos)
 901         DEC_BOTH (pos, *bytepos);
 902       else
 903         pos--;
 904     }
 905   return pos;
 906 }
 907 \f
 908 /* Subroutines of Lisp buffer search functions. */
 909
 910 static Lisp_Object
 911 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
 912                 Lisp_Object count, int direction, int RE, bool posix)
 913 {
 914   EMACS_INT np;
 915   EMACS_INT lim;
 916   ptrdiff_t lim_byte;
 917   EMACS_INT n = direction;
 918
 919   if (!NILP (count))
 920     {
 921       CHECK_NUMBER (count);
 922       n *= XINT (count);
 923     }
 924
 925   CHECK_STRING (string);
 926   if (NILP (bound))
 927     {
 928       if (n > 0)
 929         lim = ZV, lim_byte = ZV_BYTE;
 930       else
 931         lim = BEGV, lim_byte = BEGV_BYTE;
 932     }
 933   else
 934     {
 935       CHECK_NUMBER_COERCE_MARKER (bound);
 936       lim = XINT (bound);
 937       if (n > 0 ? lim < PT : lim > PT)
 938         error ("Invalid search bound (wrong side of point)");
 939       if (lim > ZV)
 940         lim = ZV, lim_byte = ZV_BYTE;
 941       else if (lim < BEGV)
 942         lim = BEGV, lim_byte = BEGV_BYTE;
 943       else
 944         lim_byte = CHAR_TO_BYTE (lim);
 945     }
 946
 947   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 948   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 949                          BVAR (current_buffer, case_eqv_table));
 950
 951   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 952                       (!NILP (BVAR (current_buffer, case_fold_search))
 953                        ? BVAR (current_buffer, case_canon_table)
 954                        : Qnil),
 955                       (!NILP (BVAR (current_buffer, case_fold_search))
 956                        ? BVAR (current_buffer, case_eqv_table)
 957                        : Qnil),
 958                       posix);
 959   if (np <= 0)
 960     {
 961       if (NILP (noerror))
 962         xsignal1 (Qsearch_failed, string);
 963
 964       if (!EQ (noerror, Qt))
 965         {
 966           eassert (BEGV <= lim && lim <= ZV);
 967           SET_PT_BOTH (lim, lim_byte);
 968           return Qnil;
 969 #if 0 /* This would be clean, but maybe programs depend on
 970          a value of nil here.  */
 971           np = lim;
 972 #endif
 973         }
 974       else
 975         return Qnil;
 976     }
 977
 978   eassert (BEGV <= np && np <= ZV);
 979   SET_PT (np);
 980
 981   return make_number (np);
 982 }
 983 \f
 984 /* Return true if REGEXP it matches just one constant string.  */
 985
 986 static bool
 987 trivial_regexp_p (Lisp_Object regexp)
 988 {
 989   ptrdiff_t len = SBYTES (regexp);
 990   unsigned char *s = SDATA (regexp);
 991   while (--len >= 0)
 992     {
 993       switch (*s++)
 994         {
 995         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 996           return 0;
 997         case '\\':
 998           if (--len < 0)
 999             return 0;
1000           switch (*s++)
1001             {
1002             case '|': case '(': case ')': case '`': case '\'': case 'b':
1003             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1004             case 'S': case '=': case '{': case '}': case '_':
1005             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1006             case '1': case '2': case '3': case '4': case '5':
1007             case '6': case '7': case '8': case '9':
1008               return 0;
1009             }
1010         }
1011     }
1012   return 1;
1013 }
1014
1015 /* Search for the n'th occurrence of STRING in the current buffer,
1016    starting at position POS and stopping at position LIM,
1017    treating STRING as a literal string if RE is false or as
1018    a regular expression if RE is true.
1019
1020    If N is positive, searching is forward and LIM must be greater than POS.
1021    If N is negative, searching is backward and LIM must be less than POS.
1022
1023    Returns -x if x occurrences remain to be found (x > 0),
1024    or else the position at the beginning of the Nth occurrence
1025    (if searching backward) or the end (if searching forward).
1026
1027    POSIX is nonzero if we want full backtracking (POSIX style)
1028    for this pattern.  0 means backtrack only enough to get a valid match.  */
1029
/* Set OUT to the translation of character D according to table TRT.
   If TRT is nil, or if its entry for D (fetched with Faref) is not an
   integer, OUT is set to D itself.

   OUT must be a modifiable lvalue; it may be the same variable as D.
   TRT and D can be evaluated more than once, so they must be free of
   side effects.  The macro parameters are parenthesized where they
   are assigned, and the scratch variable has a name unlikely to
   collide with (and silently capture) a caller's variable.  */
#define TRANSLATE(out, trt, d)                                  \
do                                                              \
  {                                                             \
    if (! NILP (trt))                                           \
      {                                                         \
        Lisp_Object translate_entry_;                           \
        translate_entry_ = Faref (trt, make_number (d));        \
        if (INTEGERP (translate_entry_))                        \
          (out) = XINT (translate_entry_);                      \
        else                                                    \
          (out) = (d);                                          \
      }                                                         \
    else                                                        \
      (out) = (d);                                              \
  }                                                             \
while (0)
1046
1047 /* Only used in search_buffer, to record the end position of the match
1048    when searching regexps and SEARCH_REGS should not be changed
1049    (i.e. Vinhibit_changing_match_data is non-nil).  */
1050 static struct re_registers search_regs_1;
1051
1052 static EMACS_INT
1053 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1054                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1055                int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1056 {
1057   ptrdiff_t len = SCHARS (string);
1058   ptrdiff_t len_byte = SBYTES (string);
1059   register ptrdiff_t i;
1060
1061   if (running_asynch_code)
1062     save_search_regs ();
1063
1064   /* Searching 0 times means don't move.  */
1065   /* Null string is found at starting position.  */
1066   if (len == 0 || n == 0)
1067     {
1068       set_search_regs (pos_byte, 0);
1069       return pos;
1070     }
1071
1072   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1073     {
1074       unsigned char *p1, *p2;
1075       ptrdiff_t s1, s2;
1076       struct re_pattern_buffer *bufp;
1077
1078       bufp = compile_pattern (string,
1079                               (NILP (Vinhibit_changing_match_data)
1080                                ? &search_regs : &search_regs_1),
1081                               trt, posix,
1082                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1083
1084       immediate_quit = 1;       /* Quit immediately if user types ^G,
1085                                    because letting this function finish
1086                                    can take too long. */
1087       QUIT;                     /* Do a pending quit right away,
1088                                    to avoid paradoxical behavior */
1089       /* Get pointers and sizes of the two strings
1090          that make up the visible portion of the buffer. */
1091
1092       p1 = BEGV_ADDR;
1093       s1 = GPT_BYTE - BEGV_BYTE;
1094       p2 = GAP_END_ADDR;
1095       s2 = ZV_BYTE - GPT_BYTE;
1096       if (s1 < 0)
1097         {
1098           p2 = p1;
1099           s2 = ZV_BYTE - BEGV_BYTE;
1100           s1 = 0;
1101         }
1102       if (s2 < 0)
1103         {
1104           s1 = ZV_BYTE - BEGV_BYTE;
1105           s2 = 0;
1106         }
1107       re_match_object = Qnil;
1108
1109       while (n < 0)
1110         {
1111           ptrdiff_t val;
1112
1113           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1114                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1115                              (NILP (Vinhibit_changing_match_data)
1116                               ? &search_regs : &search_regs_1),
1117                              /* Don't allow match past current point */
1118                              pos_byte - BEGV_BYTE);
1119           if (val == -2)
1120             {
1121               matcher_overflow ();
1122             }
1123           if (val >= 0)
1124             {
1125               if (NILP (Vinhibit_changing_match_data))
1126                 {
1127                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1128                   for (i = 0; i < search_regs.num_regs; i++)
1129                     if (search_regs.start[i] >= 0)
1130                       {
1131                         search_regs.start[i]
1132                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1133                         search_regs.end[i]
1134                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1135                       }
1136                   XSETBUFFER (last_thing_searched, current_buffer);
1137                   /* Set pos to the new position. */
1138                   pos = search_regs.start[0];
1139                 }
1140               else
1141                 {
1142                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1143                   /* Set pos to the new position.  */
1144                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1145                 }
1146             }
1147           else
1148             {
1149               immediate_quit = 0;
1150               return (n);
1151             }
1152           n++;
1153         }
1154       while (n > 0)
1155         {
1156           ptrdiff_t val;
1157
1158           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1159                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1160                              (NILP (Vinhibit_changing_match_data)
1161                               ? &search_regs : &search_regs_1),
1162                              lim_byte - BEGV_BYTE);
1163           if (val == -2)
1164             {
1165               matcher_overflow ();
1166             }
1167           if (val >= 0)
1168             {
1169               if (NILP (Vinhibit_changing_match_data))
1170                 {
1171                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1172                   for (i = 0; i < search_regs.num_regs; i++)
1173                     if (search_regs.start[i] >= 0)
1174                       {
1175                         search_regs.start[i]
1176                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1177                         search_regs.end[i]
1178                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1179                       }
1180                   XSETBUFFER (last_thing_searched, current_buffer);
1181                   pos = search_regs.end[0];
1182                 }
1183               else
1184                 {
1185                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1186                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1187                 }
1188             }
1189           else
1190             {
1191               immediate_quit = 0;
1192               return (0 - n);
1193             }
1194           n--;
1195         }
1196       immediate_quit = 0;
1197       return (pos);
1198     }
1199   else                          /* non-RE case */
1200     {
1201       unsigned char *raw_pattern, *pat;
1202       ptrdiff_t raw_pattern_size;
1203       ptrdiff_t raw_pattern_size_byte;
1204       unsigned char *patbuf;
1205       bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1206       unsigned char *base_pat;
1207       /* Set to positive if we find a non-ASCII char that need
1208          translation.  Otherwise set to zero later.  */
1209       int char_base = -1;
1210       bool boyer_moore_ok = 1;
1211
1212       /* MULTIBYTE says whether the text to be searched is multibyte.
1213          We must convert PATTERN to match that, or we will not really
1214          find things right.  */
1215
1216       if (multibyte == STRING_MULTIBYTE (string))
1217         {
1218           raw_pattern = SDATA (string);
1219           raw_pattern_size = SCHARS (string);
1220           raw_pattern_size_byte = SBYTES (string);
1221         }
1222       else if (multibyte)
1223         {
1224           raw_pattern_size = SCHARS (string);
1225           raw_pattern_size_byte
1226             = count_size_as_multibyte (SDATA (string),
1227                                        raw_pattern_size);
1228           raw_pattern = alloca (raw_pattern_size_byte + 1);
1229           copy_text (SDATA (string), raw_pattern,
1230                      SCHARS (string), 0, 1);
1231         }
1232       else
1233         {
1234           /* Converting multibyte to single-byte.
1235
1236              ??? Perhaps this conversion should be done in a special way
1237              by subtracting nonascii-insert-offset from each non-ASCII char,
1238              so that only the multibyte chars which really correspond to
1239              the chosen single-byte character set can possibly match.  */
1240           raw_pattern_size = SCHARS (string);
1241           raw_pattern_size_byte = SCHARS (string);
1242           raw_pattern = alloca (raw_pattern_size + 1);
1243           copy_text (SDATA (string), raw_pattern,
1244                      SBYTES (string), 1, 0);
1245         }
1246
1247       /* Copy and optionally translate the pattern.  */
1248       len = raw_pattern_size;
1249       len_byte = raw_pattern_size_byte;
1250       patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
1251       pat = patbuf;
1252       base_pat = raw_pattern;
1253       if (multibyte)
1254         {
1255           /* Fill patbuf by translated characters in STRING while
1256              checking if we can use boyer-moore search.  If TRT is
1257              non-nil, we can use boyer-moore search only if TRT can be
1258              represented by the byte array of 256 elements.  For that,
1259              all non-ASCII case-equivalents of all case-sensitive
1260              characters in STRING must belong to the same character
1261              group (two characters belong to the same group iff their
1262              multibyte forms are the same except for the last byte;
1263              i.e. every 64 characters form a group; U+0000..U+003F,
1264              U+0040..U+007F, U+0080..U+00BF, ...).  */
1265
1266           while (--len >= 0)
1267             {
1268               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1269               int c, translated, inverse;
1270               int in_charlen, charlen;
1271
1272               /* If we got here and the RE flag is set, it's because we're
1273                  dealing with a regexp known to be trivial, so the backslash
1274                  just quotes the next character.  */
1275               if (RE && *base_pat == '\\')
1276                 {
1277                   len--;
1278                   raw_pattern_size--;
1279                   len_byte--;
1280                   base_pat++;
1281                 }
1282
1283               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1284
1285               if (NILP (trt))
1286                 {
1287                   str = base_pat;
1288                   charlen = in_charlen;
1289                 }
1290               else
1291                 {
1292                   /* Translate the character.  */
1293                   TRANSLATE (translated, trt, c);
1294                   charlen = CHAR_STRING (translated, str_base);
1295                   str = str_base;
1296
1297                   /* Check if C has any other case-equivalents.  */
1298                   TRANSLATE (inverse, inverse_trt, c);
1299                   /* If so, check if we can use boyer-moore.  */
1300                   if (c != inverse && boyer_moore_ok)
1301                     {
1302                       /* Check if all equivalents belong to the same
1303                          group of characters.  Note that the check of C
1304                          itself is done by the last iteration.  */
1305                       int this_char_base = -1;
1306
1307                       while (boyer_moore_ok)
1308                         {
1309                           if (ASCII_BYTE_P (inverse))
1310                             {
1311                               if (this_char_base > 0)
1312                                 boyer_moore_ok = 0;
1313                               else
1314                                 this_char_base = 0;
1315                             }
1316                           else if (CHAR_BYTE8_P (inverse))
1317                             /* Boyer-moore search can't handle a
1318                                translation of an eight-bit
1319                                character.  */
1320                             boyer_moore_ok = 0;
1321                           else if (this_char_base < 0)
1322                             {
1323                               this_char_base = inverse & ~0x3F;
1324                               if (char_base < 0)
1325                                 char_base = this_char_base;
1326                               else if (this_char_base != char_base)
1327                                 boyer_moore_ok = 0;
1328                             }
1329                           else if ((inverse & ~0x3F) != this_char_base)
1330                             boyer_moore_ok = 0;
1331                           if (c == inverse)
1332                             break;
1333                           TRANSLATE (inverse, inverse_trt, inverse);
1334                         }
1335                     }
1336                 }
1337
1338               /* Store this character into the translated pattern.  */
1339               memcpy (pat, str, charlen);
1340               pat += charlen;
1341               base_pat += in_charlen;
1342               len_byte -= in_charlen;
1343             }
1344
1345           /* If char_base is still negative we didn't find any translated
1346              non-ASCII characters.  */
1347           if (char_base < 0)
1348             char_base = 0;
1349         }
1350       else
1351         {
1352           /* Unibyte buffer.  */
1353           char_base = 0;
1354           while (--len >= 0)
1355             {
1356               int c, translated, inverse;
1357
1358               /* If we got here and the RE flag is set, it's because we're
1359                  dealing with a regexp known to be trivial, so the backslash
1360                  just quotes the next character.  */
1361               if (RE && *base_pat == '\\')
1362                 {
1363                   len--;
1364                   raw_pattern_size--;
1365                   base_pat++;
1366                 }
1367               c = *base_pat++;
1368               TRANSLATE (translated, trt, c);
1369               *pat++ = translated;
1370               /* Check that none of C's equivalents violates the
1371                  assumptions of boyer_moore.  */
1372               TRANSLATE (inverse, inverse_trt, c);
1373               while (1)
1374                 {
1375                   if (inverse >= 0200)
1376                     {
1377                       boyer_moore_ok = 0;
1378                       break;
1379                     }
1380                   if (c == inverse)
1381                     break;
1382                   TRANSLATE (inverse, inverse_trt, inverse);
1383                 }
1384             }
1385         }
1386
1387       len_byte = pat - patbuf;
1388       pat = base_pat = patbuf;
1389
1390       if (boyer_moore_ok)
1391         return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1392                             pos_byte, lim_byte,
1393                             char_base);
1394       else
1395         return simple_search (n, pat, raw_pattern_size, len_byte, trt,
1396                               pos, pos_byte, lim, lim_byte);
1397     }
1398 }
1399 \f
1400 /* Do a simple string search N times for the string PAT,
1401    whose length is LEN/LEN_BYTE,
1402    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1403    TRT is the translation table.
1404
1405    Return the character position where the match is found.
1406    Otherwise, if M matches remained to be found, return -M.
1407
1408    This kind of search works regardless of what is in PAT and
1409    regardless of what is in TRT.  It is used in cases where
1410    boyer_moore cannot work.  */
1411
1412 static EMACS_INT
1413 simple_search (EMACS_INT n, unsigned char *pat,
1414                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1415                ptrdiff_t pos, ptrdiff_t pos_byte,
1416                ptrdiff_t lim, ptrdiff_t lim_byte)
1417 {
1418   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1419   bool forward = n > 0;
1420   /* Number of buffer bytes matched.  Note that this may be different
1421      from len_byte in a multibyte buffer.  */
1422   ptrdiff_t match_byte = PTRDIFF_MIN;
1423
1424   if (lim > pos && multibyte)
1425     while (n > 0)
1426       {
1427         while (1)
1428           {
1429             /* Try matching at position POS.  */
1430             ptrdiff_t this_pos = pos;
1431             ptrdiff_t this_pos_byte = pos_byte;
1432             ptrdiff_t this_len = len;
1433             unsigned char *p = pat;
1434             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1435               goto stop;
1436
1437             while (this_len > 0)
1438               {
1439                 int charlen, buf_charlen;
1440                 int pat_ch, buf_ch;
1441
1442                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1443                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1444                                                  buf_charlen);
1445                 TRANSLATE (buf_ch, trt, buf_ch);
1446
1447                 if (buf_ch != pat_ch)
1448                   break;
1449
1450                 this_len--;
1451                 p += charlen;
1452
1453                 this_pos_byte += buf_charlen;
1454                 this_pos++;
1455               }
1456
1457             if (this_len == 0)
1458               {
1459                 match_byte = this_pos_byte - pos_byte;
1460                 pos += len;
1461                 pos_byte += match_byte;
1462                 break;
1463               }
1464
1465             INC_BOTH (pos, pos_byte);
1466           }
1467
1468         n--;
1469       }
1470   else if (lim > pos)
1471     while (n > 0)
1472       {
1473         while (1)
1474           {
1475             /* Try matching at position POS.  */
1476             ptrdiff_t this_pos = pos;
1477             ptrdiff_t this_len = len;
1478             unsigned char *p = pat;
1479
1480             if (pos + len > lim)
1481               goto stop;
1482
1483             while (this_len > 0)
1484               {
1485                 int pat_ch = *p++;
1486                 int buf_ch = FETCH_BYTE (this_pos);
1487                 TRANSLATE (buf_ch, trt, buf_ch);
1488
1489                 if (buf_ch != pat_ch)
1490                   break;
1491
1492                 this_len--;
1493                 this_pos++;
1494               }
1495
1496             if (this_len == 0)
1497               {
1498                 match_byte = len;
1499                 pos += len;
1500                 break;
1501               }
1502
1503             pos++;
1504           }
1505
1506         n--;
1507       }
1508   /* Backwards search.  */
1509   else if (lim < pos && multibyte)
1510     while (n < 0)
1511       {
1512         while (1)
1513           {
1514             /* Try matching at position POS.  */
1515             ptrdiff_t this_pos = pos;
1516             ptrdiff_t this_pos_byte = pos_byte;
1517             ptrdiff_t this_len = len;
1518             const unsigned char *p = pat + len_byte;
1519
1520             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1521               goto stop;
1522
1523             while (this_len > 0)
1524               {
1525                 int pat_ch, buf_ch;
1526
1527                 DEC_BOTH (this_pos, this_pos_byte);
1528                 PREV_CHAR_BOUNDARY (p, pat);
1529                 pat_ch = STRING_CHAR (p);
1530                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1531                 TRANSLATE (buf_ch, trt, buf_ch);
1532
1533                 if (buf_ch != pat_ch)
1534                   break;
1535
1536                 this_len--;
1537               }
1538
1539             if (this_len == 0)
1540               {
1541                 match_byte = pos_byte - this_pos_byte;
1542                 pos = this_pos;
1543                 pos_byte = this_pos_byte;
1544                 break;
1545               }
1546
1547             DEC_BOTH (pos, pos_byte);
1548           }
1549
1550         n++;
1551       }
1552   else if (lim < pos)
1553     while (n < 0)
1554       {
1555         while (1)
1556           {
1557             /* Try matching at position POS.  */
1558             ptrdiff_t this_pos = pos - len;
1559             ptrdiff_t this_len = len;
1560             unsigned char *p = pat;
1561
1562             if (this_pos < lim)
1563               goto stop;
1564
1565             while (this_len > 0)
1566               {
1567                 int pat_ch = *p++;
1568                 int buf_ch = FETCH_BYTE (this_pos);
1569                 TRANSLATE (buf_ch, trt, buf_ch);
1570
1571                 if (buf_ch != pat_ch)
1572                   break;
1573                 this_len--;
1574                 this_pos++;
1575               }
1576
1577             if (this_len == 0)
1578               {
1579                 match_byte = len;
1580                 pos -= len;
1581                 break;
1582               }
1583
1584             pos--;
1585           }
1586
1587         n++;
1588       }
1589
1590  stop:
1591   if (n == 0)
1592     {
1593       eassert (match_byte != PTRDIFF_MIN);
1594       if (forward)
1595         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1596       else
1597         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1598
1599       return pos;
1600     }
1601   else if (n > 0)
1602     return -n;
1603   else
1604     return n;
1605 }
1606 \f
1607 /* Do Boyer-Moore search N times for the string BASE_PAT,
1608    whose length is LEN_BYTE,
1609    from buffer position POS_BYTE until LIM_BYTE.
1610    DIRECTION says which direction we search in.
1611    TRT and INVERSE_TRT are translation tables.
1612    Characters in PAT are already translated by TRT.
1613
1614    This kind of search works if all the characters in BASE_PAT that
1615    have nontrivial translation are the same aside from the last byte.
1616    This makes it possible to translate just the last byte of a
1617    character, and do so after just a simple test of the context.
1618    CHAR_BASE is nonzero if there is such a non-ASCII character.
1619
1620    If that criterion is not satisfied, do not call this function.  */
1621
1622 static EMACS_INT
1623 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1624              ptrdiff_t len_byte,
1625              Lisp_Object trt, Lisp_Object inverse_trt,
1626              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1627              int char_base)
1628 {
1629   int direction = ((n > 0) ? 1 : -1);
1630   register ptrdiff_t dirlen;
1631   ptrdiff_t limit;
1632   int stride_for_teases = 0;
1633   int BM_tab[0400];
1634   register unsigned char *cursor, *p_limit;
1635   register ptrdiff_t i;
1636   register int j;
1637   unsigned char *pat, *pat_end;
1638   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1639
1640   unsigned char simple_translate[0400];
1641   /* These are set to the preceding bytes of a byte to be translated
1642      if char_base is nonzero.  As the maximum byte length of a
1643      multibyte character is 5, we have to check at most four previous
1644      bytes.  */
1645   int translate_prev_byte1 = 0;
1646   int translate_prev_byte2 = 0;
1647   int translate_prev_byte3 = 0;
1648
1649   /* The general approach is that we are going to maintain that we know
1650      the first (closest to the present position, in whatever direction
1651      we're searching) character that could possibly be the last
1652      (furthest from present position) character of a valid match.  We
1653      advance the state of our knowledge by looking at that character
1654      and seeing whether it indeed matches the last character of the
1655      pattern.  If it does, we take a closer look.  If it does not, we
1656      move our pointer (to putative last characters) as far as is
1657      logically possible.  This amount of movement, which I call a
1658      stride, will be the length of the pattern if the actual character
1659      appears nowhere in the pattern, otherwise it will be the distance
1660      from the last occurrence of that character to the end of the
1661      pattern.  If the amount is zero we have a possible match.  */
1662
1663   /* Here we make a "mickey mouse" BM table.  The stride of the search
1664      is determined only by the last character of the putative match.
1665      If that character does not match, we will stride the proper
1666      distance to propose a match that superimposes it on the last
1667      instance of a character that matches it (per trt), or misses
1668      it entirely if there is none. */
1669
1670   dirlen = len_byte * direction;
1671
1672   /* Record position after the end of the pattern.  */
1673   pat_end = base_pat + len_byte;
1674   /* BASE_PAT points to a character that we start scanning from.
1675      It is the first character in a forward search,
1676      the last character in a backward search.  */
1677   if (direction < 0)
1678     base_pat = pat_end - 1;
1679
1680   /* A character that does not appear in the pattern induces a
1681      stride equal to the pattern length.  */
1682   for (i = 0; i < 0400; i++)
1683     BM_tab[i] = dirlen;
1684
1685   /* We use this for translation, instead of TRT itself.
1686      We fill this in to handle the characters that actually
1687      occur in the pattern.  Others don't matter anyway!  */
1688   for (i = 0; i < 0400; i++)
1689     simple_translate[i] = i;
1690
1691   if (char_base)
1692     {
1693       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1694          byte following them are the target of translation.  */
1695       unsigned char str[MAX_MULTIBYTE_LENGTH];
1696       int cblen = CHAR_STRING (char_base, str);
1697
1698       translate_prev_byte1 = str[cblen - 2];
1699       if (cblen > 2)
1700         {
1701           translate_prev_byte2 = str[cblen - 3];
1702           if (cblen > 3)
1703             translate_prev_byte3 = str[cblen - 4];
1704         }
1705     }
1706
1707   i = 0;
1708   while (i != dirlen)
1709     {
1710       unsigned char *ptr = base_pat + i;
1711       i += direction;
1712       if (! NILP (trt))
1713         {
1714           /* If the byte currently looking at is the last of a
1715              character to check case-equivalents, set CH to that
1716              character.  An ASCII character and a non-ASCII character
1717              matching with CHAR_BASE are to be checked.  */
1718           int ch = -1;
1719
1720           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1721             ch = *ptr;
1722           else if (char_base
1723                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1724             {
1725               unsigned char *charstart = ptr - 1;
1726
1727               while (! (CHAR_HEAD_P (*charstart)))
1728                 charstart--;
1729               ch = STRING_CHAR (charstart);
1730               if (char_base != (ch & ~0x3F))
1731                 ch = -1;
1732             }
1733
1734           if (ch >= 0200 && multibyte)
1735             j = (ch & 0x3F) | 0200;
1736           else
1737             j = *ptr;
1738
1739           if (i == dirlen)
1740             stride_for_teases = BM_tab[j];
1741
1742           BM_tab[j] = dirlen - i;
1743           /* A translation table is accompanied by its inverse -- see
1744              comment following downcase_table for details.  */
1745           if (ch >= 0)
1746             {
1747               int starting_ch = ch;
1748               int starting_j = j;
1749
1750               while (1)
1751                 {
1752                   TRANSLATE (ch, inverse_trt, ch);
1753                   if (ch >= 0200 && multibyte)
1754                     j = (ch & 0x3F) | 0200;
1755                   else
1756                     j = ch;
1757
1758                   /* For all the characters that map into CH,
1759                      set up simple_translate to map the last byte
1760                      into STARTING_J.  */
1761                   simple_translate[j] = starting_j;
1762                   if (ch == starting_ch)
1763                     break;
1764                   BM_tab[j] = dirlen - i;
1765                 }
1766             }
1767         }
1768       else
1769         {
1770           j = *ptr;
1771
1772           if (i == dirlen)
1773             stride_for_teases = BM_tab[j];
1774           BM_tab[j] = dirlen - i;
1775         }
1776       /* stride_for_teases tells how much to stride if we get a
1777          match on the far character but are subsequently
1778          disappointed, by recording what the stride would have been
1779          for that character if the last character had been
1780          different.  */
1781     }
1782   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1783   /* loop invariant - POS_BYTE points at where last char (first
1784      char if reverse) of pattern would align in a possible match.  */
1785   while (n != 0)
1786     {
1787       ptrdiff_t tail_end;
1788       unsigned char *tail_end_ptr;
1789
1790       /* It's been reported that some (broken) compiler thinks that
1791          Boolean expressions in an arithmetic context are unsigned.
1792          Using an explicit ?1:0 prevents this.  */
1793       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1794           < 0)
1795         return (n * (0 - direction));
1796       /* First we do the part we can by pointers (maybe nothing) */
1797       QUIT;
1798       pat = base_pat;
1799       limit = pos_byte - dirlen + direction;
1800       if (direction > 0)
1801         {
1802           limit = BUFFER_CEILING_OF (limit);
1803           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1804              can take on without hitting edge of buffer or the gap.  */
1805           limit = min (limit, pos_byte + 20000);
1806           limit = min (limit, lim_byte - 1);
1807         }
1808       else
1809         {
1810           limit = BUFFER_FLOOR_OF (limit);
1811           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1812              can take on without hitting edge of buffer or the gap.  */
1813           limit = max (limit, pos_byte - 20000);
1814           limit = max (limit, lim_byte);
1815         }
1816       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1817       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1818
1819       if ((limit - pos_byte) * direction > 20)
1820         {
1821           unsigned char *p2;
1822
1823           p_limit = BYTE_POS_ADDR (limit);
1824           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1825           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1826           while (1)             /* use one cursor setting as long as i can */
1827             {
1828               if (direction > 0) /* worth duplicating */
1829                 {
1830                   while (cursor <= p_limit)
1831                     {
1832                       if (BM_tab[*cursor] == 0)
1833                         goto hit;
1834                       cursor += BM_tab[*cursor];
1835                     }
1836                 }
1837               else
1838                 {
1839                   while (cursor >= p_limit)
1840                     {
1841                       if (BM_tab[*cursor] == 0)
1842                         goto hit;
1843                       cursor += BM_tab[*cursor];
1844                     }
1845                 }
1846               /* If you are here, cursor is beyond the end of the
1847                  searched region.  You fail to match within the
1848                  permitted region and would otherwise try a character
1849                  beyond that region.  */
1850               break;
1851
1852             hit:
1853               i = dirlen - direction;
1854               if (! NILP (trt))
1855                 {
1856                   while ((i -= direction) + direction != 0)
1857                     {
1858                       int ch;
1859                       cursor -= direction;
1860                       /* Translate only the last byte of a character.  */
1861                       if (! multibyte
1862                           || ((cursor == tail_end_ptr
1863                                || CHAR_HEAD_P (cursor[1]))
1864                               && (CHAR_HEAD_P (cursor[0])
1865                                   /* Check if this is the last byte of
1866                                      a translatable character.  */
1867                                   || (translate_prev_byte1 == cursor[-1]
1868                                       && (CHAR_HEAD_P (translate_prev_byte1)
1869                                           || (translate_prev_byte2 == cursor[-2]
1870                                               && (CHAR_HEAD_P (translate_prev_byte2)
1871                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1872                         ch = simple_translate[*cursor];
1873                       else
1874                         ch = *cursor;
1875                       if (pat[i] != ch)
1876                         break;
1877                     }
1878                 }
1879               else
1880                 {
1881                   while ((i -= direction) + direction != 0)
1882                     {
1883                       cursor -= direction;
1884                       if (pat[i] != *cursor)
1885                         break;
1886                     }
1887                 }
1888               cursor += dirlen - i - direction; /* fix cursor */
1889               if (i + direction == 0)
1890                 {
1891                   ptrdiff_t position, start, end;
1892
1893                   cursor -= direction;
1894
1895                   position = pos_byte + cursor - p2 + ((direction > 0)
1896                                                        ? 1 - len_byte : 0);
1897                   set_search_regs (position, len_byte);
1898
1899                   if (NILP (Vinhibit_changing_match_data))
1900                     {
1901                       start = search_regs.start[0];
1902                       end = search_regs.end[0];
1903                     }
1904                   else
1905                     /* If Vinhibit_changing_match_data is non-nil,
1906                        search_regs will not be changed.  So let's
1907                        compute start and end here.  */
1908                     {
1909                       start = BYTE_TO_CHAR (position);
1910                       end = BYTE_TO_CHAR (position + len_byte);
1911                     }
1912
1913                   if ((n -= direction) != 0)
1914                     cursor += dirlen; /* to resume search */
1915                   else
1916                     return direction > 0 ? end : start;
1917                 }
1918               else
1919                 cursor += stride_for_teases; /* <sigh> we lose -  */
1920             }
1921           pos_byte += cursor - p2;
1922         }
1923       else
1924         /* Now we'll pick up a clump that has to be done the hard
1925            way because it covers a discontinuity.  */
1926         {
1927           limit = ((direction > 0)
1928                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1929                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1930           limit = ((direction > 0)
1931                    ? min (limit + len_byte, lim_byte - 1)
1932                    : max (limit - len_byte, lim_byte));
1933           /* LIMIT is now the last value POS_BYTE can have
1934              and still be valid for a possible match.  */
1935           while (1)
1936             {
1937               /* This loop can be coded for space rather than
1938                  speed because it will usually run only once.
1939                  (the reach is at most len + 21, and typically
1940                  does not exceed len).  */
1941               while ((limit - pos_byte) * direction >= 0)
1942                 {
1943                   int ch = FETCH_BYTE (pos_byte);
1944                   if (BM_tab[ch] == 0)
1945                     goto hit2;
1946                   pos_byte += BM_tab[ch];
1947                 }
1948               break;    /* ran off the end */
1949
1950             hit2:
1951               /* Found what might be a match.  */
1952               i = dirlen - direction;
1953               while ((i -= direction) + direction != 0)
1954                 {
1955                   int ch;
1956                   unsigned char *ptr;
1957                   pos_byte -= direction;
1958                   ptr = BYTE_POS_ADDR (pos_byte);
1959                   /* Translate only the last byte of a character.  */
1960                   if (! multibyte
1961                       || ((ptr == tail_end_ptr
1962                            || CHAR_HEAD_P (ptr[1]))
1963                           && (CHAR_HEAD_P (ptr[0])
1964                               /* Check if this is the last byte of a
1965                                  translatable character.  */
1966                               || (translate_prev_byte1 == ptr[-1]
1967                                   && (CHAR_HEAD_P (translate_prev_byte1)
1968                                       || (translate_prev_byte2 == ptr[-2]
1969                                           && (CHAR_HEAD_P (translate_prev_byte2)
1970                                               || translate_prev_byte3 == ptr[-3])))))))
1971                     ch = simple_translate[*ptr];
1972                   else
1973                     ch = *ptr;
1974                   if (pat[i] != ch)
1975                     break;
1976                 }
1977               /* Above loop has moved POS_BYTE part or all the way
1978                  back to the first pos (last pos if reverse).
1979                  Set it once again at the last (first if reverse) char.  */
1980               pos_byte += dirlen - i - direction;
1981               if (i + direction == 0)
1982                 {
1983                   ptrdiff_t position, start, end;
1984                   pos_byte -= direction;
1985
1986                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1987                   set_search_regs (position, len_byte);
1988
1989                   if (NILP (Vinhibit_changing_match_data))
1990                     {
1991                       start = search_regs.start[0];
1992                       end = search_regs.end[0];
1993                     }
1994                   else
1995                     /* If Vinhibit_changing_match_data is non-nil,
1996                        search_regs will not be changed.  So let's
1997                        compute start and end here.  */
1998                     {
1999                       start = BYTE_TO_CHAR (position);
2000                       end = BYTE_TO_CHAR (position + len_byte);
2001                     }
2002
2003                   if ((n -= direction) != 0)
2004                     pos_byte += dirlen; /* to resume search */
2005                   else
2006                     return direction > 0 ? end : start;
2007                 }
2008               else
2009                 pos_byte += stride_for_teases;
2010             }
2011           }
2012       /* We have done one clump.  Can we continue? */
2013       if ((lim_byte - pos_byte) * direction < 0)
2014         return ((0 - n) * direction);
2015     }
2016   return BYTE_TO_CHAR (pos_byte);
2017 }
2018
2019 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2020    for the overall match just found in the current buffer.
2021    Also clear out the match data for registers 1 and up.

        BEG_BYTE and NBYTES are byte quantities; what is stored in
        search_regs is the result of BYTE_TO_CHAR applied to the two
        byte positions.

        When Vinhibit_changing_match_data is non-nil this function
        returns immediately without touching search_regs or
        last_thing_searched, so a caller that still needs the bounds
        of the match has to compute them itself.

        If search_regs has no registers yet (num_regs == 0), room for
        two registers is allocated with xmalloc.  */
2022
2023 static void
2024 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2025 {
2026   ptrdiff_t i;
2027
       /* Match data must not be changed: leave everything as it is.  */
2028   if (!NILP (Vinhibit_changing_match_data))
2029     return;
2030
2031   /* Make sure we have registers in which to store
2032      the match position.  */
2033   if (search_regs.num_regs == 0)
2034     {
2035       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2036       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2037       search_regs.num_regs = 2;
2038     }
2039
2040   /* Clear out the other registers.  A start of -1 is what
          replace-match checks for (it requires start >= 0) to tell
          that a subexpression did not match.  */
2041   for (i = 1; i < search_regs.num_regs; i++)
2042     {
2043       search_regs.start[i] = -1;
2044       search_regs.end[i] = -1;
2045     }
2046
       /* Register 0 describes the whole match: store both of its ends,
          converting the byte positions with BYTE_TO_CHAR.  */
2047   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2048   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
       /* Record the current buffer in last_thing_searched.  */
2049   XSETBUFFER (last_thing_searched, current_buffer);
2050 }
2051 \f
2052 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2053        "MSearch backward: ",
2054        doc: /* Search backward from point for STRING.
2055 Set point to the beginning of the occurrence found, and return point.
2056 An optional second argument bounds the search; it is a buffer position.
2057 The match found must not extend before that position.
2058 Optional third argument, if t, means if fail just return nil (no error).
2059  If not nil and not t, position at limit of search and return nil.
2060 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2061  successive occurrences.  If COUNT is negative, search forward,
2062  instead of backward, for -COUNT occurrences.
2063
2064 Search case-sensitivity is determined by the value of the variable
2065 `case-fold-search', which see.
2066
2067 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2068   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2069 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  Judging from the six
          sibling search commands, the fifth argument is the direction
          (-1 here; search-forward passes 1), the sixth is 1 only for
          the regexp commands and the seventh is 1 only for the posix-*
          commands, so both are 0 for this literal-string search.
          NOTE(review): confirm against search_command's parameter
          list.  */
2070   return search_command (string, bound, noerror, count, -1, 0, 0);
2071 }
2072
2073 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2074        doc: /* Search forward from point for STRING.
2075 Set point to the end of the occurrence found, and return point.
2076 An optional second argument bounds the search; it is a buffer position.
2077 The match found must not extend after that position.  A value of nil is
2078   equivalent to (point-max).
2079 Optional third argument, if t, means if fail just return nil (no error).
2080   If not nil and not t, move to limit of search and return nil.
2081 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2082  successive occurrences.  If COUNT is negative, search backward,
2083  instead of forward, for -COUNT occurrences.
2084
2085 Search case-sensitivity is determined by the value of the variable
2086 `case-fold-search', which see.
2087
2088 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2089   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2090 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  Judging from the six
          sibling search commands, the fifth argument is the direction
          (1 here; search-backward passes -1), the sixth is 1 only for
          the regexp commands and the seventh is 1 only for the posix-*
          commands, so both are 0 for this literal-string search.
          NOTE(review): confirm against search_command's parameter
          list.  */
2091   return search_command (string, bound, noerror, count, 1, 0, 0);
2092 }
2093
2094 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2095        "sRE search backward: ",
2096        doc: /* Search backward from point for match for regular expression REGEXP.
2097 Set point to the beginning of the match, and return point.
2098 The match found is the one starting last in the buffer
2099 and yet ending before the origin of the search.
2100 An optional second argument bounds the search; it is a buffer position.
2101 The match found must start at or after that position.
2102 Optional third argument, if t, means if fail just return nil (no error).
2103   If not nil and not t, move to limit of search and return nil.
2104 Optional fourth argument is repeat count--search for successive occurrences.
2105
2106 Search case-sensitivity is determined by the value of the variable
2107 `case-fold-search', which see.
2108
2109 See also the functions `match-beginning', `match-end', `match-string',
2110 and `replace-match'.  */)
2111   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2112 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  Judging from the six
          sibling search commands, the fifth argument is the direction
          (-1 here; re-search-forward passes 1), the sixth argument 1
          marks a regexp search (the plain search-* commands pass 0)
          and the seventh is 1 only for the posix-* commands.
          NOTE(review): confirm against search_command's parameter
          list.  */
2113   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2114 }
2115
2116 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2117        "sRE search: ",
2118        doc: /* Search forward from point for regular expression REGEXP.
2119 Set point to the end of the occurrence found, and return point.
2120 An optional second argument bounds the search; it is a buffer position.
2121 The match found must not extend after that position.
2122 Optional third argument, if t, means if fail just return nil (no error).
2123   If not nil and not t, move to limit of search and return nil.
2124 Optional fourth argument is repeat count--search for successive occurrences.
2125
2126 Search case-sensitivity is determined by the value of the variable
2127 `case-fold-search', which see.
2128
2129 See also the functions `match-beginning', `match-end', `match-string',
2130 and `replace-match'.  */)
2131   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2132 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  Judging from the six
          sibling search commands, the fifth argument is the direction
          (1 here; re-search-backward passes -1), the sixth argument 1
          marks a regexp search (the plain search-* commands pass 0)
          and the seventh is 1 only for the posix-* commands.
          NOTE(review): confirm against search_command's parameter
          list.  */
2133   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2134 }
2135
2136 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2137        "sPosix search backward: ",
2138        doc: /* Search backward from point for match for regular expression REGEXP.
2139 Find the longest match in accord with Posix regular expression rules.
2140 Set point to the beginning of the match, and return point.
2141 The match found is the one starting last in the buffer
2142 and yet ending before the origin of the search.
2143 An optional second argument bounds the search; it is a buffer position.
2144 The match found must start at or after that position.
2145 Optional third argument, if t, means if fail just return nil (no error).
2146   If not nil and not t, move to limit of search and return nil.
2147 Optional fourth argument is repeat count--search for successive occurrences.
2148
2149 Search case-sensitivity is determined by the value of the variable
2150 `case-fold-search', which see.
2151
2152 See also the functions `match-beginning', `match-end', `match-string',
2153 and `replace-match'.  */)
2154   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2155 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  Judging from the six
          sibling search commands, the fifth argument is the direction
          (-1 here; posix-search-forward passes 1), the sixth argument 1
          marks a regexp search, and the seventh argument 1 (0 in the
          re-search-* commands) asks for POSIX matching.
          NOTE(review): confirm against search_command's parameter
          list.  */
2156   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2157 }
2158
2159 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2160        "sPosix search: ",
2161        doc: /* Search forward from point for regular expression REGEXP.
2162 Find the longest match in accord with Posix regular expression rules.
2163 Set point to the end of the occurrence found, and return point.
2164 An optional second argument bounds the search; it is a buffer position.
2165 The match found must not extend after that position.
2166 Optional third argument, if t, means if fail just return nil (no error).
2167   If not nil and not t, move to limit of search and return nil.
2168 Optional fourth argument is repeat count--search for successive occurrences.
2169
2170 Search case-sensitivity is determined by the value of the variable
2171 `case-fold-search', which see.
2172
2173 See also the functions `match-beginning', `match-end', `match-string',
2174 and `replace-match'.  */)
2175   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2176 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  Judging from the six
          sibling search commands, the fifth argument is the direction
          (1 here; posix-search-backward passes -1), the sixth argument 1
          marks a regexp search, and the seventh argument 1 (0 in the
          re-search-* commands) asks for POSIX matching.
          NOTE(review): confirm against search_command's parameter
          list.  */
2177   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2178 }
2179 \f
2180 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2181        doc: /* Replace text matched by last search with NEWTEXT.
2182 Leave point at the end of the replacement text.
2183
2184 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2185 the replacement text.  Otherwise, maybe capitalize the whole text, or
2186 maybe just word initials, based on the replaced text.  If the replaced
2187 text has only capital letters and has at least one multiletter word,
2188 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2189 in the replaced text, capitalize each word in NEWTEXT.
2190
2191 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2192 Otherwise treat `\\' as special:
2193   `\\&' in NEWTEXT means substitute original matched text.
2194   `\\N' means substitute what matched the Nth `\\(...\\)'.
2195        If Nth parens didn't match, substitute nothing.
2196   `\\\\' means insert one `\\'.
2197   `\\?' is treated literally
2198        (for compatibility with `query-replace-regexp').
2199   Any other character following `\\' signals an error.
2200 Case conversion does not apply to these substitutions.
2201
2202 If optional fourth argument STRING is non-nil, it should be a string
2203 to act on; this should be the string on which the previous match was
2204 done via `string-match'.  In this case, `replace-match' creates and
2205 returns a new string, made by copying STRING and replacing the part of
2206 STRING that was matched (the original STRING itself is not altered).
2207
2208 The optional fifth argument SUBEXP specifies a subexpression;
2209 it says to replace just that subexpression with NEWTEXT,
2210 rather than replacing the entire matched text.
2211 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2212 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2213 NEWTEXT in place of subexp N.
2214 This is useful only after a regular expression search or match,
2215 since only regular expressions have distinguished subexpressions.  */)
2216   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2217 {
2218   enum { nochange, all_caps, cap_initial } case_action;
2219   ptrdiff_t pos, pos_byte;
2220   bool some_multiletter_word;
2221   bool some_lowercase;
2222   bool some_uppercase;
2223   bool some_nonuppercase_initial;
2224   int c, prevc;
2225   ptrdiff_t sub;
2226   ptrdiff_t opoint, newpoint;
2227
2228   CHECK_STRING (newtext);
2229
2230   if (! NILP (string))
2231     CHECK_STRING (string);
2232
2233   case_action = nochange;       /* We tried an initialization */
2234                                 /* but some C compilers blew it */
2235
2236   if (search_regs.num_regs <= 0)
2237     error ("`replace-match' called before any match found");
2238
2239   if (NILP (subexp))
2240     sub = 0;
2241   else
2242     {
2243       CHECK_NUMBER (subexp);
2244       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2245         args_out_of_range (subexp, make_number (search_regs.num_regs));
2246       sub = XINT (subexp);
2247     }
2248
2249   if (NILP (string))
2250     {
2251       if (search_regs.start[sub] < BEGV
2252           || search_regs.start[sub] > search_regs.end[sub]
2253           || search_regs.end[sub] > ZV)
2254         args_out_of_range (make_number (search_regs.start[sub]),
2255                            make_number (search_regs.end[sub]));
2256     }
2257   else
2258     {
2259       if (search_regs.start[sub] < 0
2260           || search_regs.start[sub] > search_regs.end[sub]
2261           || search_regs.end[sub] > SCHARS (string))
2262         args_out_of_range (make_number (search_regs.start[sub]),
2263                            make_number (search_regs.end[sub]));
2264     }
2265
2266   if (NILP (fixedcase))
2267     {
2268       /* Decide how to casify by examining the matched text. */
2269       ptrdiff_t last;
2270
2271       pos = search_regs.start[sub];
2272       last = search_regs.end[sub];
2273
2274       if (NILP (string))
2275         pos_byte = CHAR_TO_BYTE (pos);
2276       else
2277         pos_byte = string_char_to_byte (string, pos);
2278
2279       prevc = '\n';
2280       case_action = all_caps;
2281
2282       /* some_multiletter_word is set nonzero if any original word
2283          is more than one letter long. */
2284       some_multiletter_word = 0;
2285       some_lowercase = 0;
2286       some_nonuppercase_initial = 0;
2287       some_uppercase = 0;
2288
2289       while (pos < last)
2290         {
2291           if (NILP (string))
2292             {
2293               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2294               INC_BOTH (pos, pos_byte);
2295             }
2296           else
2297             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2298
2299           if (lowercasep (c))
2300             {
2301               /* Cannot be all caps if any original char is lower case */
2302
2303               some_lowercase = 1;
2304               if (SYNTAX (prevc) != Sword)
2305                 some_nonuppercase_initial = 1;
2306               else
2307                 some_multiletter_word = 1;
2308             }
2309           else if (uppercasep (c))
2310             {
2311               some_uppercase = 1;
2312               if (SYNTAX (prevc) != Sword)
2313                 ;
2314               else
2315                 some_multiletter_word = 1;
2316             }
2317           else
2318             {
2319               /* If the initial is a caseless word constituent,
2320                  treat that like a lowercase initial.  */
2321               if (SYNTAX (prevc) != Sword)
2322                 some_nonuppercase_initial = 1;
2323             }
2324
2325           prevc = c;
2326         }
2327
2328       /* Convert to all caps if the old text is all caps
2329          and has at least one multiletter word.  */
2330       if (! some_lowercase && some_multiletter_word)
2331         case_action = all_caps;
2332       /* Capitalize each word, if the old text has all capitalized words.  */
2333       else if (!some_nonuppercase_initial && some_multiletter_word)
2334         case_action = cap_initial;
2335       else if (!some_nonuppercase_initial && some_uppercase)
2336         /* Should x -> yz, operating on X, give Yz or YZ?
2337            We'll assume the latter.  */
2338         case_action = all_caps;
2339       else
2340         case_action = nochange;
2341     }
2342
2343   /* Do replacement in a string.  */
2344   if (!NILP (string))
2345     {
2346       Lisp_Object before, after;
2347
2348       before = Fsubstring (string, make_number (0),
2349                            make_number (search_regs.start[sub]));
2350       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2351
2352       /* Substitute parts of the match into NEWTEXT
2353          if desired.  */
2354       if (NILP (literal))
2355         {
2356           ptrdiff_t lastpos = 0;
2357           ptrdiff_t lastpos_byte = 0;
2358           /* We build up the substituted string in ACCUM.  */
2359           Lisp_Object accum;
2360           Lisp_Object middle;
2361           ptrdiff_t length = SBYTES (newtext);
2362
2363           accum = Qnil;
2364
2365           for (pos_byte = 0, pos = 0; pos_byte < length;)
2366             {
2367               ptrdiff_t substart = -1;
2368               ptrdiff_t subend = 0;
2369               bool delbackslash = 0;
2370
2371               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2372
2373               if (c == '\\')
2374                 {
2375                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2376
2377                   if (c == '&')
2378                     {
2379                       substart = search_regs.start[sub];
2380                       subend = search_regs.end[sub];
2381                     }
2382                   else if (c >= '1' && c <= '9')
2383                     {
2384                       if (c - '0' < search_regs.num_regs
2385                           && search_regs.start[c - '0'] >= 0)
2386                         {
2387                           substart = search_regs.start[c - '0'];
2388                           subend = search_regs.end[c - '0'];
2389                         }
2390                       else
2391                         {
2392                           /* If that subexp did not match,
2393                              replace \\N with nothing.  */
2394                           substart = 0;
2395                           subend = 0;
2396                         }
2397                     }
2398                   else if (c == '\\')
2399                     delbackslash = 1;
2400                   else if (c != '?')
2401                     error ("Invalid use of `\\' in replacement text");
2402                 }
2403               if (substart >= 0)
2404                 {
2405                   if (pos - 2 != lastpos)
2406                     middle = substring_both (newtext, lastpos,
2407                                              lastpos_byte,
2408                                              pos - 2, pos_byte - 2);
2409                   else
2410                     middle = Qnil;
2411                   accum = concat3 (accum, middle,
2412                                    Fsubstring (string,
2413                                                make_number (substart),
2414                                                make_number (subend)));
2415                   lastpos = pos;
2416                   lastpos_byte = pos_byte;
2417                 }
2418               else if (delbackslash)
2419                 {
2420                   middle = substring_both (newtext, lastpos,
2421                                            lastpos_byte,
2422                                            pos - 1, pos_byte - 1);
2423
2424                   accum = concat2 (accum, middle);
2425                   lastpos = pos;
2426                   lastpos_byte = pos_byte;
2427                 }
2428             }
2429
2430           if (pos != lastpos)
2431             middle = substring_both (newtext, lastpos,
2432                                      lastpos_byte,
2433                                      pos, pos_byte);
2434           else
2435             middle = Qnil;
2436
2437           newtext = concat2 (accum, middle);
2438         }
2439
2440       /* Do case substitution in NEWTEXT if desired.  */
2441       if (case_action == all_caps)
2442         newtext = Fupcase (newtext);
2443       else if (case_action == cap_initial)
2444         newtext = Fupcase_initials (newtext);
2445
2446       return concat3 (before, newtext, after);
2447     }
2448
2449   /* Record point, then move (quietly) to the start of the match.  */
2450   if (PT >= search_regs.end[sub])
2451     opoint = PT - ZV;
2452   else if (PT > search_regs.start[sub])
2453     opoint = search_regs.end[sub] - ZV;
2454   else
2455     opoint = PT;
2456
2457   /* If we want non-literal replacement,
2458      perform substitution on the replacement string.  */
2459   if (NILP (literal))
2460     {
2461       ptrdiff_t length = SBYTES (newtext);
2462       unsigned char *substed;
2463       ptrdiff_t substed_alloc_size, substed_len;
2464       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2465       bool str_multibyte = STRING_MULTIBYTE (newtext);
2466       bool really_changed = 0;
2467
2468       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2469                             ? length * 2 + 100
2470                             : STRING_BYTES_BOUND);
2471       substed = xmalloc (substed_alloc_size);
2472       substed_len = 0;
2473
2474       /* Go thru NEWTEXT, producing the actual text to insert in
2475          SUBSTED while adjusting multibyteness to that of the current
2476          buffer.  */
2477
2478       for (pos_byte = 0, pos = 0; pos_byte < length;)
2479         {
2480           unsigned char str[MAX_MULTIBYTE_LENGTH];
2481           const unsigned char *add_stuff = NULL;
2482           ptrdiff_t add_len = 0;
2483           ptrdiff_t idx = -1;
2484
2485           if (str_multibyte)
2486             {
2487               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2488               if (!buf_multibyte)
2489                 c = multibyte_char_to_unibyte (c);
2490             }
2491           else
2492             {
2493               /* Note that we don't have to increment POS.  */
2494               c = SREF (newtext, pos_byte++);
2495               if (buf_multibyte)
2496                 MAKE_CHAR_MULTIBYTE (c);
2497             }
2498
2499           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2500              or set IDX to a match index, which means put that part
2501              of the buffer text into SUBSTED.  */
2502
2503           if (c == '\\')
2504             {
2505               really_changed = 1;
2506
2507               if (str_multibyte)
2508                 {
2509                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2510                                                       pos, pos_byte);
2511                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2512                     c = multibyte_char_to_unibyte (c);
2513                 }
2514               else
2515                 {
2516                   c = SREF (newtext, pos_byte++);
2517                   if (buf_multibyte)
2518                     MAKE_CHAR_MULTIBYTE (c);
2519                 }
2520
2521               if (c == '&')
2522                 idx = sub;
2523               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2524                 {
2525                   if (search_regs.start[c - '0'] >= 1)
2526                     idx = c - '0';
2527                 }
2528               else if (c == '\\')
2529                 add_len = 1, add_stuff = (unsigned char *) "\\";
2530               else
2531                 {
2532                   xfree (substed);
2533                   error ("Invalid use of `\\' in replacement text");
2534                 }
2535             }
2536           else
2537             {
2538               add_len = CHAR_STRING (c, str);
2539               add_stuff = str;
2540             }
2541
2542           /* If we want to copy part of a previous match,
2543              set up ADD_STUFF and ADD_LEN to point to it.  */
2544           if (idx >= 0)
2545             {
2546               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2547               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2548               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2549                 move_gap_both (search_regs.start[idx], begbyte);
2550               add_stuff = BYTE_POS_ADDR (begbyte);
2551             }
2552
2553           /* Now the stuff we want to add to SUBSTED
2554              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2555
2556           /* Make sure SUBSTED is big enough.  */
2557           if (substed_alloc_size - substed_len < add_len)
2558             substed =
2559               xpalloc (substed, &substed_alloc_size,
2560                        add_len - (substed_alloc_size - substed_len),
2561                        STRING_BYTES_BOUND, 1);
2562
2563           /* Now add to the end of SUBSTED.  */
2564           if (add_stuff)
2565             {
2566               memcpy (substed + substed_len, add_stuff, add_len);
2567               substed_len += add_len;
2568             }
2569         }
2570
2571       if (really_changed)
2572         {
2573           if (buf_multibyte)
2574             {
2575               ptrdiff_t nchars =
2576                 multibyte_chars_in_text (substed, substed_len);
2577
2578               newtext = make_multibyte_string ((char *) substed, nchars,
2579                                                substed_len);
2580             }
2581           else
2582             newtext = make_unibyte_string ((char *) substed, substed_len);
2583         }
2584       xfree (substed);
2585     }
2586
2587   /* Replace the old text with the new in the cleanest possible way.  */
2588   replace_range (search_regs.start[sub], search_regs.end[sub],
2589                  newtext, 1, 0, 1);
2590   newpoint = search_regs.start[sub] + SCHARS (newtext);
2591
2592   if (case_action == all_caps)
2593     Fupcase_region (make_number (search_regs.start[sub]),
2594                     make_number (newpoint));
2595   else if (case_action == cap_initial)
2596     Fupcase_initials_region (make_number (search_regs.start[sub]),
2597                              make_number (newpoint));
2598
2599   /* Adjust search data for this change.  */
2600   {
2601     ptrdiff_t oldend = search_regs.end[sub];
2602     ptrdiff_t oldstart = search_regs.start[sub];
2603     ptrdiff_t change = newpoint - search_regs.end[sub];
2604     ptrdiff_t i;
2605
2606     for (i = 0; i < search_regs.num_regs; i++)
2607       {
2608         if (search_regs.start[i] >= oldend)
2609           search_regs.start[i] += change;
2610         else if (search_regs.start[i] > oldstart)
2611           search_regs.start[i] = oldstart;
2612         if (search_regs.end[i] >= oldend)
2613           search_regs.end[i] += change;
2614         else if (search_regs.end[i] > oldstart)
2615           search_regs.end[i] = oldstart;
2616       }
2617   }
2618
2619   /* Put point back where it was in the text.  */
2620   if (opoint <= 0)
2621     TEMP_SET_PT (opoint + ZV);
2622   else
2623     TEMP_SET_PT (opoint);
2624
2625   /* Now move point "officially" to the start of the inserted replacement.  */
2626   move_if_not_intangible (newpoint);
2627
2628   return Qnil;
2629 }
2630 \f
2631 static Lisp_Object
2632 match_limit (Lisp_Object num, bool beginningp)
2633 {
2634   EMACS_INT n;
2635
2636   CHECK_NUMBER (num);
2637   n = XINT (num);
2638   if (n < 0)
2639     args_out_of_range (num, make_number (0));
2640   if (search_regs.num_regs <= 0)
2641     error ("No match data, because no search succeeded");
2642   if (n >= search_regs.num_regs
2643       || search_regs.start[n] < 0)
2644     return Qnil;
2645   return (make_number ((beginningp) ? search_regs.start[n]
2646                                     : search_regs.end[n]));
2647 }
2648
DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
       doc: /* Return position of start of text matched by last search.
SUBEXP, a number, specifies which parenthesized expression in the last
  regexp.
Value is nil if SUBEXPth pair didn't match, or there were less than
  SUBEXP pairs.
Zero means the entire text matched by the whole regexp or whole string.  */)
  (Lisp_Object subexp)
{
  /* A true BEGINNINGP argument makes match_limit report the start of
     the match rather than its end; all argument checking happens
     there.  */
  return match_limit (subexp, 1);
}
2660
DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
       doc: /* Return position of end of text matched by last search.
SUBEXP, a number, specifies which parenthesized expression in the last
  regexp.
Value is nil if SUBEXPth pair didn't match, or there were less than
  SUBEXP pairs.
Zero means the entire text matched by the whole regexp or whole string.  */)
  (Lisp_Object subexp)
{
  /* A false BEGINNINGP argument makes match_limit report the end of
     the match rather than its start; all argument checking happens
     there.  */
  return match_limit (subexp, 0);
}
2672
2673 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2674        doc: /* Return a list containing all info on what the last search matched.
2675 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2676 All the elements are markers or nil (nil if the Nth pair didn't match)
2677 if the last match was on a buffer; integers or nil if a string was matched.
2678 Use `set-match-data' to reinstate the data in this list.
2679
2680 If INTEGERS (the optional first argument) is non-nil, always use
2681 integers \(rather than markers) to represent buffer positions.  In
2682 this case, and if the last match was in a buffer, the buffer will get
2683 stored as one additional element at the end of the list.
2684
2685 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2686 enough to hold all the values, and if INTEGERS is non-nil, no consing
2687 is done.
2688
2689 If optional third arg RESEAT is non-nil, any previous markers on the
2690 REUSE list will be modified to point to nowhere.
2691
2692 Return value is undefined if the last search failed.  */)
2693   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2694 {
2695   Lisp_Object tail, prev;
2696   Lisp_Object *data;
2697   ptrdiff_t i, len;
2698
2699   if (!NILP (reseat))
2700     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2701       if (MARKERP (XCAR (tail)))
2702         {
2703           unchain_marker (XMARKER (XCAR (tail)));
2704           XSETCAR (tail, Qnil);
2705         }
2706
2707   if (NILP (last_thing_searched))
2708     return Qnil;
2709
2710   prev = Qnil;
2711
2712   data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
2713
2714   len = 0;
2715   for (i = 0; i < search_regs.num_regs; i++)
2716     {
2717       ptrdiff_t start = search_regs.start[i];
2718       if (start >= 0)
2719         {
2720           if (EQ (last_thing_searched, Qt)
2721               || ! NILP (integers))
2722             {
2723               XSETFASTINT (data[2 * i], start);
2724               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2725             }
2726           else if (BUFFERP (last_thing_searched))
2727             {
2728               data[2 * i] = Fmake_marker ();
2729               Fset_marker (data[2 * i],
2730                            make_number (start),
2731                            last_thing_searched);
2732               data[2 * i + 1] = Fmake_marker ();
2733               Fset_marker (data[2 * i + 1],
2734                            make_number (search_regs.end[i]),
2735                            last_thing_searched);
2736             }
2737           else
2738             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2739             emacs_abort ();
2740
2741           len = 2 * i + 2;
2742         }
2743       else
2744         data[2 * i] = data[2 * i + 1] = Qnil;
2745     }
2746
2747   if (BUFFERP (last_thing_searched) && !NILP (integers))
2748     {
2749       data[len] = last_thing_searched;
2750       len++;
2751     }
2752
2753   /* If REUSE is not usable, cons up the values and return them.  */
2754   if (! CONSP (reuse))
2755     return Flist (len, data);
2756
2757   /* If REUSE is a list, store as many value elements as will fit
2758      into the elements of REUSE.  */
2759   for (i = 0, tail = reuse; CONSP (tail);
2760        i++, tail = XCDR (tail))
2761     {
2762       if (i < len)
2763         XSETCAR (tail, data[i]);
2764       else
2765         XSETCAR (tail, Qnil);
2766       prev = tail;
2767     }
2768
2769   /* If we couldn't fit all value elements into REUSE,
2770      cons up the rest of them and add them to the end of REUSE.  */
2771   if (i < len)
2772     XSETCDR (prev, Flist (len - i, data + i));
2773
2774   return reuse;
2775 }
2776
2777 /* We used to have an internal use variant of `reseat' described as:
2778
2779       If RESEAT is `evaporate', put the markers back on the free list
2780       immediately.  No other references to the markers must exist in this
2781       case, so it is used only internally on the unwind stack and
2782       save-match-data from Lisp.
2783
2784    But it was ill-conceived: those supposedly-internal markers get exposed via
2785    the undo-list, so freeing them here is unsafe.  */
2786
DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
       doc: /* Set internal data on last search match from elements of LIST.
LIST should have been created by calling `match-data' previously.

If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
  (register Lisp_Object list, Lisp_Object reseat)
{
  ptrdiff_t i;
  register Lisp_Object marker;

  /* When asynchronous code is running, stash the existing match data
     first (see save_search_regs) before overwriting it.  */
  if (running_asynch_code)
    save_search_regs ();

  CHECK_LIST (list);

  /* Unless we find a marker with a buffer or an explicit buffer
     in LIST, assume that this match data came from a string.  */
  last_thing_searched = Qt;

  /* Allocate registers if they don't already exist.  */
  {
    /* Each register consumes two list elements: a start and an end.  */
    EMACS_INT length = XFASTINT (Flength (list)) / 2;

    if (length > search_regs.num_regs)
      {
        /* Grow both register arrays in step.  xpalloc is handed
           &num_regs, and the resulting num_regs is then used to size
           the END array and to mark the newly added START slots as
           unmatched.  */
        ptrdiff_t num_regs = search_regs.num_regs;
        if (PTRDIFF_MAX < length)
          memory_full (SIZE_MAX);
        search_regs.start =
          xpalloc (search_regs.start, &num_regs, length - num_regs,
                   min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
        search_regs.end =
          xrealloc (search_regs.end, num_regs * sizeof (regoff_t));

        for (i = search_regs.num_regs; i < num_regs; i++)
          search_regs.start[i] = -1;

        search_regs.num_regs = num_regs;
      }

    /* Walk LIST two elements at a time; I is the register being
       filled.  */
    for (i = 0; CONSP (list); i++)
      {
        marker = XCAR (list);
        /* A buffer object in LIST records which buffer was searched
           and ends the data.  */
        if (BUFFERP (marker))
          {
            last_thing_searched = marker;
            break;
          }
        if (i >= length)
          break;
        if (NILP (marker))
          {
            /* A nil start marks register I as unmatched.  Step past
               this element here; the XCDR at the bottom of the loop
               then steps past the paired end element.  */
            search_regs.start[i] = -1;
            list = XCDR (list);
          }
        else
          {
            Lisp_Object from;
            Lisp_Object m;

            /* Keep the original object in M: MARKER may be
               overwritten below, but RESEAT must act on the real
               marker.  */
            m = marker;
            if (MARKERP (marker))
              {
                /* A marker that points nowhere is treated as
                   position 0; otherwise its buffer becomes the thing
                   searched.  */
                if (XMARKER (marker)->buffer == 0)
                  XSETFASTINT (marker, 0);
                else
                  XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
              }

            CHECK_NUMBER_COERCE_MARKER (marker);
            from = marker;

            if (!NILP (reseat) && MARKERP (m))
              {
                unchain_marker (XMARKER (m));
                XSETCAR (list, Qnil);
              }

            /* A start with no following end element: stop, leaving
               register I to be marked unmatched by the loop after
               this one.  */
            if ((list = XCDR (list), !CONSP (list)))
              break;

            m = marker = XCAR (list);

            if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
              XSETFASTINT (marker, 0);

            CHECK_NUMBER_COERCE_MARKER (marker);
            /* Store the pair only if both positions fit in regoff_t;
               otherwise treat the register as unmatched.  */
            if ((XINT (from) < 0
                 ? TYPE_MINIMUM (regoff_t) <= XINT (from)
                 : XINT (from) <= TYPE_MAXIMUM (regoff_t))
                && (XINT (marker) < 0
                    ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
                    : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
              {
                search_regs.start[i] = XINT (from);
                search_regs.end[i] = XINT (marker);
              }
            else
              {
                search_regs.start[i] = -1;
              }

            if (!NILP (reseat) && MARKERP (m))
              {
                unchain_marker (XMARKER (m));
                XSETCAR (list, Qnil);
              }
          }
        list = XCDR (list);
      }

    /* Mark every register not supplied by LIST as unmatched.  */
    for (; i < search_regs.num_regs; i++)
      search_regs.start[i] = -1;
  }

  return Qnil;
}
2904
/* If true the match data have been saved in saved_search_regs
   during the execution of a sentinel or filter. */
static bool search_regs_saved;
/* The register count and start/end arrays taken over from search_regs
   by save_search_regs and handed back by restore_search_regs.  */
static struct re_registers saved_search_regs;
/* The value last_thing_searched had when save_search_regs stashed the
   registers; reset to Qnil once restore_search_regs has put it back.  */
static Lisp_Object saved_last_thing_searched;
2910
2911 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2912    if asynchronous code (filter or sentinel) is running. */
2913 static void
2914 save_search_regs (void)
2915 {
2916   if (!search_regs_saved)
2917     {
2918       saved_search_regs.num_regs = search_regs.num_regs;
2919       saved_search_regs.start = search_regs.start;
2920       saved_search_regs.end = search_regs.end;
2921       saved_last_thing_searched = last_thing_searched;
2922       last_thing_searched = Qnil;
2923       search_regs.num_regs = 0;
2924       search_regs.start = 0;
2925       search_regs.end = 0;
2926
2927       search_regs_saved = 1;
2928     }
2929 }
2930
2931 /* Called upon exit from filters and sentinels. */
2932 void
2933 restore_search_regs (void)
2934 {
2935   if (search_regs_saved)
2936     {
2937       if (search_regs.num_regs > 0)
2938         {
2939           xfree (search_regs.start);
2940           xfree (search_regs.end);
2941         }
2942       search_regs.num_regs = saved_search_regs.num_regs;
2943       search_regs.start = saved_search_regs.start;
2944       search_regs.end = saved_search_regs.end;
2945       last_thing_searched = saved_last_thing_searched;
2946       saved_last_thing_searched = Qnil;
2947       search_regs_saved = 0;
2948     }
2949 }
2950
/* Unwind handler that reinstates LIST, a list as produced by
   Fmatch_data, as the current match data.  */
static void
unwind_set_match_data (Lisp_Object list)
{
  /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.
     Instead pass Qt as RESEAT, which makes Fset_match_data detach the
     markers on LIST so that they point nowhere.  */
  Fset_match_data (list, Qt);
}
2957
2958 /* Called to unwind protect the match data.  */
2959 void
2960 record_unwind_save_match_data (void)
2961 {
2962   record_unwind_protect (unwind_set_match_data,
2963                          Fmatch_data (Qnil, Qnil, Qnil));
2964 }
2965
2966 /* Quote a string to deactivate reg-expr chars */
2967
2968 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2969        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2970   (Lisp_Object string)
2971 {
2972   char *in, *out, *end;
2973   char *temp;
2974   ptrdiff_t backslashes_added = 0;
2975
2976   CHECK_STRING (string);
2977
2978   temp = alloca (SBYTES (string) * 2);
2979
2980   /* Now copy the data into the new string, inserting escapes. */
2981
2982   in = SSDATA (string);
2983   end = in + SBYTES (string);
2984   out = temp;
2985
2986   for (; in != end; in++)
2987     {
2988       if (*in == '['
2989           || *in == '*' || *in == '.' || *in == '\\'
2990           || *in == '?' || *in == '+'
2991           || *in == '^' || *in == '$')
2992         *out++ = '\\', backslashes_added++;
2993       *out++ = *in;
2994     }
2995
2996   return make_specified_string (temp,
2997                                 SCHARS (string) + backslashes_added,
2998                                 out - temp,
2999                                 STRING_MULTIBYTE (string));
3000 }
3001 \f
3002 void
3003 syms_of_search (void)
3004 {
3005   register int i;
3006
3007   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3008     {
3009       searchbufs[i].buf.allocated = 100;
3010       searchbufs[i].buf.buffer = xmalloc (100);
3011       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3012       searchbufs[i].regexp = Qnil;
3013       searchbufs[i].whitespace_regexp = Qnil;
3014       searchbufs[i].syntax_table = Qnil;
3015       staticpro (&searchbufs[i].regexp);
3016       staticpro (&searchbufs[i].whitespace_regexp);
3017       staticpro (&searchbufs[i].syntax_table);
3018       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3019     }
3020   searchbuf_head = &searchbufs[0];
3021
3022   DEFSYM (Qsearch_failed, "search-failed");
3023   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3024
3025   Fput (Qsearch_failed, Qerror_conditions,
3026         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3027   Fput (Qsearch_failed, Qerror_message,
3028         build_pure_c_string ("Search failed"));
3029
3030   Fput (Qinvalid_regexp, Qerror_conditions,
3031         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3032   Fput (Qinvalid_regexp, Qerror_message,
3033         build_pure_c_string ("Invalid regexp"));
3034
3035   last_thing_searched = Qnil;
3036   staticpro (&last_thing_searched);
3037
3038   saved_last_thing_searched = Qnil;
3039   staticpro (&saved_last_thing_searched);
3040
3041   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3042       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3043 Some commands use this for user-specified regexps.
3044 Spaces that occur inside character classes or repetition operators
3045 or other such regexp constructs are not replaced with this.
3046 A value of nil (which is the normal value) means treat spaces literally.  */);
3047   Vsearch_spaces_regexp = Qnil;
3048
3049   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3050       doc: /* Internal use only.
3051 If non-nil, the primitive searching and matching functions
3052 such as `looking-at', `string-match', `re-search-forward', etc.,
3053 do not set the match data.  The proper way to use this variable
3054 is to bind it with `let' around a small expression.  */);
3055   Vinhibit_changing_match_data = Qnil;
3056
3057   defsubr (&Slooking_at);
3058   defsubr (&Sposix_looking_at);
3059   defsubr (&Sstring_match);
3060   defsubr (&Sposix_string_match);
3061   defsubr (&Ssearch_forward);
3062   defsubr (&Ssearch_backward);
3063   defsubr (&Sre_search_forward);
3064   defsubr (&Sre_search_backward);
3065   defsubr (&Sposix_search_forward);
3066   defsubr (&Sposix_search_backward);
3067   defsubr (&Sreplace_match);
3068   defsubr (&Smatch_beginning);
3069   defsubr (&Smatch_end);
3070   defsubr (&Smatch_data);
3071   defsubr (&Sset_match_data);
3072   defsubr (&Sregexp_quote);
3073 }