src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "character.h"
  28 #include "buffer.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 static struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* Error condition signaled when regexp compile_pattern fails.  */
  88 static Lisp_Object Qinvalid_regexp;
  89
  90 /* Error condition used for failing searches.  */
  91 static Lisp_Object Qsearch_failed;
  92
  93 static void set_search_regs (ptrdiff_t, ptrdiff_t);
  94 static void save_search_regs (void);
  95 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
  96                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
  97                                 ptrdiff_t, ptrdiff_t);
  98 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
  99                               Lisp_Object, Lisp_Object, ptrdiff_t,
 100                               ptrdiff_t, int);
 101 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
 102                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
 103                                 Lisp_Object, Lisp_Object, int);
 104
 105 static _Noreturn void
 106 matcher_overflow (void)
 107 {
 108   error ("Stack overflow in regexp matcher");
 109 }
 110
 111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 112    PATTERN is the pattern to compile.
 113    CP is the place to put the result.
 114    TRANSLATE is a translation table for ignoring case, or nil for none.
 115    POSIX is nonzero if we want full backtracking (POSIX style)
 116    for this pattern.  0 means backtrack only enough to get a valid match.
 117
 118    The behavior also depends on Vsearch_spaces_regexp.  */
 119
 120 static void
 121 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix)
 122 {
 123   char *val;
 124   reg_syntax_t old;
 125
 126   cp->regexp = Qnil;
 127   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 128   cp->posix = posix;
 129   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 130   cp->buf.charset_unibyte = charset_unibyte;
 131   if (STRINGP (Vsearch_spaces_regexp))
 132     cp->whitespace_regexp = Vsearch_spaces_regexp;
 133   else
 134     cp->whitespace_regexp = Qnil;
 135
 136   /* rms: I think BLOCK_INPUT is not needed here any more,
 137      because regex.c defines malloc to call xmalloc.
 138      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 139      So let's turn it off.  */
 140   /*  BLOCK_INPUT;  */
 141   old = re_set_syntax (RE_SYNTAX_EMACS
 142                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 143
 144   if (STRINGP (Vsearch_spaces_regexp))
 145     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 146   else
 147     re_set_whitespace_regexp (NULL);
 148
 149   val = (char *) re_compile_pattern (SSDATA (pattern),
 150                                      SBYTES (pattern), &cp->buf);
 151
 152   /* If the compiled pattern hard codes some of the contents of the
 153      syntax-table, it can only be reused with *this* syntax table.  */
 154   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 155
 156   re_set_whitespace_regexp (NULL);
 157
 158   re_set_syntax (old);
 159   /* unblock_input ();  */
 160   if (val)
 161     xsignal1 (Qinvalid_regexp, build_string (val));
 162
 163   cp->regexp = Fcopy_sequence (pattern);
 164 }
 165
 166 /* Shrink each compiled regexp buffer in the cache
 167    to the size actually used right now.
 168    This is called from garbage collection.  */
 169
 170 void
 171 shrink_regexp_cache (void)
 172 {
 173   struct regexp_cache *cp;
 174
 175   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 176     {
 177       cp->buf.allocated = cp->buf.used;
 178       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 179     }
 180 }
 181
 182 /* Clear the regexp cache w.r.t. a particular syntax table,
 183    because it was changed.
 184    There is no danger of memory leak here because re_compile_pattern
 185    automagically manages the memory in each re_pattern_buffer struct,
 186    based on its `allocated' and `buffer' values.  */
 187 void
 188 clear_regexp_cache (void)
 189 {
 190   int i;
 191
 192   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 193     /* It's tempting to compare with the syntax-table we've actually changed,
 194        but it's not sufficient because char-table inheritance means that
 195        modifying one syntax-table can change others at the same time.  */
 196     if (!EQ (searchbufs[i].syntax_table, Qt))
 197       searchbufs[i].regexp = Qnil;
 198 }
 199
 200 /* Compile a regexp if necessary, but first check to see if there's one in
 201    the cache.
 202    PATTERN is the pattern to compile.
 203    TRANSLATE is a translation table for ignoring case, or nil for none.
 204    REGP is the structure that says where to store the "register"
 205    values that will result from matching this pattern.
 206    If it is 0, we should compile the pattern not to record any
 207    subexpression bounds.
 208    POSIX is nonzero if we want full backtracking (POSIX style)
 209    for this pattern.  0 means backtrack only enough to get a valid match.  */
 210
 211 struct re_pattern_buffer *
 212 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 213                  Lisp_Object translate, int posix, bool multibyte)
 214 {
 215   struct regexp_cache *cp, **cpp;
 216
 217   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 218     {
 219       cp = *cpp;
 220       /* Entries are initialized to nil, and may be set to nil by
 221          compile_pattern_1 if the pattern isn't valid.  Don't apply
 222          string accessors in those cases.  However, compile_pattern_1
 223          is only applied to the cache entry we pick here to reuse.  So
 224          nil should never appear before a non-nil entry.  */
 225       if (NILP (cp->regexp))
 226         goto compile_it;
 227       if (SCHARS (cp->regexp) == SCHARS (pattern)
 228           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 229           && !NILP (Fstring_equal (cp->regexp, pattern))
 230           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 231           && cp->posix == posix
 232           && (EQ (cp->syntax_table, Qt)
 233               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 234           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 235           && cp->buf.charset_unibyte == charset_unibyte)
 236         break;
 237
 238       /* If we're at the end of the cache, compile into the nil cell
 239          we found, or the last (least recently used) cell with a
 240          string value.  */
 241       if (cp->next == 0)
 242         {
 243         compile_it:
 244           compile_pattern_1 (cp, pattern, translate, posix);
 245           break;
 246         }
 247     }
 248
 249   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 250      either because we found it in the cache or because we just compiled it.
 251      Move it to the front of the queue to mark it as most recently used.  */
 252   *cpp = cp->next;
 253   cp->next = searchbuf_head;
 254   searchbuf_head = cp;
 255
 256   /* Advise the searching functions about the space we have allocated
 257      for register data.  */
 258   if (regp)
 259     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 260
 261   /* The compiled pattern can be used both for multibyte and unibyte
 262      target.  But, we have to tell which the pattern is used for. */
 263   cp->buf.target_multibyte = multibyte;
 264
 265   return &cp->buf;
 266 }
 267
 268 \f
 269 static Lisp_Object
 270 looking_at_1 (Lisp_Object string, int posix)
 271 {
 272   Lisp_Object val;
 273   unsigned char *p1, *p2;
 274   ptrdiff_t s1, s2;
 275   register ptrdiff_t i;
 276   struct re_pattern_buffer *bufp;
 277
 278   if (running_asynch_code)
 279     save_search_regs ();
 280
 281   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 282   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 283                          BVAR (current_buffer, case_eqv_table));
 284
 285   CHECK_STRING (string);
 286   bufp = compile_pattern (string,
 287                           (NILP (Vinhibit_changing_match_data)
 288                            ? &search_regs : NULL),
 289                           (!NILP (BVAR (current_buffer, case_fold_search))
 290                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 291                           posix,
 292                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 293
 294   immediate_quit = 1;
 295   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 296
 297   /* Get pointers and sizes of the two strings
 298      that make up the visible portion of the buffer. */
 299
 300   p1 = BEGV_ADDR;
 301   s1 = GPT_BYTE - BEGV_BYTE;
 302   p2 = GAP_END_ADDR;
 303   s2 = ZV_BYTE - GPT_BYTE;
 304   if (s1 < 0)
 305     {
 306       p2 = p1;
 307       s2 = ZV_BYTE - BEGV_BYTE;
 308       s1 = 0;
 309     }
 310   if (s2 < 0)
 311     {
 312       s1 = ZV_BYTE - BEGV_BYTE;
 313       s2 = 0;
 314     }
 315
 316   re_match_object = Qnil;
 317
 318   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 319                   PT_BYTE - BEGV_BYTE,
 320                   (NILP (Vinhibit_changing_match_data)
 321                    ? &search_regs : NULL),
 322                   ZV_BYTE - BEGV_BYTE);
 323   immediate_quit = 0;
 324
 325   if (i == -2)
 326     matcher_overflow ();
 327
 328   val = (0 <= i ? Qt : Qnil);
 329   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 330     for (i = 0; i < search_regs.num_regs; i++)
 331       if (search_regs.start[i] >= 0)
 332         {
 333           search_regs.start[i]
 334             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 335           search_regs.end[i]
 336             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 337         }
 338
 339   /* Set last_thing_searched only when match data is changed.  */
 340   if (NILP (Vinhibit_changing_match_data))
 341     XSETBUFFER (last_thing_searched, current_buffer);
 342
 343   return val;
 344 }
 345
 346 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 347        doc: /* Return t if text after point matches regular expression REGEXP.
 348 This function modifies the match data that `match-beginning',
 349 `match-end' and `match-data' access; save and restore the match
 350 data if you want to preserve them.  */)
 351   (Lisp_Object regexp)
 352 {
 353   return looking_at_1 (regexp, 0);
 354 }
 355
 356 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 357        doc: /* Return t if text after point matches regular expression REGEXP.
 358 Find the longest match, in accord with Posix regular expression rules.
 359 This function modifies the match data that `match-beginning',
 360 `match-end' and `match-data' access; save and restore the match
 361 data if you want to preserve them.  */)
 362   (Lisp_Object regexp)
 363 {
 364   return looking_at_1 (regexp, 1);
 365 }
 366 \f
 367 static Lisp_Object
 368 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 369 {
 370   ptrdiff_t val;
 371   struct re_pattern_buffer *bufp;
 372   EMACS_INT pos;
 373   ptrdiff_t pos_byte, i;
 374
 375   if (running_asynch_code)
 376     save_search_regs ();
 377
 378   CHECK_STRING (regexp);
 379   CHECK_STRING (string);
 380
 381   if (NILP (start))
 382     pos = 0, pos_byte = 0;
 383   else
 384     {
 385       ptrdiff_t len = SCHARS (string);
 386
 387       CHECK_NUMBER (start);
 388       pos = XINT (start);
 389       if (pos < 0 && -pos <= len)
 390         pos = len + pos;
 391       else if (0 > pos || pos > len)
 392         args_out_of_range (string, start);
 393       pos_byte = string_char_to_byte (string, pos);
 394     }
 395
 396   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 397   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 398                          BVAR (current_buffer, case_eqv_table));
 399
 400   bufp = compile_pattern (regexp,
 401                           (NILP (Vinhibit_changing_match_data)
 402                            ? &search_regs : NULL),
 403                           (!NILP (BVAR (current_buffer, case_fold_search))
 404                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 405                           posix,
 406                           STRING_MULTIBYTE (string));
 407   immediate_quit = 1;
 408   re_match_object = string;
 409
 410   val = re_search (bufp, SSDATA (string),
 411                    SBYTES (string), pos_byte,
 412                    SBYTES (string) - pos_byte,
 413                    (NILP (Vinhibit_changing_match_data)
 414                     ? &search_regs : NULL));
 415   immediate_quit = 0;
 416
 417   /* Set last_thing_searched only when match data is changed.  */
 418   if (NILP (Vinhibit_changing_match_data))
 419     last_thing_searched = Qt;
 420
 421   if (val == -2)
 422     matcher_overflow ();
 423   if (val < 0) return Qnil;
 424
 425   if (NILP (Vinhibit_changing_match_data))
 426     for (i = 0; i < search_regs.num_regs; i++)
 427       if (search_regs.start[i] >= 0)
 428         {
 429           search_regs.start[i]
 430             = string_byte_to_char (string, search_regs.start[i]);
 431           search_regs.end[i]
 432             = string_byte_to_char (string, search_regs.end[i]);
 433         }
 434
 435   return make_number (string_byte_to_char (string, val));
 436 }
 437
 438 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 439        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 440 Matching ignores case if `case-fold-search' is non-nil.
 441 If third arg START is non-nil, start search at that index in STRING.
 442 For index of first char beyond the match, do (match-end 0).
 443 `match-end' and `match-beginning' also give indices of substrings
 444 matched by parenthesis constructs in the pattern.
 445
 446 You can use the function `match-string' to extract the substrings
 447 matched by the parenthesis constructions in REGEXP. */)
 448   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 449 {
 450   return string_match_1 (regexp, string, start, 0);
 451 }
 452
 453 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 454        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 455 Find the longest match, in accord with Posix regular expression rules.
 456 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 457 If third arg START is non-nil, start search at that index in STRING.
 458 For index of first char beyond the match, do (match-end 0).
 459 `match-end' and `match-beginning' also give indices of substrings
 460 matched by parenthesis constructs in the pattern.  */)
 461   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 462 {
 463   return string_match_1 (regexp, string, start, 1);
 464 }
 465
 466 /* Match REGEXP against STRING, searching all of STRING,
 467    and return the index of the match, or negative on failure.
 468    This does not clobber the match data.  */
 469
 470 ptrdiff_t
 471 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 472 {
 473   ptrdiff_t val;
 474   struct re_pattern_buffer *bufp;
 475
 476   bufp = compile_pattern (regexp, 0, Qnil,
 477                           0, STRING_MULTIBYTE (string));
 478   immediate_quit = 1;
 479   re_match_object = string;
 480
 481   val = re_search (bufp, SSDATA (string),
 482                    SBYTES (string), 0,
 483                    SBYTES (string), 0);
 484   immediate_quit = 0;
 485   return val;
 486 }
 487
 488 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 489    and return the index of the match, or negative on failure.
 490    This does not clobber the match data.
 491    We assume that STRING contains single-byte characters.  */
 492
 493 ptrdiff_t
 494 fast_c_string_match_ignore_case (Lisp_Object regexp,
 495                                  const char *string, ptrdiff_t len)
 496 {
 497   ptrdiff_t val;
 498   struct re_pattern_buffer *bufp;
 499
 500   regexp = string_make_unibyte (regexp);
 501   re_match_object = Qt;
 502   bufp = compile_pattern (regexp, 0,
 503                           Vascii_canon_table, 0,
 504                           0);
 505   immediate_quit = 1;
 506   val = re_search (bufp, string, len, 0, len, 0);
 507   immediate_quit = 0;
 508   return val;
 509 }
 510
 511 /* Like fast_string_match but ignore case.  */
 512
 513 ptrdiff_t
 514 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 515 {
 516   ptrdiff_t val;
 517   struct re_pattern_buffer *bufp;
 518
 519   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 520                           0, STRING_MULTIBYTE (string));
 521   immediate_quit = 1;
 522   re_match_object = string;
 523
 524   val = re_search (bufp, SSDATA (string),
 525                    SBYTES (string), 0,
 526                    SBYTES (string), 0);
 527   immediate_quit = 0;
 528   return val;
 529 }
 530 \f
 531 /* Match REGEXP against the characters after POS to LIMIT, and return
 532    the number of matched characters.  If STRING is non-nil, match
 533    against the characters in it.  In that case, POS and LIMIT are
 534    indices into the string.  This function doesn't modify the match
 535    data.  */
 536
 537 ptrdiff_t
 538 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
 539                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 540 {
 541   bool multibyte;
 542   struct re_pattern_buffer *buf;
 543   unsigned char *p1, *p2;
 544   ptrdiff_t s1, s2;
 545   ptrdiff_t len;
 546
 547   if (STRINGP (string))
 548     {
 549       if (pos_byte < 0)
 550         pos_byte = string_char_to_byte (string, pos);
 551       if (limit_byte < 0)
 552         limit_byte = string_char_to_byte (string, limit);
 553       p1 = NULL;
 554       s1 = 0;
 555       p2 = SDATA (string);
 556       s2 = SBYTES (string);
 557       re_match_object = string;
 558       multibyte = STRING_MULTIBYTE (string);
 559     }
 560   else
 561     {
 562       if (pos_byte < 0)
 563         pos_byte = CHAR_TO_BYTE (pos);
 564       if (limit_byte < 0)
 565         limit_byte = CHAR_TO_BYTE (limit);
 566       pos_byte -= BEGV_BYTE;
 567       limit_byte -= BEGV_BYTE;
 568       p1 = BEGV_ADDR;
 569       s1 = GPT_BYTE - BEGV_BYTE;
 570       p2 = GAP_END_ADDR;
 571       s2 = ZV_BYTE - GPT_BYTE;
 572       if (s1 < 0)
 573         {
 574           p2 = p1;
 575           s2 = ZV_BYTE - BEGV_BYTE;
 576           s1 = 0;
 577         }
 578       if (s2 < 0)
 579         {
 580           s1 = ZV_BYTE - BEGV_BYTE;
 581           s2 = 0;
 582         }
 583       re_match_object = Qnil;
 584       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 585     }
 586
 587   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 588   immediate_quit = 1;
 589   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 590                     pos_byte, NULL, limit_byte);
 591   immediate_quit = 0;
 592
 593   return len;
 594 }
 595
 596 \f
 597 /* The newline cache: remembering which sections of text have no newlines.  */
 598
 599 /* If the user has requested newline caching, make sure it's on.
 600    Otherwise, make sure it's off.
 601    This is our cheezy way of associating an action with the change of
 602    state of a buffer-local variable.  */
 603 static void
 604 newline_cache_on_off (struct buffer *buf)
 605 {
 606   if (NILP (BVAR (buf, cache_long_line_scans)))
 607     {
 608       /* It should be off.  */
 609       if (buf->newline_cache)
 610         {
 611           free_region_cache (buf->newline_cache);
 612           buf->newline_cache = 0;
 613         }
 614     }
 615   else
 616     {
 617       /* It should be on.  */
 618       if (buf->newline_cache == 0)
 619         buf->newline_cache = new_region_cache ();
 620     }
 621 }
 622
 623 \f
 624 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
 625
 626    If COUNT is positive, search forwards; END must be >= START.
 627    If COUNT is negative, search backwards for the -COUNTth instance;
 628       END must be <= START.
 629    If COUNT is zero, do anything you please; run rogue, for all I care.
 630
 631    If END is zero, use BEGV or ZV instead, as appropriate for the
 632    direction indicated by COUNT.
 633
 634    If we find COUNT instances, set *SHORTAGE to zero, and return the
 635    position past the COUNTth match.  Note that for reverse motion
 636    this is not the same as the usual convention for Emacs motion commands.
 637
 638    If we don't find COUNT instances before reaching END, set *SHORTAGE
 639    to the number of newlines left unfound, and return END.
 640
 641    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
 642    to the returned character position.
 643
 644    If ALLOW_QUIT, set immediate_quit.  That's good to do
 645    except when inside redisplay.  */
 646
 647 ptrdiff_t
 648 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 649               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
 650               ptrdiff_t *bytepos, bool allow_quit)
 651 {
 652   struct region_cache *newline_cache;
 653   int direction;
 654
 655   if (count > 0)
 656     {
 657       direction = 1;
 658       if (!end)
 659         end = ZV, end_byte = ZV_BYTE;
 660     }
 661   else
 662     {
 663       direction = -1;
 664       if (!end)
 665         end = BEGV, end_byte = BEGV_BYTE;
 666     }
 667   if (end_byte == -1)
 668     end_byte = CHAR_TO_BYTE (end);
 669
 670   newline_cache_on_off (current_buffer);
 671   newline_cache = current_buffer->newline_cache;
 672
 673   if (shortage != 0)
 674     *shortage = 0;
 675
 676   immediate_quit = allow_quit;
 677
 678   if (count > 0)
 679     while (start != end)
 680       {
 681         /* Our innermost scanning loop is very simple; it doesn't know
 682            about gaps, buffer ends, or the newline cache.  ceiling is
 683            the position of the last character before the next such
 684            obstacle --- the last character the dumb search loop should
 685            examine.  */
 686         ptrdiff_t tem, ceiling_byte = end_byte - 1;
 687
 688         /* If we're looking for a newline, consult the newline cache
 689            to see where we can avoid some scanning.  */
 690         if (newline_cache)
 691           {
 692             ptrdiff_t next_change;
 693             immediate_quit = 0;
 694             while (region_cache_forward
 695                    (current_buffer, newline_cache, start, &next_change))
 696               start = next_change;
 697             immediate_quit = allow_quit;
 698
 699             start_byte = CHAR_TO_BYTE (start);
 700
 701             /* START should never be after END.  */
 702             if (start_byte > ceiling_byte)
 703               start_byte = ceiling_byte;
 704
 705             /* Now the text after start is an unknown region, and
 706                next_change is the position of the next known region. */
 707             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
 708           }
 709         else if (start_byte == -1)
 710           start_byte = CHAR_TO_BYTE (start);
 711
 712         /* The dumb loop can only scan text stored in contiguous
 713            bytes. BUFFER_CEILING_OF returns the last character
 714            position that is contiguous, so the ceiling is the
 715            position after that.  */
 716         tem = BUFFER_CEILING_OF (start_byte);
 717         ceiling_byte = min (tem, ceiling_byte);
 718
 719         {
 720           /* The termination address of the dumb loop.  */
 721           register unsigned char *ceiling_addr
 722             = BYTE_POS_ADDR (ceiling_byte) + 1;
 723           register unsigned char *cursor
 724             = BYTE_POS_ADDR (start_byte);
 725           unsigned char *base = cursor;
 726
 727           while (cursor < ceiling_addr)
 728             {
 729               /* The dumb loop.  */
 730               unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
 731
 732               /* If we're looking for newlines, cache the fact that
 733                  the region from start to cursor is free of them. */
 734               if (newline_cache)
 735                 {
 736                   unsigned char *low = cursor;
 737                   unsigned char *lim = nl ? nl : ceiling_addr;
 738                   know_region_cache (current_buffer, newline_cache,
 739                                      BYTE_TO_CHAR (low - base + start_byte),
 740                                      BYTE_TO_CHAR (lim - base + start_byte));
 741                 }
 742
 743               if (! nl)
 744                 break;
 745
 746               if (--count == 0)
 747                 {
 748                   immediate_quit = 0;
 749                   if (bytepos)
 750                     *bytepos = nl + 1 - base + start_byte;
 751                   return BYTE_TO_CHAR (nl + 1 - base + start_byte);
 752                 }
 753               cursor = nl + 1;
 754             }
 755
 756           start_byte += ceiling_addr - base;
 757           start = BYTE_TO_CHAR (start_byte);
 758         }
 759       }
 760   else
 761     while (start > end)
 762       {
 763         /* The last character to check before the next obstacle.  */
 764         ptrdiff_t tem, ceiling_byte = end_byte;
 765
 766         /* Consult the newline cache, if appropriate.  */
 767         if (newline_cache)
 768           {
 769             ptrdiff_t next_change;
 770             immediate_quit = 0;
 771             while (region_cache_backward
 772                    (current_buffer, newline_cache, start, &next_change))
 773               start = next_change;
 774             immediate_quit = allow_quit;
 775
 776             start_byte = CHAR_TO_BYTE (start);
 777
 778             /* Start should never be at or before end.  */
 779             if (start_byte <= ceiling_byte)
 780               start_byte = ceiling_byte + 1;
 781
 782             /* Now the text before start is an unknown region, and
 783                next_change is the position of the next known region. */
 784             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
 785           }
 786         else if (start_byte == -1)
 787           start_byte = CHAR_TO_BYTE (start);
 788
 789         /* Stop scanning before the gap.  */
 790         tem = BUFFER_FLOOR_OF (start_byte - 1);
 791         ceiling_byte = max (tem, ceiling_byte);
 792
 793         {
 794           /* The termination address of the dumb loop.  */
 795           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 796           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 797           unsigned char *base = cursor;
 798
 799           while (cursor >= ceiling_addr)
 800             {
 801               unsigned char *nl = memrchr (ceiling_addr, '\n',
 802                                            cursor + 1 - ceiling_addr);
 803
 804               /* If we're looking for newlines, cache the fact that
 805                  the region from after the cursor to start is free of them.  */
 806               if (newline_cache)
 807                 {
 808                   unsigned char *low = nl ? nl : ceiling_addr - 1;
 809                   unsigned char *lim = cursor;
 810                   know_region_cache (current_buffer, newline_cache,
 811                                      BYTE_TO_CHAR (low - base + start_byte),
 812                                      BYTE_TO_CHAR (lim - base + start_byte));
 813                 }
 814
 815               if (! nl)
 816                 break;
 817
 818               if (++count >= 0)
 819                 {
 820                   immediate_quit = 0;
 821                   if (bytepos)
 822                     *bytepos = nl - base + start_byte;
 823                   return BYTE_TO_CHAR (nl - base + start_byte);
 824                 }
 825               cursor = nl - 1;
 826             }
 827
 828           start_byte += ceiling_addr - 1 - base;
 829           start = BYTE_TO_CHAR (start_byte);
 830         }
 831       }
 832
 833   immediate_quit = 0;
 834   if (shortage)
 835     *shortage = count * direction;
 836   if (bytepos)
 837     {
 838       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
 839       eassert (*bytepos == CHAR_TO_BYTE (start));
 840     }
 841   return start;
 842 }
 843 \f
 844 /* Move over COUNT line boundaries starting from START/START_BYTE,
 845    going forward when COUNT is positive and backward otherwise, and
 846    never looking beyond LIMIT/LIMIT_BYTE.
 847
 848    The resulting position is reported by calling TEMP_SET_PT_BOTH.
 849
 850    If COUNT boundaries are found, we position just after the COUNTth
 851    newline (after it even when scanning backward) and return 0.
 852
 853    Otherwise we position at LIMIT/LIMIT_BYTE and return the number
 854    of line boundaries that were left unfound.
 855
 856    If ALLOW_QUIT, immediate_quit is raised while scanning; its
 857    earlier state is put back before returning.  */
 858
 859 EMACS_INT
 860 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 861               ptrdiff_t limit, ptrdiff_t limit_byte,
 862               EMACS_INT count, bool allow_quit)
 863 {
 864   /* Remember the direction now; COUNT is consumed as we go.  */
 865   bool forward = count > 0;
 866   bool saved_quit = immediate_quit;
 867
 868   if (allow_quit)
 869     immediate_quit++;
 870
 871   if (forward)
 872     while (start_byte < limit_byte)
 873       {
 874         /* Scan one stretch of text, ending at the buffer ceiling
 875            (BUFFER_CEILING_OF) or LIMIT_BYTE, whichever comes first.  */
 876         ptrdiff_t last_byte = BUFFER_CEILING_OF (start_byte);
 877         unsigned char *stretch, *stretch_end, *scan;
 878
 879         last_byte = min (limit_byte - 1, last_byte);
 880         stretch_end = BYTE_POS_ADDR (last_byte) + 1;
 881         stretch = scan = BYTE_POS_ADDR (start_byte);
 882
 883         for (;;)
 884           {
 885             unsigned char *hit = memchr (scan, '\n', stretch_end - scan);
 886             if (! hit)
 887               break;
 888             count--;
 889             if (count == 0)
 890               {
 891                 /* Found the last one: stop just past it.  */
 892                 immediate_quit = saved_quit;
 893                 start_byte += hit - stretch + 1;
 894                 start = BYTE_TO_CHAR (start_byte);
 895                 TEMP_SET_PT_BOTH (start, start_byte);
 896                 return 0;
 897               }
 898             scan = hit + 1;
 899             if (scan >= stretch_end)
 900               break;
 901           }
 902
 903         start_byte += stretch_end - stretch;
 904       }
 905   else
 906     while (start_byte > limit_byte)
 907       {
 908         /* Scan the stretch that ends at START_BYTE; it begins at the
 909            buffer floor (BUFFER_FLOOR_OF) or LIMIT_BYTE, whichever is last.  */
 910         ptrdiff_t first_byte = BUFFER_FLOOR_OF (start_byte - 1);
 911         unsigned char *stretch, *stretch_end, *scan;
 912
 913         first_byte = max (limit_byte, first_byte);
 914         stretch = BYTE_POS_ADDR (first_byte);
 915         stretch_end = scan = BYTE_POS_ADDR (start_byte - 1) + 1;
 916
 917         for (;;)
 918           {
 919             unsigned char *hit = memrchr (stretch, '\n', scan - stretch);
 920             if (! hit)
 921               break;
 922             count++;
 923             if (count == 0)
 924               {
 925                 /* Stop AFTER the newline even though we scan backward.  */
 926                 immediate_quit = saved_quit;
 927                 start_byte += hit - stretch_end + 1;
 928                 start = BYTE_TO_CHAR (start_byte);
 929                 TEMP_SET_PT_BOTH (start, start_byte);
 930                 return 0;
 931               }
 932             scan = hit;
 933           }
 934
 935         start_byte += stretch - stretch_end;
 936       }
 937
 938   TEMP_SET_PT_BOTH (limit, limit_byte);
 939   immediate_quit = saved_quit;
 940
 941   return forward ? count : -count;
 942 }
 943
 944 /* Like find_newline, but doesn't allow QUITting and doesn't return
 945    SHORTAGE.  FROM, FROMBYTE, CNT and BYTEPOS are handed to
        find_newline unchanged; its end-limit arguments are given as 0
        and -1, and its SHORTAGE argument as NULL.  The value is
        whatever find_newline returns.  */
 946 ptrdiff_t
 947 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
 948                       ptrdiff_t cnt, ptrdiff_t *bytepos)
 949 {
 950   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
 951 }
 952
 953 /* Variant of find_newline that searches no further than TO and
 954    yields the position just before the newline instead of after it.
 955    Subtracting 1 from find_newline_no_quit's result would not do:
 956    when the search stops at TO instead, nothing is stepped over.  */
 957 ptrdiff_t
 958 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
 959                           ptrdiff_t cnt, ptrdiff_t *bytepos)
 960 {
 961   ptrdiff_t missing;
 962   ptrdiff_t result
 963     = find_newline (from, -1, to, -1, cnt, &missing, bytepos, 1);
 964
 965   /* Newlines left unfound: return the stopping position as is.  */
 966   if (missing != 0)
 967     return result;
 968   if (! bytepos)
 969     return result - 1;
 970   DEC_BOTH (result, *bytepos);
 971   return result;
 972 }
 973 \f
 974 /* Subroutines of Lisp buffer search functions.  */
 975
 976 /* Search from point for STRING, N times, where N is DIRECTION,
 977    multiplied by COUNT if COUNT is non-nil; N > 0 searches forward.
 978    BOUND, if non-nil, limits the search and must not lie on the wrong
 979    side of point.  RE and POSIX are passed on to search_buffer.  On
 980    success move point to the position found and return it.  On failure
 981    signal Qsearch_failed if NOERROR is nil; otherwise return nil, first
 982    moving point to the search limit unless NOERROR is t.  */
 983 static Lisp_Object
 984 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
 985                 Lisp_Object count, int direction, int RE, int posix)
 986 {
 987   EMACS_INT repeat = direction;
 988   EMACS_INT bound_pos, found;
 989   ptrdiff_t bound_byte;
 990   Lisp_Object canon = Qnil, eqv = Qnil;
 991
 992   if (!NILP (count))
 993     {
 994       CHECK_NUMBER (count);
 995       repeat *= XINT (count);
 996     }
 997   CHECK_STRING (string);
 998
 999   if (!NILP (bound))
1000     {
1001       CHECK_NUMBER_COERCE_MARKER (bound);
1002       bound_pos = XINT (bound);
1003       if (repeat > 0 ? bound_pos < PT : bound_pos > PT)
1004         error ("Invalid search bound (wrong side of point)");
1005       /* Clip the bound to the range BEGV..ZV.  */
1006       if (bound_pos > ZV)
1007         bound_pos = ZV, bound_byte = ZV_BYTE;
1008       else if (bound_pos < BEGV)
1009         bound_pos = BEGV, bound_byte = BEGV_BYTE;
1010       else
1011         bound_byte = CHAR_TO_BYTE (bound_pos);
1012     }
1013   else if (repeat > 0)
1014     bound_pos = ZV, bound_byte = ZV_BYTE;
1015   else
1016     bound_pos = BEGV, bound_byte = BEGV_BYTE;
1017
1018   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1019   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1020                          BVAR (current_buffer, case_eqv_table));
1021
1022   /* The case tables are handed down only when case folding is on.  */
1023   if (!NILP (BVAR (current_buffer, case_fold_search)))
1024     {
1025       canon = BVAR (current_buffer, case_canon_table);
1026       eqv = BVAR (current_buffer, case_eqv_table);
1027     }
1028
1029   found = search_buffer (string, PT, PT_BYTE, bound_pos, bound_byte,
1030                          repeat, RE, canon, eqv, posix);
1031   if (found <= 0)
1032     {
1033       if (NILP (noerror))
1034         xsignal1 (Qsearch_failed, string);
1035       /* Returning the limit rather than nil after moving there would
1036          be clean, but maybe programs depend on a value of nil.  */
1037       if (!EQ (noerror, Qt))
1038         {
1039           eassert (BEGV <= bound_pos && bound_pos <= ZV);
1040           SET_PT_BOTH (bound_pos, bound_byte);
1041         }
1042       return Qnil;
1043     }
1044
1045   eassert (BEGV <= found && found <= ZV);
1046   SET_PT (found);
1047   return make_number (found);
1048 }
1049 \f
1050 /* Return 1 if REGEXP matches just one constant string, that is, if
1051    it uses none of the special constructs tested for below.  */
1052 static int
1053 trivial_regexp_p (Lisp_Object regexp)
1054 {
1055   unsigned char *p = SDATA (regexp);
1056   unsigned char *end = p + SBYTES (regexp);
1057
1058   while (p < end)
1059     switch (*p++)
1060       {
1061       case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1062         return 0;
1063       case '\\':
1064         /* A backslash with nothing after it: not trivial.  */
1065         if (p == end)
1066           return 0;
1067         switch (*p++)
1068           {
1069           case '|': case '(': case ')': case '`': case '\'': case 'b':
1070           case 'B': case '<': case '>': case 'w': case 'W': case 's':
1071           case 'S': case '=': case '{': case '}': case '_':
1072           case 'c': case 'C': /* for categoryspec and notcategoryspec */
1073           case '1': case '2': case '3': case '4': case '5':
1074           case '6': case '7': case '8': case '9':
1075             return 0;
1076           }
1077       }
1078   return 1;
1079 }
1080
1081 /* Search for the n'th occurrence of STRING in the current buffer,
1082    starting at position POS and stopping at position LIM,
1083    treating STRING as a literal string if RE is false or as
1084    a regular expression if RE is true.
1085
1086    If N is positive, searching is forward and LIM must be greater than POS.
1087    If N is negative, searching is backward and LIM must be less than POS.
1088
1089    Returns -x if x occurrences remain to be found (x > 0),
1090    or else the position at the beginning of the Nth occurrence
1091    (if searching backward) or the end (if searching forward).
1092
1093    POSIX is nonzero if we want full backtracking (POSIX style)
1094    for this pattern.  0 means backtrack only enough to get a valid match.  */
1095
1096 /* Set OUT to the translation of character D in table TRT, or to D
1097    itself when TRT is nil or its entry for D is not an integer.
1098    This expands to a single statement; TRT and D may be evaluated
1099    more than once, so they should be free of side effects.  */
1100 #define TRANSLATE(out, trt, d)                  \
1101 do                                              \
1102   {                                             \
1103     Lisp_Object temp = Qnil;                    \
1104     if (! NILP (trt))                           \
1105       temp = Faref (trt, make_number (d));      \
1106     if (INTEGERP (temp))                        \
1107       out = XINT (temp);                        \
1108     else                                        \
1109       out = d;                                  \
1110   }                                             \
1111 while (0)
1112
1113 /* Only used in search_buffer, to record the end position of the match
1114    when searching regexps and SEARCH_REGS should not be changed
1115    (i.e. Vinhibit_changing_match_data is non-nil).  */
1116 static struct re_registers search_regs_1;
1117
     /* Search the current buffer for STRING, N times, from POS/POS_BYTE
        toward LIM/LIM_BYTE; the full contract (arguments and return
        value) is given in the comment preceding the TRANSLATE macro.
        TRT and INVERSE_TRT are the translation table and its inverse
        (search_command passes Qnil for both when case folding is off).

        A regexp goes through compile_pattern and re_search_2, unless it
        is trivial and Vsearch_spaces_regexp is nil.  Everything else is
        searched as a literal string: by boyer_moore when the translation
        permits it, otherwise by simple_search.  */
1118 static EMACS_INT
1119 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1120                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1121                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1122 {
1123   ptrdiff_t len = SCHARS (string);
1124   ptrdiff_t len_byte = SBYTES (string);
1125   register ptrdiff_t i;
1126
1127   if (running_asynch_code)
1128     save_search_regs ();
1129
1130   /* Searching 0 times means don't move.  */
1131   /* Null string is found at starting position.  */
1132   if (len == 0 || n == 0)
1133     {
1134       set_search_regs (pos_byte, 0);
1135       return pos;
1136     }
1137
       /* A regexp needs the regexp matcher unless it is trivial and no
          Vsearch_spaces_regexp is in effect; in that remaining case it is
          searched as a literal string in the else branch below.  */
1138   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1139     {
1140       unsigned char *p1, *p2;
1141       ptrdiff_t s1, s2;
1142       struct re_pattern_buffer *bufp;
1143
1144       bufp = compile_pattern (string,
1145                               (NILP (Vinhibit_changing_match_data)
1146                                ? &search_regs : &search_regs_1),
1147                               trt, posix,
1148                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1149
1150       immediate_quit = 1;       /* Quit immediately if user types ^G,
1151                                    because letting this function finish
1152                                    can take too long. */
1153       QUIT;                     /* Do a pending quit right away,
1154                                    to avoid paradoxical behavior */
1155       /* Get pointers and sizes of the two strings
1156          that make up the visible portion of the buffer. */
1157
1158       p1 = BEGV_ADDR;
1159       s1 = GPT_BYTE - BEGV_BYTE;
1160       p2 = GAP_END_ADDR;
1161       s2 = ZV_BYTE - GPT_BYTE;
           /* S1 < 0 means GPT_BYTE is below BEGV_BYTE: the visible text is
              one piece, handed over as the second string.  */
1162       if (s1 < 0)
1163         {
1164           p2 = p1;
1165           s2 = ZV_BYTE - BEGV_BYTE;
1166           s1 = 0;
1167         }
           /* S2 < 0 means GPT_BYTE is above ZV_BYTE: the visible text is
              entirely in the first string.  */
1168       if (s2 < 0)
1169         {
1170           s1 = ZV_BYTE - BEGV_BYTE;
1171           s2 = 0;
1172         }
1173       re_match_object = Qnil;
1174
           /* Backward search: each match moves POS to the start of the
              match, and N counts up toward zero.  */
1175       while (n < 0)
1176         {
1177           ptrdiff_t val;
1178
1179           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1180                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1181                              (NILP (Vinhibit_changing_match_data)
1182                               ? &search_regs : &search_regs_1),
1183                              /* Don't allow match past current point */
1184                              pos_byte - BEGV_BYTE);
1185           if (val == -2)
1186             {
1187               matcher_overflow ();
1188             }
1189           if (val >= 0)
1190             {
1191               if (NILP (Vinhibit_changing_match_data))
1192                 {
1193                   pos_byte = search_regs.start[0] + BEGV_BYTE;
                       /* The registers hold byte offsets relative to
                          BEGV_BYTE; turn each matched one into a
                          character position.  */
1194                   for (i = 0; i < search_regs.num_regs; i++)
1195                     if (search_regs.start[i] >= 0)
1196                       {
1197                         search_regs.start[i]
1198                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1199                         search_regs.end[i]
1200                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1201                       }
1202                   XSETBUFFER (last_thing_searched, current_buffer);
1203                   /* Set pos to the new position. */
1204                   pos = search_regs.start[0];
1205                 }
1206               else
1207                 {
                       /* SEARCH_REGS must stay untouched, so take the
                          position from the scratch registers.  */
1208                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1209                   /* Set pos to the new position.  */
1210                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1211                 }
1212             }
1213           else
1214             {
1215               immediate_quit = 0;
                   /* N is negative here: minus the number of matches
                      that were still to be found.  */
1216               return (n);
1217             }
1218           n++;
1219         }
           /* Forward search: each match moves POS to the end of the
              match, and N counts down toward zero.  */
1220       while (n > 0)
1221         {
1222           ptrdiff_t val;
1223
1224           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1225                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1226                              (NILP (Vinhibit_changing_match_data)
1227                               ? &search_regs : &search_regs_1),
1228                              lim_byte - BEGV_BYTE);
1229           if (val == -2)
1230             {
1231               matcher_overflow ();
1232             }
1233           if (val >= 0)
1234             {
1235               if (NILP (Vinhibit_changing_match_data))
1236                 {
1237                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1238                   for (i = 0; i < search_regs.num_regs; i++)
1239                     if (search_regs.start[i] >= 0)
1240                       {
1241                         search_regs.start[i]
1242                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1243                         search_regs.end[i]
1244                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1245                       }
1246                   XSETBUFFER (last_thing_searched, current_buffer);
1247                   pos = search_regs.end[0];
1248                 }
1249               else
1250                 {
1251                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1252                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1253                 }
1254             }
1255           else
1256             {
1257               immediate_quit = 0;
                   /* Report the N matches still unfound as -N.  */
1258               return (0 - n);
1259             }
1260           n--;
1261         }
1262       immediate_quit = 0;
1263       return (pos);
1264     }
1265   else                          /* non-RE case */
1266     {
1267       unsigned char *raw_pattern, *pat;
1268       ptrdiff_t raw_pattern_size;
1269       ptrdiff_t raw_pattern_size_byte;
1270       unsigned char *patbuf;
1271       bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1272       unsigned char *base_pat;
1273       /* Set to positive if we find a non-ASCII char that needs
1274          translation.  Otherwise set to zero later.  */
1275       int char_base = -1;
1276       int boyer_moore_ok = 1;
1277
1278       /* MULTIBYTE says whether the text to be searched is multibyte.
1279          We must convert PATTERN to match that, or we will not really
1280          find things right.  */
1281
1282       if (multibyte == STRING_MULTIBYTE (string))
1283         {
               /* Same representation: use STRING's data directly.  */
1284           raw_pattern = SDATA (string);
1285           raw_pattern_size = SCHARS (string);
1286           raw_pattern_size_byte = SBYTES (string);
1287         }
1288       else if (multibyte)
1289         {
               /* Unibyte STRING, multibyte buffer: make a multibyte copy
                  of the pattern in stack memory.  */
1290           raw_pattern_size = SCHARS (string);
1291           raw_pattern_size_byte
1292             = count_size_as_multibyte (SDATA (string),
1293                                        raw_pattern_size);
1294           raw_pattern = alloca (raw_pattern_size_byte + 1);
1295           copy_text (SDATA (string), raw_pattern,
1296                      SCHARS (string), 0, 1);
1297         }
1298       else
1299         {
1300           /* Converting multibyte to single-byte.
1301
1302              ??? Perhaps this conversion should be done in a special way
1303              by subtracting nonascii-insert-offset from each non-ASCII char,
1304              so that only the multibyte chars which really correspond to
1305              the chosen single-byte character set can possibly match.  */
1306           raw_pattern_size = SCHARS (string);
1307           raw_pattern_size_byte = SCHARS (string);
1308           raw_pattern = alloca (raw_pattern_size + 1);
1309           copy_text (SDATA (string), raw_pattern,
1310                      SBYTES (string), 1, 0);
1311         }
1312
1313       /* Copy and optionally translate the pattern.  */
1314       len = raw_pattern_size;
1315       len_byte = raw_pattern_size_byte;
           /* Room for LEN characters of up to MAX_MULTIBYTE_LENGTH bytes
              each, since translation may change a character's length.  */
1316       patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
1317       pat = patbuf;
1318       base_pat = raw_pattern;
1319       if (multibyte)
1320         {
1321           /* Fill patbuf by translated characters in STRING while
1322              checking if we can use boyer-moore search.  If TRT is
1323              non-nil, we can use boyer-moore search only if TRT can be
1324              represented by the byte array of 256 elements.  For that,
1325              all non-ASCII case-equivalents of all case-sensitive
1326              characters in STRING must belong to the same character
1327              group (two characters belong to the same group iff their
1328              multibyte forms are the same except for the last byte;
1329              i.e. every 64 characters form a group; U+0000..U+003F,
1330              U+0040..U+007F, U+0080..U+00BF, ...).  */
1331
1332           while (--len >= 0)
1333             {
1334               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1335               int c, translated, inverse;
1336               int in_charlen, charlen;
1337
1338               /* If we got here and the RE flag is set, it's because we're
1339                  dealing with a regexp known to be trivial, so the backslash
1340                  just quotes the next character.  */
1341               if (RE && *base_pat == '\\')
1342                 {
1343                   len--;
1344                   raw_pattern_size--;
1345                   len_byte--;
1346                   base_pat++;
1347                 }
1348
1349               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1350
1351               if (NILP (trt))
1352                 {
1353                   str = base_pat;
1354                   charlen = in_charlen;
1355                 }
1356               else
1357                 {
1358                   /* Translate the character.  */
1359                   TRANSLATE (translated, trt, c);
1360                   charlen = CHAR_STRING (translated, str_base);
1361                   str = str_base;
1362
1363                   /* Check if C has any other case-equivalents.  */
1364                   TRANSLATE (inverse, inverse_trt, c);
1365                   /* If so, check if we can use boyer-moore.  */
1366                   if (c != inverse && boyer_moore_ok)
1367                     {
1368                       /* Check if all equivalents belong to the same
1369                          group of characters.  Note that the check of C
1370                          itself is done by the last iteration.  */
1371                       int this_char_base = -1;
1372
                           /* Follow the chain of INVERSE_TRT entries
                              until it comes back to C.  */
1373                       while (boyer_moore_ok)
1374                         {
1375                           if (ASCII_BYTE_P (inverse))
1376                             {
1377                               if (this_char_base > 0)
1378                                 boyer_moore_ok = 0;
1379                               else
1380                                 this_char_base = 0;
1381                             }
1382                           else if (CHAR_BYTE8_P (inverse))
1383                             /* Boyer-moore search can't handle a
1384                                translation of an eight-bit
1385                                character.  */
1386                             boyer_moore_ok = 0;
1387                           else if (this_char_base < 0)
1388                             {
1389                               this_char_base = inverse & ~0x3F;
1390                               if (char_base < 0)
1391                                 char_base = this_char_base;
1392                               else if (this_char_base != char_base)
1393                                 boyer_moore_ok = 0;
1394                             }
1395                           else if ((inverse & ~0x3F) != this_char_base)
1396                             boyer_moore_ok = 0;
1397                           if (c == inverse)
1398                             break;
1399                           TRANSLATE (inverse, inverse_trt, inverse);
1400                         }
1401                     }
1402                 }
1403
1404               /* Store this character into the translated pattern.  */
1405               memcpy (pat, str, charlen);
1406               pat += charlen;
1407               base_pat += in_charlen;
1408               len_byte -= in_charlen;
1409             }
1410
1411           /* If char_base is still negative we didn't find any translated
1412              non-ASCII characters.  */
1413           if (char_base < 0)
1414             char_base = 0;
1415         }
1416       else
1417         {
1418           /* Unibyte buffer.  */
1419           char_base = 0;
1420           while (--len >= 0)
1421             {
1422               int c, translated, inverse;
1423
1424               /* If we got here and the RE flag is set, it's because we're
1425                  dealing with a regexp known to be trivial, so the backslash
1426                  just quotes the next character.  */
1427               if (RE && *base_pat == '\\')
1428                 {
1429                   len--;
1430                   raw_pattern_size--;
1431                   base_pat++;
1432                 }
1433               c = *base_pat++;
1434               TRANSLATE (translated, trt, c);
1435               *pat++ = translated;
1436               /* Check that none of C's equivalents violates the
1437                  assumptions of boyer_moore.  */
1438               TRANSLATE (inverse, inverse_trt, c);
1439               while (1)
1440                 {
1441                   if (inverse >= 0200)
1442                     {
1443                       boyer_moore_ok = 0;
1444                       break;
1445                     }
1446                   if (c == inverse)
1447                     break;
1448                   TRANSLATE (inverse, inverse_trt, inverse);
1449                 }
1450             }
1451         }
1452
           /* From here on LEN_BYTE is the byte length of the translated
              pattern, and PAT/BASE_PAT point at its start in PATBUF.  */
1453       len_byte = pat - patbuf;
1454       pat = base_pat = patbuf;
1455
1456       if (boyer_moore_ok)
1457         return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1458                             pos_byte, lim_byte,
1459                             char_base);
1460       else
1461         return simple_search (n, pat, raw_pattern_size, len_byte, trt,
1462                               pos, pos_byte, lim, lim_byte);
1463     }
1464 }
1465 \f
1466 /* Do a simple string search N times for the string PAT,
1467    whose length is LEN characters / LEN_BYTE bytes,
1468    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1469    TRT is the translation table; it is applied to each buffer
        character before that character is compared with PAT.
1470
1471    Return the character position where the match is found:
        the end of the last match when searching forward (N > 0),
        its beginning when searching backward.  In that case the
        match is also recorded with set_search_regs.
1472    Otherwise, if M matches remained to be found, return -M.
1473
1474    This kind of search works regardless of what is in PAT and
1475    regardless of what is in TRT.  It is used in cases where
1476    boyer_moore cannot work.  */
1477
1478 static EMACS_INT
1479 simple_search (EMACS_INT n, unsigned char *pat,
1480                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1481                ptrdiff_t pos, ptrdiff_t pos_byte,
1482                ptrdiff_t lim, ptrdiff_t lim_byte)
1483 {
1484   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1485   bool forward = n > 0;
1486   /* Number of buffer bytes matched.  Note that this may be different
1487      from len_byte in a multibyte buffer.  */
1488   ptrdiff_t match_byte = PTRDIFF_MIN;
1489
       /* Forward search in a multibyte buffer.  */
1490   if (lim > pos && multibyte)
1491     while (n > 0)
1492       {
1493         while (1)
1494           {
1495             /* Try matching at position POS.  */
1496             ptrdiff_t this_pos = pos;
1497             ptrdiff_t this_pos_byte = pos_byte;
1498             ptrdiff_t this_len = len;
1499             unsigned char *p = pat;
                 /* Give up once a match could no longer fit before LIM.  */
1500             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1501               goto stop;
1502
1503             while (this_len > 0)
1504               {
1505                 int charlen, buf_charlen;
1506                 int pat_ch, buf_ch;
1507
1508                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1509                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1510                                                  buf_charlen);
                     /* Only the buffer character is translated here;
                        PAT is compared as given.  */
1511                 TRANSLATE (buf_ch, trt, buf_ch);
1512
1513                 if (buf_ch != pat_ch)
1514                   break;
1515
1516                 this_len--;
1517                 p += charlen;
1518
1519                 this_pos_byte += buf_charlen;
1520                 this_pos++;
1521               }
1522
                 /* All LEN characters matched: move past the match.  */
1523             if (this_len == 0)
1524               {
1525                 match_byte = this_pos_byte - pos_byte;
1526                 pos += len;
1527                 pos_byte += match_byte;
1528                 break;
1529               }
1530
                 /* No match here; try one character further on.  */
1531             INC_BOTH (pos, pos_byte);
1532           }
1533
1534         n--;
1535       }
       /* Forward search in a unibyte buffer, where POS serves as both
          character and byte position.  */
1536   else if (lim > pos)
1537     while (n > 0)
1538       {
1539         while (1)
1540           {
1541             /* Try matching at position POS.  */
1542             ptrdiff_t this_pos = pos;
1543             ptrdiff_t this_len = len;
1544             unsigned char *p = pat;
1545
1546             if (pos + len > lim)
1547               goto stop;
1548
1549             while (this_len > 0)
1550               {
1551                 int pat_ch = *p++;
1552                 int buf_ch = FETCH_BYTE (this_pos);
1553                 TRANSLATE (buf_ch, trt, buf_ch);
1554
1555                 if (buf_ch != pat_ch)
1556                   break;
1557
1558                 this_len--;
1559                 this_pos++;
1560               }
1561
1562             if (this_len == 0)
1563               {
1564                 match_byte = len;
1565                 pos += len;
1566                 break;
1567               }
1568
1569             pos++;
1570           }
1571
1572         n--;
1573       }
1574   /* Backwards search.  */
1575   else if (lim < pos && multibyte)
1576     while (n < 0)
1577       {
1578         while (1)
1579           {
1580             /* Try matching at position POS.  */
1581             ptrdiff_t this_pos = pos;
1582             ptrdiff_t this_pos_byte = pos_byte;
1583             ptrdiff_t this_len = len;
                 /* Compare from the end of PAT toward its beginning.  */
1584             const unsigned char *p = pat + len_byte;
1585
1586             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1587               goto stop;
1588
1589             while (this_len > 0)
1590               {
1591                 int pat_ch, buf_ch;
1592
1593                 DEC_BOTH (this_pos, this_pos_byte);
1594                 PREV_CHAR_BOUNDARY (p, pat);
1595                 pat_ch = STRING_CHAR (p);
1596                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1597                 TRANSLATE (buf_ch, trt, buf_ch);
1598
1599                 if (buf_ch != pat_ch)
1600                   break;
1601
1602                 this_len--;
1603               }
1604
                 /* All LEN characters matched: move to the start of the
                    match.  */
1605             if (this_len == 0)
1606               {
1607                 match_byte = pos_byte - this_pos_byte;
1608                 pos = this_pos;
1609                 pos_byte = this_pos_byte;
1610                 break;
1611               }
1612
1613             DEC_BOTH (pos, pos_byte);
1614           }
1615
1616         n++;
1617       }
       /* Backward search in a unibyte buffer.  */
1618   else if (lim < pos)
1619     while (n < 0)
1620       {
1621         while (1)
1622           {
1623             /* Try matching at position POS.  */
1624             ptrdiff_t this_pos = pos - len;
1625             ptrdiff_t this_len = len;
1626             unsigned char *p = pat;
1627
1628             if (this_pos < lim)
1629               goto stop;
1630
1631             while (this_len > 0)
1632               {
1633                 int pat_ch = *p++;
1634                 int buf_ch = FETCH_BYTE (this_pos);
1635                 TRANSLATE (buf_ch, trt, buf_ch);
1636
1637                 if (buf_ch != pat_ch)
1638                   break;
1639                 this_len--;
1640                 this_pos++;
1641               }
1642
1643             if (this_len == 0)
1644               {
1645                 match_byte = len;
1646                 pos -= len;
1647                 break;
1648               }
1649
1650             pos--;
1651           }
1652
1653         n++;
1654       }
1655
1656  stop:
       /* N == 0 means every requested match was found.  Record the last
          one, giving set_search_regs the byte position where the match
          starts and its length in bytes.  */
1657   if (n == 0)
1658     {
1659       eassert (match_byte != PTRDIFF_MIN);
1660       if (forward)
1661         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1662       else
1663         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1664
1665       return pos;
1666     }
       /* Otherwise report the number of matches still missing, negated.  */
1667   else if (n > 0)
1668     return -n;
1669   else
1670     return n;
1671 }
1672 \f
1673 /* Do Boyer-Moore search N times for the string BASE_PAT,
1674    whose length is LEN_BYTE,
1675    from buffer position POS_BYTE until LIM_BYTE.
1676    The sign of N says which direction we search in.
1677    TRT and INVERSE_TRT are translation tables.
1678    Characters in PAT are already translated by TRT.
1679
1680    This kind of search works if all the characters in BASE_PAT that
1681    have nontrivial translation are the same aside from the last byte.
1682    This makes it possible to translate just the last byte of a
1683    character, and do so after just a simple test of the context.
1684    CHAR_BASE is nonzero if there is such a non-ASCII character.
1685
1686    If that criterion is not satisfied, do not call this function.  */
1687
1688 static EMACS_INT
1689 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1690              ptrdiff_t len_byte,
1691              Lisp_Object trt, Lisp_Object inverse_trt,
1692              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1693              int char_base)
1694 {
1695   int direction = ((n > 0) ? 1 : -1);
1696   register ptrdiff_t dirlen;
1697   ptrdiff_t limit;
1698   int stride_for_teases = 0;
1699   int BM_tab[0400];
1700   register unsigned char *cursor, *p_limit;
1701   register ptrdiff_t i;
1702   register int j;
1703   unsigned char *pat, *pat_end;
1704   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1705
1706   unsigned char simple_translate[0400];
1707   /* These are set to the preceding bytes of a byte to be translated
1708      if char_base is nonzero.  As the maximum byte length of a
1709      multibyte character is 5, we have to check at most four previous
1710      bytes.  */
1711   int translate_prev_byte1 = 0;
1712   int translate_prev_byte2 = 0;
1713   int translate_prev_byte3 = 0;
1714
1715   /* The general approach is that we are going to maintain that we know
1716      the first (closest to the present position, in whatever direction
1717      we're searching) character that could possibly be the last
1718      (furthest from present position) character of a valid match.  We
1719      advance the state of our knowledge by looking at that character
1720      and seeing whether it indeed matches the last character of the
1721      pattern.  If it does, we take a closer look.  If it does not, we
1722      move our pointer (to putative last characters) as far as is
1723      logically possible.  This amount of movement, which I call a
1724      stride, will be the length of the pattern if the actual character
1725      appears nowhere in the pattern, otherwise it will be the distance
1726      from the last occurrence of that character to the end of the
1727      pattern.  If the amount is zero we have a possible match.  */
1728
1729   /* Here we make a "mickey mouse" BM table.  The stride of the search
1730      is determined only by the last character of the putative match.
1731      If that character does not match, we will stride the proper
1732      distance to propose a match that superimposes it on the last
1733      instance of a character that matches it (per trt), or misses
1734      it entirely if there is none. */
1735
1736   dirlen = len_byte * direction;
1737
1738   /* Record position after the end of the pattern.  */
1739   pat_end = base_pat + len_byte;
1740   /* BASE_PAT points to a character that we start scanning from.
1741      It is the first character in a forward search,
1742      the last character in a backward search.  */
1743   if (direction < 0)
1744     base_pat = pat_end - 1;
1745
1746   /* A character that does not appear in the pattern induces a
1747      stride equal to the pattern length.  */
1748   for (i = 0; i < 0400; i++)
1749     BM_tab[i] = dirlen;
1750
1751   /* We use this for translation, instead of TRT itself.
1752      We fill this in to handle the characters that actually
1753      occur in the pattern.  Others don't matter anyway!  */
1754   for (i = 0; i < 0400; i++)
1755     simple_translate[i] = i;
1756
1757   if (char_base)
1758     {
1759       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1760          byte following them are the target of translation.  */
1761       unsigned char str[MAX_MULTIBYTE_LENGTH];
1762       int cblen = CHAR_STRING (char_base, str);
1763
1764       translate_prev_byte1 = str[cblen - 2];
1765       if (cblen > 2)
1766         {
1767           translate_prev_byte2 = str[cblen - 3];
1768           if (cblen > 3)
1769             translate_prev_byte3 = str[cblen - 4];
1770         }
1771     }
1772
1773   i = 0;
1774   while (i != dirlen)
1775     {
1776       unsigned char *ptr = base_pat + i;
1777       i += direction;
1778       if (! NILP (trt))
1779         {
1780           /* If the byte currently looking at is the last of a
1781              character to check case-equivalents, set CH to that
1782              character.  An ASCII character and a non-ASCII character
1783              matching with CHAR_BASE are to be checked.  */
1784           int ch = -1;
1785
1786           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1787             ch = *ptr;
1788           else if (char_base
1789                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1790             {
1791               unsigned char *charstart = ptr - 1;
1792
1793               while (! (CHAR_HEAD_P (*charstart)))
1794                 charstart--;
1795               ch = STRING_CHAR (charstart);
1796               if (char_base != (ch & ~0x3F))
1797                 ch = -1;
1798             }
1799
1800           if (ch >= 0200 && multibyte)
1801             j = (ch & 0x3F) | 0200;
1802           else
1803             j = *ptr;
1804
1805           if (i == dirlen)
1806             stride_for_teases = BM_tab[j];
1807
1808           BM_tab[j] = dirlen - i;
1809           /* A translation table is accompanied by its inverse -- see
1810              comment following downcase_table for details.  */
1811           if (ch >= 0)
1812             {
1813               int starting_ch = ch;
1814               int starting_j = j;
1815
1816               while (1)
1817                 {
1818                   TRANSLATE (ch, inverse_trt, ch);
1819                   if (ch >= 0200 && multibyte)
1820                     j = (ch & 0x3F) | 0200;
1821                   else
1822                     j = ch;
1823
1824                   /* For all the characters that map into CH,
1825                      set up simple_translate to map the last byte
1826                      into STARTING_J.  */
1827                   simple_translate[j] = starting_j;
1828                   if (ch == starting_ch)
1829                     break;
1830                   BM_tab[j] = dirlen - i;
1831                 }
1832             }
1833         }
1834       else
1835         {
1836           j = *ptr;
1837
1838           if (i == dirlen)
1839             stride_for_teases = BM_tab[j];
1840           BM_tab[j] = dirlen - i;
1841         }
1842       /* stride_for_teases tells how much to stride if we get a
1843          match on the far character but are subsequently
1844          disappointed, by recording what the stride would have been
1845          for that character if the last character had been
1846          different.  */
1847     }
1848   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1849   /* loop invariant - POS_BYTE points at where last char (first
1850      char if reverse) of pattern would align in a possible match.  */
1851   while (n != 0)
1852     {
1853       ptrdiff_t tail_end;
1854       unsigned char *tail_end_ptr;
1855
1856       /* It's been reported that some (broken) compiler thinks that
1857          Boolean expressions in an arithmetic context are unsigned.
1858          Using an explicit ?1:0 prevents this.  */
1859       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1860           < 0)
1861         return (n * (0 - direction));
1862       /* First we do the part we can by pointers (maybe nothing) */
1863       QUIT;
1864       pat = base_pat;
1865       limit = pos_byte - dirlen + direction;
1866       if (direction > 0)
1867         {
1868           limit = BUFFER_CEILING_OF (limit);
1869           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1870              can take on without hitting edge of buffer or the gap.  */
1871           limit = min (limit, pos_byte + 20000);
1872           limit = min (limit, lim_byte - 1);
1873         }
1874       else
1875         {
1876           limit = BUFFER_FLOOR_OF (limit);
1877           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1878              can take on without hitting edge of buffer or the gap.  */
1879           limit = max (limit, pos_byte - 20000);
1880           limit = max (limit, lim_byte);
1881         }
1882       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1883       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1884
1885       if ((limit - pos_byte) * direction > 20)
1886         {
1887           unsigned char *p2;
1888
1889           p_limit = BYTE_POS_ADDR (limit);
1890           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1891           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1892           while (1)             /* use one cursor setting as long as i can */
1893             {
1894               if (direction > 0) /* worth duplicating */
1895                 {
1896                   while (cursor <= p_limit)
1897                     {
1898                       if (BM_tab[*cursor] == 0)
1899                         goto hit;
1900                       cursor += BM_tab[*cursor];
1901                     }
1902                 }
1903               else
1904                 {
1905                   while (cursor >= p_limit)
1906                     {
1907                       if (BM_tab[*cursor] == 0)
1908                         goto hit;
1909                       cursor += BM_tab[*cursor];
1910                     }
1911                 }
1912               /* If you are here, cursor is beyond the end of the
1913                  searched region.  You fail to match within the
1914                  permitted region and would otherwise try a character
1915                  beyond that region.  */
1916               break;
1917
1918             hit:
1919               i = dirlen - direction;
1920               if (! NILP (trt))
1921                 {
1922                   while ((i -= direction) + direction != 0)
1923                     {
1924                       int ch;
1925                       cursor -= direction;
1926                       /* Translate only the last byte of a character.  */
1927                       if (! multibyte
1928                           || ((cursor == tail_end_ptr
1929                                || CHAR_HEAD_P (cursor[1]))
1930                               && (CHAR_HEAD_P (cursor[0])
1931                                   /* Check if this is the last byte of
1932                                      a translatable character.  */
1933                                   || (translate_prev_byte1 == cursor[-1]
1934                                       && (CHAR_HEAD_P (translate_prev_byte1)
1935                                           || (translate_prev_byte2 == cursor[-2]
1936                                               && (CHAR_HEAD_P (translate_prev_byte2)
1937                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1938                         ch = simple_translate[*cursor];
1939                       else
1940                         ch = *cursor;
1941                       if (pat[i] != ch)
1942                         break;
1943                     }
1944                 }
1945               else
1946                 {
1947                   while ((i -= direction) + direction != 0)
1948                     {
1949                       cursor -= direction;
1950                       if (pat[i] != *cursor)
1951                         break;
1952                     }
1953                 }
1954               cursor += dirlen - i - direction; /* fix cursor */
1955               if (i + direction == 0)
1956                 {
1957                   ptrdiff_t position, start, end;
1958
1959                   cursor -= direction;
1960
1961                   position = pos_byte + cursor - p2 + ((direction > 0)
1962                                                        ? 1 - len_byte : 0);
1963                   set_search_regs (position, len_byte);
1964
1965                   if (NILP (Vinhibit_changing_match_data))
1966                     {
1967                       start = search_regs.start[0];
1968                       end = search_regs.end[0];
1969                     }
1970                   else
1971                     /* If Vinhibit_changing_match_data is non-nil,
1972                        search_regs will not be changed.  So let's
1973                        compute start and end here.  */
1974                     {
1975                       start = BYTE_TO_CHAR (position);
1976                       end = BYTE_TO_CHAR (position + len_byte);
1977                     }
1978
1979                   if ((n -= direction) != 0)
1980                     cursor += dirlen; /* to resume search */
1981                   else
1982                     return direction > 0 ? end : start;
1983                 }
1984               else
1985                 cursor += stride_for_teases; /* <sigh> we lose -  */
1986             }
1987           pos_byte += cursor - p2;
1988         }
1989       else
1990         /* Now we'll pick up a clump that has to be done the hard
1991            way because it covers a discontinuity.  */
1992         {
1993           limit = ((direction > 0)
1994                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1995                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1996           limit = ((direction > 0)
1997                    ? min (limit + len_byte, lim_byte - 1)
1998                    : max (limit - len_byte, lim_byte));
1999           /* LIMIT is now the last value POS_BYTE can have
2000              and still be valid for a possible match.  */
2001           while (1)
2002             {
2003               /* This loop can be coded for space rather than
2004                  speed because it will usually run only once.
2005                  (the reach is at most len + 21, and typically
2006                  does not exceed len).  */
2007               while ((limit - pos_byte) * direction >= 0)
2008                 {
2009                   int ch = FETCH_BYTE (pos_byte);
2010                   if (BM_tab[ch] == 0)
2011                     goto hit2;
2012                   pos_byte += BM_tab[ch];
2013                 }
2014               break;    /* ran off the end */
2015
2016             hit2:
2017               /* Found what might be a match.  */
2018               i = dirlen - direction;
2019               while ((i -= direction) + direction != 0)
2020                 {
2021                   int ch;
2022                   unsigned char *ptr;
2023                   pos_byte -= direction;
2024                   ptr = BYTE_POS_ADDR (pos_byte);
2025                   /* Translate only the last byte of a character.  */
2026                   if (! multibyte
2027                       || ((ptr == tail_end_ptr
2028                            || CHAR_HEAD_P (ptr[1]))
2029                           && (CHAR_HEAD_P (ptr[0])
2030                               /* Check if this is the last byte of a
2031                                  translatable character.  */
2032                               || (translate_prev_byte1 == ptr[-1]
2033                                   && (CHAR_HEAD_P (translate_prev_byte1)
2034                                       || (translate_prev_byte2 == ptr[-2]
2035                                           && (CHAR_HEAD_P (translate_prev_byte2)
2036                                               || translate_prev_byte3 == ptr[-3])))))))
2037                     ch = simple_translate[*ptr];
2038                   else
2039                     ch = *ptr;
2040                   if (pat[i] != ch)
2041                     break;
2042                 }
2043               /* Above loop has moved POS_BYTE part or all the way
2044                  back to the first pos (last pos if reverse).
2045                  Set it once again at the last (first if reverse) char.  */
2046               pos_byte += dirlen - i - direction;
2047               if (i + direction == 0)
2048                 {
2049                   ptrdiff_t position, start, end;
2050                   pos_byte -= direction;
2051
2052                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2053                   set_search_regs (position, len_byte);
2054
2055                   if (NILP (Vinhibit_changing_match_data))
2056                     {
2057                       start = search_regs.start[0];
2058                       end = search_regs.end[0];
2059                     }
2060                   else
2061                     /* If Vinhibit_changing_match_data is non-nil,
2062                        search_regs will not be changed.  So let's
2063                        compute start and end here.  */
2064                     {
2065                       start = BYTE_TO_CHAR (position);
2066                       end = BYTE_TO_CHAR (position + len_byte);
2067                     }
2068
2069                   if ((n -= direction) != 0)
2070                     pos_byte += dirlen; /* to resume search */
2071                   else
2072                     return direction > 0 ? end : start;
2073                 }
2074               else
2075                 pos_byte += stride_for_teases;
2076             }
2077           }
2078       /* We have done one clump.  Can we continue? */
2079       if ((lim_byte - pos_byte) * direction < 0)
2080         return ((0 - n) * direction);
2081     }
2082   return BYTE_TO_CHAR (pos_byte);
2083 }
2084
2085 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2086    for the overall match just found in the current buffer.
2087    Also clear out the match data for registers 1 and up.

        BEG_BYTE and NBYTES are byte quantities; register 0 is stored
        as the character positions that BYTE_TO_CHAR yields for them.
        Nothing at all is changed when Vinhibit_changing_match_data is
        non-nil.  */
2088
2089 static void
2090 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2091 {
2092   ptrdiff_t i;
2093
       /* The caller asked for the match data to be left alone.  */
2094   if (!NILP (Vinhibit_changing_match_data))
2095     return;
2096
2097   /* Make sure we have registers in which to store
2098      the match position.  If none exist yet, allocate room for
          two (registers 0 and 1).  */
2099   if (search_regs.num_regs == 0)
2100     {
2101       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2102       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2103       search_regs.num_regs = 2;
2104     }
2105
2106   /* Clear out the other registers.  Both ends are set to -1.  */
2107   for (i = 1; i < search_regs.num_regs; i++)
2108     {
2109       search_regs.start[i] = -1;
2110       search_regs.end[i] = -1;
2111     }
2112
       /* Register 0 holds the whole match, as character positions.  */
2113   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2114   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
       /* Note the current buffer as the object this match data
          belongs to.  */
2115   XSETBUFFER (last_thing_searched, current_buffer);
2116 }
2117 \f
2118 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2119        "MSearch backward: ",
2120        doc: /* Search backward from point for STRING.
2121 Set point to the beginning of the occurrence found, and return point.
2122 An optional second argument bounds the search; it is a buffer position.
2123 The match found must not extend before that position.
2124 Optional third argument, if t, means if fail just return nil (no error).
2125  If not nil and not t, position at limit of search and return nil.
2126 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2127  successive occurrences.  If COUNT is negative, search forward,
2128  instead of backward, for -COUNT occurrences.
2129
2130 Search case-sensitivity is determined by the value of the variable
2131 `case-fold-search', which see.
2132
2133 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2134   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2135 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  The trailing
          constants -1, 0, 0 are, judging by the sibling search commands
          in this file, the direction (-1 backward, 1 forward), a
          regexp flag and a POSIX flag.  search_command is defined
          elsewhere; confirm the parameter meanings there.  */
2136   return search_command (string, bound, noerror, count, -1, 0, 0);
2137 }
2138
2139 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2140        doc: /* Search forward from point for STRING.
2141 Set point to the end of the occurrence found, and return point.
2142 An optional second argument bounds the search; it is a buffer position.
2143 The match found must not extend after that position.  A value of nil is
2144   equivalent to (point-max).
2145 Optional third argument, if t, means if fail just return nil (no error).
2146   If not nil and not t, move to limit of search and return nil.
2147 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2148  successive occurrences.  If COUNT is negative, search backward,
2149  instead of forward, for -COUNT occurrences.
2150
2151 Search case-sensitivity is determined by the value of the variable
2152 `case-fold-search', which see.
2153
2154 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2155   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2156 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  The trailing
          constants 1, 0, 0 are, judging by the sibling search commands
          in this file, the direction (1 forward, -1 backward), a
          regexp flag and a POSIX flag.  search_command is defined
          elsewhere; confirm the parameter meanings there.  */
2157   return search_command (string, bound, noerror, count, 1, 0, 0);
2158 }
2159
2160 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2161        "sRE search backward: ",
2162        doc: /* Search backward from point for match for regular expression REGEXP.
2163 Set point to the beginning of the match, and return point.
2164 The match found is the one starting last in the buffer
2165 and yet ending before the origin of the search.
2166 An optional second argument bounds the search; it is a buffer position.
2167 The match found must start at or after that position.
2168 Optional third argument, if t, means if fail just return nil (no error).
2169   If not nil and not t, move to limit of search and return nil.
2170 Optional fourth argument is repeat count--search for successive occurrences.
2171
2172 Search case-sensitivity is determined by the value of the variable
2173 `case-fold-search', which see.
2174
2175 See also the functions `match-beginning', `match-end', `match-string',
2176 and `replace-match'.  */)
2177   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2178 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  The trailing
          constants -1, 1, 0 are, judging by the sibling search commands
          in this file, the direction (-1 backward), a regexp flag
          (set here) and a POSIX flag (clear here).  search_command is
          defined elsewhere; confirm the parameter meanings there.  */
2179   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2180 }
2181
2182 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2183        "sRE search: ",
2184        doc: /* Search forward from point for regular expression REGEXP.
2185 Set point to the end of the occurrence found, and return point.
2186 An optional second argument bounds the search; it is a buffer position.
2187 The match found must not extend after that position.
2188 Optional third argument, if t, means if fail just return nil (no error).
2189   If not nil and not t, move to limit of search and return nil.
2190 Optional fourth argument is repeat count--search for successive occurrences.
2191
2192 Search case-sensitivity is determined by the value of the variable
2193 `case-fold-search', which see.
2194
2195 See also the functions `match-beginning', `match-end', `match-string',
2196 and `replace-match'.  */)
2197   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2198 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  The trailing
          constants 1, 1, 0 are, judging by the sibling search commands
          in this file, the direction (1 forward), a regexp flag (set
          here) and a POSIX flag (clear here).  search_command is
          defined elsewhere; confirm the parameter meanings there.  */
2199   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2200 }
2201
2202 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2203        "sPosix search backward: ",
2204        doc: /* Search backward from point for match for regular expression REGEXP.
2205 Find the longest match in accord with Posix regular expression rules.
2206 Set point to the beginning of the match, and return point.
2207 The match found is the one starting last in the buffer
2208 and yet ending before the origin of the search.
2209 An optional second argument bounds the search; it is a buffer position.
2210 The match found must start at or after that position.
2211 Optional third argument, if t, means if fail just return nil (no error).
2212   If not nil and not t, move to limit of search and return nil.
2213 Optional fourth argument is repeat count--search for successive occurrences.
2214
2215 Search case-sensitivity is determined by the value of the variable
2216 `case-fold-search', which see.
2217
2218 See also the functions `match-beginning', `match-end', `match-string',
2219 and `replace-match'.  */)
2220   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2221 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  The trailing
          constants -1, 1, 1 are, judging by the sibling search commands
          in this file, the direction (-1 backward), a regexp flag and
          a POSIX flag (both set here).  search_command is defined
          elsewhere; confirm the parameter meanings there.  */
2222   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2223 }
2224
2225 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2226        "sPosix search: ",
2227        doc: /* Search forward from point for regular expression REGEXP.
2228 Find the longest match in accord with Posix regular expression rules.
2229 Set point to the end of the occurrence found, and return point.
2230 An optional second argument bounds the search; it is a buffer position.
2231 The match found must not extend after that position.
2232 Optional third argument, if t, means if fail just return nil (no error).
2233   If not nil and not t, move to limit of search and return nil.
2234 Optional fourth argument is repeat count--search for successive occurrences.
2235
2236 Search case-sensitivity is determined by the value of the variable
2237 `case-fold-search', which see.
2238
2239 See also the functions `match-beginning', `match-end', `match-string',
2240 and `replace-match'.  */)
2241   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2242 {
       /* All the work is done by search_command; the four Lisp
          arguments are passed through unchanged.  The trailing
          constants 1, 1, 1 are, judging by the sibling search commands
          in this file, the direction (1 forward), a regexp flag and a
          POSIX flag (both set here).  search_command is defined
          elsewhere; confirm the parameter meanings there.  */
2243   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2244 }
2245 \f
2246 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2247        doc: /* Replace text matched by last search with NEWTEXT.
2248 Leave point at the end of the replacement text.
2249
2250 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2251 the replacement text.  Otherwise, maybe capitalize the whole text, or
2252 maybe just word initials, based on the replaced text.  If the replaced
2253 text has only capital letters and has at least one multiletter word,
2254 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2255 in the replaced text, capitalize each word in NEWTEXT.
2256
2257 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2258 Otherwise treat `\\' as special:
2259   `\\&' in NEWTEXT means substitute original matched text.
2260   `\\N' means substitute what matched the Nth `\\(...\\)'.
2261        If Nth parens didn't match, substitute nothing.
2262   `\\\\' means insert one `\\'.
2263   `\\?' is treated literally
2264        (for compatibility with `query-replace-regexp').
2265   Any other character following `\\' signals an error.
2266 Case conversion does not apply to these substitutions.
2267
2268 If optional fourth argument STRING is non-nil, it should be a string
2269 to act on; this should be the string on which the previous match was
2270 done via `string-match'.  In this case, `replace-match' creates and
2271 returns a new string, made by copying STRING and replacing the part of
2272 STRING that was matched (the original STRING itself is not altered).
2273
2274 The optional fifth argument SUBEXP specifies a subexpression;
2275 it says to replace just that subexpression with NEWTEXT,
2276 rather than replacing the entire matched text.
2277 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2278 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2279 NEWTEXT in place of subexp N.
2280 This is useful only after a regular expression search or match,
2281 since only regular expressions have distinguished subexpressions.  */)
2282   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2283 {
2284   enum { nochange, all_caps, cap_initial } case_action;
2285   register ptrdiff_t pos, pos_byte;
2286   int some_multiletter_word;
2287   int some_lowercase;
2288   int some_uppercase;
2289   int some_nonuppercase_initial;
2290   register int c, prevc;
2291   ptrdiff_t sub;
2292   ptrdiff_t opoint, newpoint;
2293
2294   CHECK_STRING (newtext);
2295
2296   if (! NILP (string))
2297     CHECK_STRING (string);
2298
2299   case_action = nochange;       /* We tried an initialization */
2300                                 /* but some C compilers blew it */
2301
2302   if (search_regs.num_regs <= 0)
2303     error ("`replace-match' called before any match found");
2304
2305   if (NILP (subexp))
2306     sub = 0;
2307   else
2308     {
2309       CHECK_NUMBER (subexp);
2310       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2311         args_out_of_range (subexp, make_number (search_regs.num_regs));
2312       sub = XINT (subexp);
2313     }
2314
2315   if (NILP (string))
2316     {
2317       if (search_regs.start[sub] < BEGV
2318           || search_regs.start[sub] > search_regs.end[sub]
2319           || search_regs.end[sub] > ZV)
2320         args_out_of_range (make_number (search_regs.start[sub]),
2321                            make_number (search_regs.end[sub]));
2322     }
2323   else
2324     {
2325       if (search_regs.start[sub] < 0
2326           || search_regs.start[sub] > search_regs.end[sub]
2327           || search_regs.end[sub] > SCHARS (string))
2328         args_out_of_range (make_number (search_regs.start[sub]),
2329                            make_number (search_regs.end[sub]));
2330     }
2331
2332   if (NILP (fixedcase))
2333     {
2334       /* Decide how to casify by examining the matched text. */
2335       ptrdiff_t last;
2336
2337       pos = search_regs.start[sub];
2338       last = search_regs.end[sub];
2339
2340       if (NILP (string))
2341         pos_byte = CHAR_TO_BYTE (pos);
2342       else
2343         pos_byte = string_char_to_byte (string, pos);
2344
2345       prevc = '\n';
2346       case_action = all_caps;
2347
2348       /* some_multiletter_word is set nonzero if any original word
2349          is more than one letter long. */
2350       some_multiletter_word = 0;
2351       some_lowercase = 0;
2352       some_nonuppercase_initial = 0;
2353       some_uppercase = 0;
2354
2355       while (pos < last)
2356         {
2357           if (NILP (string))
2358             {
2359               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2360               INC_BOTH (pos, pos_byte);
2361             }
2362           else
2363             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2364
2365           if (lowercasep (c))
2366             {
2367               /* Cannot be all caps if any original char is lower case */
2368
2369               some_lowercase = 1;
2370               if (SYNTAX (prevc) != Sword)
2371                 some_nonuppercase_initial = 1;
2372               else
2373                 some_multiletter_word = 1;
2374             }
2375           else if (uppercasep (c))
2376             {
2377               some_uppercase = 1;
2378               if (SYNTAX (prevc) != Sword)
2379                 ;
2380               else
2381                 some_multiletter_word = 1;
2382             }
2383           else
2384             {
2385               /* If the initial is a caseless word constituent,
2386                  treat that like a lowercase initial.  */
2387               if (SYNTAX (prevc) != Sword)
2388                 some_nonuppercase_initial = 1;
2389             }
2390
2391           prevc = c;
2392         }
2393
2394       /* Convert to all caps if the old text is all caps
2395          and has at least one multiletter word.  */
2396       if (! some_lowercase && some_multiletter_word)
2397         case_action = all_caps;
2398       /* Capitalize each word, if the old text has all capitalized words.  */
2399       else if (!some_nonuppercase_initial && some_multiletter_word)
2400         case_action = cap_initial;
2401       else if (!some_nonuppercase_initial && some_uppercase)
2402         /* Should x -> yz, operating on X, give Yz or YZ?
2403            We'll assume the latter.  */
2404         case_action = all_caps;
2405       else
2406         case_action = nochange;
2407     }
2408
2409   /* Do replacement in a string.  */
2410   if (!NILP (string))
2411     {
2412       Lisp_Object before, after;
2413
2414       before = Fsubstring (string, make_number (0),
2415                            make_number (search_regs.start[sub]));
2416       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2417
2418       /* Substitute parts of the match into NEWTEXT
2419          if desired.  */
2420       if (NILP (literal))
2421         {
2422           ptrdiff_t lastpos = 0;
2423           ptrdiff_t lastpos_byte = 0;
2424           /* We build up the substituted string in ACCUM.  */
2425           Lisp_Object accum;
2426           Lisp_Object middle;
2427           ptrdiff_t length = SBYTES (newtext);
2428
2429           accum = Qnil;
2430
2431           for (pos_byte = 0, pos = 0; pos_byte < length;)
2432             {
2433               ptrdiff_t substart = -1;
2434               ptrdiff_t subend = 0;
2435               int delbackslash = 0;
2436
2437               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2438
2439               if (c == '\\')
2440                 {
2441                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2442
2443                   if (c == '&')
2444                     {
2445                       substart = search_regs.start[sub];
2446                       subend = search_regs.end[sub];
2447                     }
2448                   else if (c >= '1' && c <= '9')
2449                     {
2450                       if (c - '0' < search_regs.num_regs
2451                           && 0 <= search_regs.start[c - '0'])
2452                         {
2453                           substart = search_regs.start[c - '0'];
2454                           subend = search_regs.end[c - '0'];
2455                         }
2456                       else
2457                         {
2458                           /* If that subexp did not match,
2459                              replace \\N with nothing.  */
2460                           substart = 0;
2461                           subend = 0;
2462                         }
2463                     }
2464                   else if (c == '\\')
2465                     delbackslash = 1;
2466                   else if (c != '?')
2467                     error ("Invalid use of `\\' in replacement text");
2468                 }
2469               if (substart >= 0)
2470                 {
2471                   if (pos - 2 != lastpos)
2472                     middle = substring_both (newtext, lastpos,
2473                                              lastpos_byte,
2474                                              pos - 2, pos_byte - 2);
2475                   else
2476                     middle = Qnil;
2477                   accum = concat3 (accum, middle,
2478                                    Fsubstring (string,
2479                                                make_number (substart),
2480                                                make_number (subend)));
2481                   lastpos = pos;
2482                   lastpos_byte = pos_byte;
2483                 }
2484               else if (delbackslash)
2485                 {
2486                   middle = substring_both (newtext, lastpos,
2487                                            lastpos_byte,
2488                                            pos - 1, pos_byte - 1);
2489
2490                   accum = concat2 (accum, middle);
2491                   lastpos = pos;
2492                   lastpos_byte = pos_byte;
2493                 }
2494             }
2495
2496           if (pos != lastpos)
2497             middle = substring_both (newtext, lastpos,
2498                                      lastpos_byte,
2499                                      pos, pos_byte);
2500           else
2501             middle = Qnil;
2502
2503           newtext = concat2 (accum, middle);
2504         }
2505
2506       /* Do case substitution in NEWTEXT if desired.  */
2507       if (case_action == all_caps)
2508         newtext = Fupcase (newtext);
2509       else if (case_action == cap_initial)
2510         newtext = Fupcase_initials (newtext);
2511
2512       return concat3 (before, newtext, after);
2513     }
2514
2515   /* Record point, then move (quietly) to the start of the match.  */
2516   if (PT >= search_regs.end[sub])
2517     opoint = PT - ZV;
2518   else if (PT > search_regs.start[sub])
2519     opoint = search_regs.end[sub] - ZV;
2520   else
2521     opoint = PT;
2522
2523   /* If we want non-literal replacement,
2524      perform substitution on the replacement string.  */
2525   if (NILP (literal))
2526     {
2527       ptrdiff_t length = SBYTES (newtext);
2528       unsigned char *substed;
2529       ptrdiff_t substed_alloc_size, substed_len;
2530       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2531       bool str_multibyte = STRING_MULTIBYTE (newtext);
2532       int really_changed = 0;
2533
2534       substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length
2535                             ? STRING_BYTES_BOUND
2536                             : length * 2 + 100);
2537       substed = xmalloc (substed_alloc_size);
2538       substed_len = 0;
2539
2540       /* Go thru NEWTEXT, producing the actual text to insert in
2541          SUBSTED while adjusting multibyteness to that of the current
2542          buffer.  */
2543
2544       for (pos_byte = 0, pos = 0; pos_byte < length;)
2545         {
2546           unsigned char str[MAX_MULTIBYTE_LENGTH];
2547           const unsigned char *add_stuff = NULL;
2548           ptrdiff_t add_len = 0;
2549           ptrdiff_t idx = -1;
2550
2551           if (str_multibyte)
2552             {
2553               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2554               if (!buf_multibyte)
2555                 c = multibyte_char_to_unibyte (c);
2556             }
2557           else
2558             {
2559               /* Note that we don't have to increment POS.  */
2560               c = SREF (newtext, pos_byte++);
2561               if (buf_multibyte)
2562                 MAKE_CHAR_MULTIBYTE (c);
2563             }
2564
2565           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2566              or set IDX to a match index, which means put that part
2567              of the buffer text into SUBSTED.  */
2568
2569           if (c == '\\')
2570             {
2571               really_changed = 1;
2572
2573               if (str_multibyte)
2574                 {
2575                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2576                                                       pos, pos_byte);
2577                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2578                     c = multibyte_char_to_unibyte (c);
2579                 }
2580               else
2581                 {
2582                   c = SREF (newtext, pos_byte++);
2583                   if (buf_multibyte)
2584                     MAKE_CHAR_MULTIBYTE (c);
2585                 }
2586
2587               if (c == '&')
2588                 idx = sub;
2589               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2590                 {
2591                   if (search_regs.start[c - '0'] >= 1)
2592                     idx = c - '0';
2593                 }
2594               else if (c == '\\')
2595                 add_len = 1, add_stuff = (unsigned char *) "\\";
2596               else
2597                 {
2598                   xfree (substed);
2599                   error ("Invalid use of `\\' in replacement text");
2600                 }
2601             }
2602           else
2603             {
2604               add_len = CHAR_STRING (c, str);
2605               add_stuff = str;
2606             }
2607
2608           /* If we want to copy part of a previous match,
2609              set up ADD_STUFF and ADD_LEN to point to it.  */
2610           if (idx >= 0)
2611             {
2612               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2613               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2614               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2615                 move_gap_both (search_regs.start[idx], begbyte);
2616               add_stuff = BYTE_POS_ADDR (begbyte);
2617             }
2618
2619           /* Now the stuff we want to add to SUBSTED
2620              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2621
2622           /* Make sure SUBSTED is big enough.  */
2623           if (substed_alloc_size - substed_len < add_len)
2624             substed =
2625               xpalloc (substed, &substed_alloc_size,
2626                        add_len - (substed_alloc_size - substed_len),
2627                        STRING_BYTES_BOUND, 1);
2628
2629           /* Now add to the end of SUBSTED.  */
2630           if (add_stuff)
2631             {
2632               memcpy (substed + substed_len, add_stuff, add_len);
2633               substed_len += add_len;
2634             }
2635         }
2636
2637       if (really_changed)
2638         {
2639           if (buf_multibyte)
2640             {
2641               ptrdiff_t nchars =
2642                 multibyte_chars_in_text (substed, substed_len);
2643
2644               newtext = make_multibyte_string ((char *) substed, nchars,
2645                                                substed_len);
2646             }
2647           else
2648             newtext = make_unibyte_string ((char *) substed, substed_len);
2649         }
2650       xfree (substed);
2651     }
2652
2653   /* Replace the old text with the new in the cleanest possible way.  */
2654   replace_range (search_regs.start[sub], search_regs.end[sub],
2655                  newtext, 1, 0, 1);
2656   newpoint = search_regs.start[sub] + SCHARS (newtext);
2657
2658   if (case_action == all_caps)
2659     Fupcase_region (make_number (search_regs.start[sub]),
2660                     make_number (newpoint));
2661   else if (case_action == cap_initial)
2662     Fupcase_initials_region (make_number (search_regs.start[sub]),
2663                              make_number (newpoint));
2664
2665   /* Adjust search data for this change.  */
2666   {
2667     ptrdiff_t oldend = search_regs.end[sub];
2668     ptrdiff_t oldstart = search_regs.start[sub];
2669     ptrdiff_t change = newpoint - search_regs.end[sub];
2670     ptrdiff_t i;
2671
2672     for (i = 0; i < search_regs.num_regs; i++)
2673       {
2674         if (search_regs.start[i] >= oldend)
2675           search_regs.start[i] += change;
2676         else if (search_regs.start[i] > oldstart)
2677           search_regs.start[i] = oldstart;
2678         if (search_regs.end[i] >= oldend)
2679           search_regs.end[i] += change;
2680         else if (search_regs.end[i] > oldstart)
2681           search_regs.end[i] = oldstart;
2682       }
2683   }
2684
2685   /* Put point back where it was in the text.  */
2686   if (opoint <= 0)
2687     TEMP_SET_PT (opoint + ZV);
2688   else
2689     TEMP_SET_PT (opoint);
2690
2691   /* Now move point "officially" to the start of the inserted replacement.  */
2692   move_if_not_intangible (newpoint);
2693
2694   return Qnil;
2695 }
2696 \f
2697 static Lisp_Object
2698 match_limit (Lisp_Object num, int beginningp)
2699 {
2700   EMACS_INT n;
2701
2702   CHECK_NUMBER (num);
2703   n = XINT (num);
2704   if (n < 0)
2705     args_out_of_range (num, make_number (0));
2706   if (search_regs.num_regs <= 0)
2707     error ("No match data, because no search succeeded");
2708   if (n >= search_regs.num_regs
2709       || search_regs.start[n] < 0)
2710     return Qnil;
2711   return (make_number ((beginningp) ? search_regs.start[n]
2712                                     : search_regs.end[n]));
2713 }
2714
2715 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2716        doc: /* Return position of start of text matched by last search.
2717 SUBEXP, a number, specifies which parenthesized expression in the last
2718   regexp.
2719 Value is nil if SUBEXPth pair didn't match, or there were less than
2720   SUBEXP pairs.
2721 Zero means the entire text matched by the whole regexp or whole string.  */)
2722   (Lisp_Object subexp)
2723 {
2724   return match_limit (subexp, 1);
2725 }
2726
2727 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2728        doc: /* Return position of end of text matched by last search.
2729 SUBEXP, a number, specifies which parenthesized expression in the last
2730   regexp.
2731 Value is nil if SUBEXPth pair didn't match, or there were less than
2732   SUBEXP pairs.
2733 Zero means the entire text matched by the whole regexp or whole string.  */)
2734   (Lisp_Object subexp)
2735 {
2736   return match_limit (subexp, 0);
2737 }
2738
2739 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2740        doc: /* Return a list containing all info on what the last search matched.
2741 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2742 All the elements are markers or nil (nil if the Nth pair didn't match)
2743 if the last match was on a buffer; integers or nil if a string was matched.
2744 Use `set-match-data' to reinstate the data in this list.
2745
2746 If INTEGERS (the optional first argument) is non-nil, always use
2747 integers \(rather than markers) to represent buffer positions.  In
2748 this case, and if the last match was in a buffer, the buffer will get
2749 stored as one additional element at the end of the list.
2750
2751 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2752 enough to hold all the values, and if INTEGERS is non-nil, no consing
2753 is done.
2754
2755 If optional third arg RESEAT is non-nil, any previous markers on the
2756 REUSE list will be modified to point to nowhere.
2757
2758 Return value is undefined if the last search failed.  */)
2759   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2760 {
2761   Lisp_Object tail, prev;
2762   Lisp_Object *data;
2763   ptrdiff_t i, len;
2764
2765   if (!NILP (reseat))
2766     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2767       if (MARKERP (XCAR (tail)))
2768         {
2769           unchain_marker (XMARKER (XCAR (tail)));
2770           XSETCAR (tail, Qnil);
2771         }
2772
2773   if (NILP (last_thing_searched))
2774     return Qnil;
2775
2776   prev = Qnil;
2777
2778   data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
2779
2780   len = 0;
2781   for (i = 0; i < search_regs.num_regs; i++)
2782     {
2783       ptrdiff_t start = search_regs.start[i];
2784       if (start >= 0)
2785         {
2786           if (EQ (last_thing_searched, Qt)
2787               || ! NILP (integers))
2788             {
2789               XSETFASTINT (data[2 * i], start);
2790               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2791             }
2792           else if (BUFFERP (last_thing_searched))
2793             {
2794               data[2 * i] = Fmake_marker ();
2795               Fset_marker (data[2 * i],
2796                            make_number (start),
2797                            last_thing_searched);
2798               data[2 * i + 1] = Fmake_marker ();
2799               Fset_marker (data[2 * i + 1],
2800                            make_number (search_regs.end[i]),
2801                            last_thing_searched);
2802             }
2803           else
2804             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2805             emacs_abort ();
2806
2807           len = 2 * i + 2;
2808         }
2809       else
2810         data[2 * i] = data[2 * i + 1] = Qnil;
2811     }
2812
2813   if (BUFFERP (last_thing_searched) && !NILP (integers))
2814     {
2815       data[len] = last_thing_searched;
2816       len++;
2817     }
2818
2819   /* If REUSE is not usable, cons up the values and return them.  */
2820   if (! CONSP (reuse))
2821     return Flist (len, data);
2822
2823   /* If REUSE is a list, store as many value elements as will fit
2824      into the elements of REUSE.  */
2825   for (i = 0, tail = reuse; CONSP (tail);
2826        i++, tail = XCDR (tail))
2827     {
2828       if (i < len)
2829         XSETCAR (tail, data[i]);
2830       else
2831         XSETCAR (tail, Qnil);
2832       prev = tail;
2833     }
2834
2835   /* If we couldn't fit all value elements into REUSE,
2836      cons up the rest of them and add them to the end of REUSE.  */
2837   if (i < len)
2838     XSETCDR (prev, Flist (len - i, data + i));
2839
2840   return reuse;
2841 }
2842
2843 /* We used to have an internal use variant of `reseat' described as:
2844
2845       If RESEAT is `evaporate', put the markers back on the free list
2846       immediately.  No other references to the markers must exist in this
2847       case, so it is used only internally on the unwind stack and
2848       save-match-data from Lisp.
2849
2850    But it was ill-conceived: those supposedly-internal markers get exposed via
2851    the undo-list, so freeing them here is unsafe.  */
2852
2853 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2854        doc: /* Set internal data on last search match from elements of LIST.
2855 LIST should have been created by calling `match-data' previously.
2856
2857 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2858   (register Lisp_Object list, Lisp_Object reseat)
2859 {
2860   ptrdiff_t i;
2861   register Lisp_Object marker;
2862
2863   if (running_asynch_code)
2864     save_search_regs ();
2865
2866   CHECK_LIST (list);
2867
2868   /* Unless we find a marker with a buffer or an explicit buffer
2869      in LIST, assume that this match data came from a string.  */
2870   last_thing_searched = Qt;
2871
2872   /* Allocate registers if they don't already exist.  */
2873   {
2874     EMACS_INT length = XFASTINT (Flength (list)) / 2;
2875
2876     if (length > search_regs.num_regs)
2877       {
2878         ptrdiff_t num_regs = search_regs.num_regs;
2879         if (PTRDIFF_MAX < length)
2880           memory_full (SIZE_MAX);
2881         search_regs.start =
2882           xpalloc (search_regs.start, &num_regs, length - num_regs,
2883                    min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2884         search_regs.end =
2885           xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2886
2887         for (i = search_regs.num_regs; i < num_regs; i++)
2888           search_regs.start[i] = -1;
2889
2890         search_regs.num_regs = num_regs;
2891       }
2892
2893     for (i = 0; CONSP (list); i++)
2894       {
2895         marker = XCAR (list);
2896         if (BUFFERP (marker))
2897           {
2898             last_thing_searched = marker;
2899             break;
2900           }
2901         if (i >= length)
2902           break;
2903         if (NILP (marker))
2904           {
2905             search_regs.start[i] = -1;
2906             list = XCDR (list);
2907           }
2908         else
2909           {
2910             Lisp_Object from;
2911             Lisp_Object m;
2912
2913             m = marker;
2914             if (MARKERP (marker))
2915               {
2916                 if (XMARKER (marker)->buffer == 0)
2917                   XSETFASTINT (marker, 0);
2918                 else
2919                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2920               }
2921
2922             CHECK_NUMBER_COERCE_MARKER (marker);
2923             from = marker;
2924
2925             if (!NILP (reseat) && MARKERP (m))
2926               {
2927                 unchain_marker (XMARKER (m));
2928                 XSETCAR (list, Qnil);
2929               }
2930
2931             if ((list = XCDR (list), !CONSP (list)))
2932               break;
2933
2934             m = marker = XCAR (list);
2935
2936             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2937               XSETFASTINT (marker, 0);
2938
2939             CHECK_NUMBER_COERCE_MARKER (marker);
2940             if ((XINT (from) < 0
2941                  ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2942                  : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2943                 && (XINT (marker) < 0
2944                     ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2945                     : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2946               {
2947                 search_regs.start[i] = XINT (from);
2948                 search_regs.end[i] = XINT (marker);
2949               }
2950             else
2951               {
2952                 search_regs.start[i] = -1;
2953               }
2954
2955             if (!NILP (reseat) && MARKERP (m))
2956               {
2957                 unchain_marker (XMARKER (m));
2958                 XSETCAR (list, Qnil);
2959               }
2960           }
2961         list = XCDR (list);
2962       }
2963
2964     for (; i < search_regs.num_regs; i++)
2965       search_regs.start[i] = -1;
2966   }
2967
2968   return Qnil;
2969 }
2970
2971 /* If non-zero the match data have been saved in saved_search_regs
2972    during the execution of a sentinel or filter. */
2973 static int search_regs_saved;
2974 static struct re_registers saved_search_regs;
2975 static Lisp_Object saved_last_thing_searched;
2976
2977 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2978    if asynchronous code (filter or sentinel) is running. */
2979 static void
2980 save_search_regs (void)
2981 {
2982   if (!search_regs_saved)
2983     {
2984       saved_search_regs.num_regs = search_regs.num_regs;
2985       saved_search_regs.start = search_regs.start;
2986       saved_search_regs.end = search_regs.end;
2987       saved_last_thing_searched = last_thing_searched;
2988       last_thing_searched = Qnil;
2989       search_regs.num_regs = 0;
2990       search_regs.start = 0;
2991       search_regs.end = 0;
2992
2993       search_regs_saved = 1;
2994     }
2995 }
2996
2997 /* Called upon exit from filters and sentinels. */
2998 void
2999 restore_search_regs (void)
3000 {
3001   if (search_regs_saved)
3002     {
3003       if (search_regs.num_regs > 0)
3004         {
3005           xfree (search_regs.start);
3006           xfree (search_regs.end);
3007         }
3008       search_regs.num_regs = saved_search_regs.num_regs;
3009       search_regs.start = saved_search_regs.start;
3010       search_regs.end = saved_search_regs.end;
3011       last_thing_searched = saved_last_thing_searched;
3012       saved_last_thing_searched = Qnil;
3013       search_regs_saved = 0;
3014     }
3015 }
3016
3017 static Lisp_Object
3018 unwind_set_match_data (Lisp_Object list)
3019 {
3020   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3021   return Fset_match_data (list, Qt);
3022 }
3023
3024 /* Called to unwind protect the match data.  */
3025 void
3026 record_unwind_save_match_data (void)
3027 {
3028   record_unwind_protect (unwind_set_match_data,
3029                          Fmatch_data (Qnil, Qnil, Qnil));
3030 }
3031
3032 /* Quote a string to deactivate reg-expr chars */
3033
3034 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3035        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3036   (Lisp_Object string)
3037 {
3038   register char *in, *out, *end;
3039   register char *temp;
3040   int backslashes_added = 0;
3041
3042   CHECK_STRING (string);
3043
3044   temp = alloca (SBYTES (string) * 2);
3045
3046   /* Now copy the data into the new string, inserting escapes. */
3047
3048   in = SSDATA (string);
3049   end = in + SBYTES (string);
3050   out = temp;
3051
3052   for (; in != end; in++)
3053     {
3054       if (*in == '['
3055           || *in == '*' || *in == '.' || *in == '\\'
3056           || *in == '?' || *in == '+'
3057           || *in == '^' || *in == '$')
3058         *out++ = '\\', backslashes_added++;
3059       *out++ = *in;
3060     }
3061
3062   return make_specified_string (temp,
3063                                 SCHARS (string) + backslashes_added,
3064                                 out - temp,
3065                                 STRING_MULTIBYTE (string));
3066 }
3067 \f
3068 void
3069 syms_of_search (void)
3070 {
3071   register int i;
3072
3073   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3074     {
3075       searchbufs[i].buf.allocated = 100;
3076       searchbufs[i].buf.buffer = xmalloc (100);
3077       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3078       searchbufs[i].regexp = Qnil;
3079       searchbufs[i].whitespace_regexp = Qnil;
3080       searchbufs[i].syntax_table = Qnil;
3081       staticpro (&searchbufs[i].regexp);
3082       staticpro (&searchbufs[i].whitespace_regexp);
3083       staticpro (&searchbufs[i].syntax_table);
3084       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3085     }
3086   searchbuf_head = &searchbufs[0];
3087
3088   DEFSYM (Qsearch_failed, "search-failed");
3089   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3090
3091   Fput (Qsearch_failed, Qerror_conditions,
3092         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3093   Fput (Qsearch_failed, Qerror_message,
3094         build_pure_c_string ("Search failed"));
3095
3096   Fput (Qinvalid_regexp, Qerror_conditions,
3097         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3098   Fput (Qinvalid_regexp, Qerror_message,
3099         build_pure_c_string ("Invalid regexp"));
3100
3101   last_thing_searched = Qnil;
3102   staticpro (&last_thing_searched);
3103
3104   saved_last_thing_searched = Qnil;
3105   staticpro (&saved_last_thing_searched);
3106
3107   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3108       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3109 Some commands use this for user-specified regexps.
3110 Spaces that occur inside character classes or repetition operators
3111 or other such regexp constructs are not replaced with this.
3112 A value of nil (which is the normal value) means treat spaces literally.  */);
3113   Vsearch_spaces_regexp = Qnil;
3114
3115   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3116       doc: /* Internal use only.
3117 If non-nil, the primitive searching and matching functions
3118 such as `looking-at', `string-match', `re-search-forward', etc.,
3119 do not set the match data.  The proper way to use this variable
3120 is to bind it with `let' around a small expression.  */);
3121   Vinhibit_changing_match_data = Qnil;
3122
3123   defsubr (&Slooking_at);
3124   defsubr (&Sposix_looking_at);
3125   defsubr (&Sstring_match);
3126   defsubr (&Sposix_string_match);
3127   defsubr (&Ssearch_forward);
3128   defsubr (&Ssearch_backward);
3129   defsubr (&Sre_search_forward);
3130   defsubr (&Sre_search_backward);
3131   defsubr (&Sposix_search_forward);
3132   defsubr (&Sposix_search_backward);
3133   defsubr (&Sreplace_match);
3134   defsubr (&Smatch_beginning);
3135   defsubr (&Smatch_end);
3136   defsubr (&Smatch_data);
3137   defsubr (&Sset_match_data);
3138   defsubr (&Sregexp_quote);
3139 }