src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 /* If non-nil, the match data will not be changed during call to
  97    searching or matching functions.  This variable is for internal use
  98    only.  */
  99 Lisp_Object Vinhibit_changing_match_data;
 100
 101 static void set_search_regs (EMACS_INT, EMACS_INT);
 102 static void save_search_regs (void);
 103 static EMACS_INT simple_search (int, unsigned char *, int, int,
 104                                 Lisp_Object, EMACS_INT, EMACS_INT,
 105                                 EMACS_INT, EMACS_INT);
 106 static EMACS_INT boyer_moore (int, unsigned char *, int, int,
 107                               Lisp_Object, Lisp_Object,
 108                               EMACS_INT, EMACS_INT,
 109                               EMACS_INT, EMACS_INT, int);
 110 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
 111                                 EMACS_INT, EMACS_INT, int, int,
 112                                 Lisp_Object, Lisp_Object, int);
 113 static void matcher_overflow (void) NO_RETURN;
 114
 115 static void
 116 matcher_overflow (void)
 117 {
 118   error ("Stack overflow in regexp matcher");
 119 }
 120
 121 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 122    PATTERN is the pattern to compile.
 123    CP is the place to put the result.
 124    TRANSLATE is a translation table for ignoring case, or nil for none.
 125    REGP is the structure that says where to store the "register"
 126    values that will result from matching this pattern.
 127    If it is 0, we should compile the pattern not to record any
 128    subexpression bounds.
 129    POSIX is nonzero if we want full backtracking (POSIX style)
 130    for this pattern.  0 means backtrack only enough to get a valid match.
 131
 132    The behavior also depends on Vsearch_spaces_regexp.  */
 133
 134 static void
 135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
 136 {
 137   char *val;
 138   reg_syntax_t old;
 139
 140   cp->regexp = Qnil;
 141   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 142   cp->posix = posix;
 143   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 144   cp->buf.charset_unibyte = charset_unibyte;
 145   if (STRINGP (Vsearch_spaces_regexp))
 146     cp->whitespace_regexp = Vsearch_spaces_regexp;
 147   else
 148     cp->whitespace_regexp = Qnil;
 149
 150   /* rms: I think BLOCK_INPUT is not needed here any more,
 151      because regex.c defines malloc to call xmalloc.
 152      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 153      So let's turn it off.  */
 154   /*  BLOCK_INPUT;  */
 155   old = re_set_syntax (RE_SYNTAX_EMACS
 156                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 157
 158   if (STRINGP (Vsearch_spaces_regexp))
 159     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 160   else
 161     re_set_whitespace_regexp (NULL);
 162
 163   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 164                                      SBYTES (pattern), &cp->buf);
 165
 166   /* If the compiled pattern hard codes some of the contents of the
 167      syntax-table, it can only be reused with *this* syntax table.  */
 168   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 169
 170   re_set_whitespace_regexp (NULL);
 171
 172   re_set_syntax (old);
 173   /* UNBLOCK_INPUT;  */
 174   if (val)
 175     xsignal1 (Qinvalid_regexp, build_string (val));
 176
 177   cp->regexp = Fcopy_sequence (pattern);
 178 }
 179
 180 /* Shrink each compiled regexp buffer in the cache
 181    to the size actually used right now.
 182    This is called from garbage collection.  */
 183
 184 void
 185 shrink_regexp_cache (void)
 186 {
 187   struct regexp_cache *cp;
 188
 189   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 190     {
 191       cp->buf.allocated = cp->buf.used;
 192       cp->buf.buffer
 193         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 194     }
 195 }
 196
 197 /* Clear the regexp cache w.r.t. a particular syntax table,
 198    because it was changed.
 199    There is no danger of memory leak here because re_compile_pattern
 200    automagically manages the memory in each re_pattern_buffer struct,
 201    based on its `allocated' and `buffer' values.  */
 202 void
 203 clear_regexp_cache (void)
 204 {
 205   int i;
 206
 207   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 208     /* It's tempting to compare with the syntax-table we've actually changed,
 209        but it's not sufficient because char-table inheritance means that
 210        modifying one syntax-table can change others at the same time.  */
 211     if (!EQ (searchbufs[i].syntax_table, Qt))
 212       searchbufs[i].regexp = Qnil;
 213 }
 214
 215 /* Compile a regexp if necessary, but first check to see if there's one in
 216    the cache.
 217    PATTERN is the pattern to compile.
 218    TRANSLATE is a translation table for ignoring case, or nil for none.
 219    REGP is the structure that says where to store the "register"
 220    values that will result from matching this pattern.
 221    If it is 0, we should compile the pattern not to record any
 222    subexpression bounds.
 223    POSIX is nonzero if we want full backtracking (POSIX style)
 224    for this pattern.  0 means backtrack only enough to get a valid match.  */
 225
 226 struct re_pattern_buffer *
 227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 228 {
 229   struct regexp_cache *cp, **cpp;
 230
 231   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 232     {
 233       cp = *cpp;
 234       /* Entries are initialized to nil, and may be set to nil by
 235          compile_pattern_1 if the pattern isn't valid.  Don't apply
 236          string accessors in those cases.  However, compile_pattern_1
 237          is only applied to the cache entry we pick here to reuse.  So
 238          nil should never appear before a non-nil entry.  */
 239       if (NILP (cp->regexp))
 240         goto compile_it;
 241       if (SCHARS (cp->regexp) == SCHARS (pattern)
 242           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 243           && !NILP (Fstring_equal (cp->regexp, pattern))
 244           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 245           && cp->posix == posix
 246           && (EQ (cp->syntax_table, Qt)
 247               || EQ (cp->syntax_table, current_buffer->syntax_table))
 248           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 249           && cp->buf.charset_unibyte == charset_unibyte)
 250         break;
 251
 252       /* If we're at the end of the cache, compile into the nil cell
 253          we found, or the last (least recently used) cell with a
 254          string value.  */
 255       if (cp->next == 0)
 256         {
 257         compile_it:
 258           compile_pattern_1 (cp, pattern, translate, regp, posix);
 259           break;
 260         }
 261     }
 262
 263   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 264      either because we found it in the cache or because we just compiled it.
 265      Move it to the front of the queue to mark it as most recently used.  */
 266   *cpp = cp->next;
 267   cp->next = searchbuf_head;
 268   searchbuf_head = cp;
 269
 270   /* Advise the searching functions about the space we have allocated
 271      for register data.  */
 272   if (regp)
 273     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 274
 275   /* The compiled pattern can be used both for multibyte and unibyte
 276      target.  But, we have to tell which the pattern is used for. */
 277   cp->buf.target_multibyte = multibyte;
 278
 279   return &cp->buf;
 280 }
 281
 282 \f
 283 static Lisp_Object
 284 looking_at_1 (Lisp_Object string, int posix)
 285 {
 286   Lisp_Object val;
 287   unsigned char *p1, *p2;
 288   EMACS_INT s1, s2;
 289   register int i;
 290   struct re_pattern_buffer *bufp;
 291
 292   if (running_asynch_code)
 293     save_search_regs ();
 294
 295   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 296   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 297     = current_buffer->case_eqv_table;
 298
 299   CHECK_STRING (string);
 300   bufp = compile_pattern (string,
 301                           (NILP (Vinhibit_changing_match_data)
 302                            ? &search_regs : NULL),
 303                           (!NILP (current_buffer->case_fold_search)
 304                            ? current_buffer->case_canon_table : Qnil),
 305                           posix,
 306                           !NILP (current_buffer->enable_multibyte_characters));
 307
 308   immediate_quit = 1;
 309   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 310
 311   /* Get pointers and sizes of the two strings
 312      that make up the visible portion of the buffer. */
 313
 314   p1 = BEGV_ADDR;
 315   s1 = GPT_BYTE - BEGV_BYTE;
 316   p2 = GAP_END_ADDR;
 317   s2 = ZV_BYTE - GPT_BYTE;
 318   if (s1 < 0)
 319     {
 320       p2 = p1;
 321       s2 = ZV_BYTE - BEGV_BYTE;
 322       s1 = 0;
 323     }
 324   if (s2 < 0)
 325     {
 326       s1 = ZV_BYTE - BEGV_BYTE;
 327       s2 = 0;
 328     }
 329
 330   re_match_object = Qnil;
 331
 332   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 333                   PT_BYTE - BEGV_BYTE,
 334                   (NILP (Vinhibit_changing_match_data)
 335                    ? &search_regs : NULL),
 336                   ZV_BYTE - BEGV_BYTE);
 337   immediate_quit = 0;
 338
 339   if (i == -2)
 340     matcher_overflow ();
 341
 342   val = (0 <= i ? Qt : Qnil);
 343   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 344     for (i = 0; i < search_regs.num_regs; i++)
 345       if (search_regs.start[i] >= 0)
 346         {
 347           search_regs.start[i]
 348             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 349           search_regs.end[i]
 350             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 351         }
 352
 353   /* Set last_thing_searched only when match data is changed.  */
 354   if (NILP (Vinhibit_changing_match_data))
 355     XSETBUFFER (last_thing_searched, current_buffer);
 356
 357   return val;
 358 }
 359
 360 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 361        doc: /* Return t if text after point matches regular expression REGEXP.
 362 This function modifies the match data that `match-beginning',
 363 `match-end' and `match-data' access; save and restore the match
 364 data if you want to preserve them.  */)
 365      (regexp)
 366      Lisp_Object regexp;
 367 {
 368   return looking_at_1 (regexp, 0);
 369 }
 370
 371 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 372        doc: /* Return t if text after point matches regular expression REGEXP.
 373 Find the longest match, in accord with Posix regular expression rules.
 374 This function modifies the match data that `match-beginning',
 375 `match-end' and `match-data' access; save and restore the match
 376 data if you want to preserve them.  */)
 377      (regexp)
 378      Lisp_Object regexp;
 379 {
 380   return looking_at_1 (regexp, 1);
 381 }
 382 \f
 383 static Lisp_Object
 384 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 385 {
 386   int val;
 387   struct re_pattern_buffer *bufp;
 388   EMACS_INT pos, pos_byte;
 389   int i;
 390
 391   if (running_asynch_code)
 392     save_search_regs ();
 393
 394   CHECK_STRING (regexp);
 395   CHECK_STRING (string);
 396
 397   if (NILP (start))
 398     pos = 0, pos_byte = 0;
 399   else
 400     {
 401       int len = SCHARS (string);
 402
 403       CHECK_NUMBER (start);
 404       pos = XINT (start);
 405       if (pos < 0 && -pos <= len)
 406         pos = len + pos;
 407       else if (0 > pos || pos > len)
 408         args_out_of_range (string, start);
 409       pos_byte = string_char_to_byte (string, pos);
 410     }
 411
 412   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 413   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 414     = current_buffer->case_eqv_table;
 415
 416   bufp = compile_pattern (regexp,
 417                           (NILP (Vinhibit_changing_match_data)
 418                            ? &search_regs : NULL),
 419                           (!NILP (current_buffer->case_fold_search)
 420                            ? current_buffer->case_canon_table : Qnil),
 421                           posix,
 422                           STRING_MULTIBYTE (string));
 423   immediate_quit = 1;
 424   re_match_object = string;
 425
 426   val = re_search (bufp, (char *) SDATA (string),
 427                    SBYTES (string), pos_byte,
 428                    SBYTES (string) - pos_byte,
 429                    (NILP (Vinhibit_changing_match_data)
 430                     ? &search_regs : NULL));
 431   immediate_quit = 0;
 432
 433   /* Set last_thing_searched only when match data is changed.  */
 434   if (NILP (Vinhibit_changing_match_data))
 435     last_thing_searched = Qt;
 436
 437   if (val == -2)
 438     matcher_overflow ();
 439   if (val < 0) return Qnil;
 440
 441   if (NILP (Vinhibit_changing_match_data))
 442     for (i = 0; i < search_regs.num_regs; i++)
 443       if (search_regs.start[i] >= 0)
 444         {
 445           search_regs.start[i]
 446             = string_byte_to_char (string, search_regs.start[i]);
 447           search_regs.end[i]
 448             = string_byte_to_char (string, search_regs.end[i]);
 449         }
 450
 451   return make_number (string_byte_to_char (string, val));
 452 }
 453
 454 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 455        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 456 Matching ignores case if `case-fold-search' is non-nil.
 457 If third arg START is non-nil, start search at that index in STRING.
 458 For index of first char beyond the match, do (match-end 0).
 459 `match-end' and `match-beginning' also give indices of substrings
 460 matched by parenthesis constructs in the pattern.
 461
 462 You can use the function `match-string' to extract the substrings
 463 matched by the parenthesis constructions in REGEXP. */)
 464      (regexp, string, start)
 465      Lisp_Object regexp, string, start;
 466 {
 467   return string_match_1 (regexp, string, start, 0);
 468 }
 469
 470 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 471        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 472 Find the longest match, in accord with Posix regular expression rules.
 473 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 474 If third arg START is non-nil, start search at that index in STRING.
 475 For index of first char beyond the match, do (match-end 0).
 476 `match-end' and `match-beginning' also give indices of substrings
 477 matched by parenthesis constructs in the pattern.  */)
 478      (regexp, string, start)
 479      Lisp_Object regexp, string, start;
 480 {
 481   return string_match_1 (regexp, string, start, 1);
 482 }
 483
 484 /* Match REGEXP against STRING, searching all of STRING,
 485    and return the index of the match, or negative on failure.
 486    This does not clobber the match data.  */
 487
 488 int
 489 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 490 {
 491   int val;
 492   struct re_pattern_buffer *bufp;
 493
 494   bufp = compile_pattern (regexp, 0, Qnil,
 495                           0, STRING_MULTIBYTE (string));
 496   immediate_quit = 1;
 497   re_match_object = string;
 498
 499   val = re_search (bufp, (char *) SDATA (string),
 500                    SBYTES (string), 0,
 501                    SBYTES (string), 0);
 502   immediate_quit = 0;
 503   return val;
 504 }
 505
 506 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 507    and return the index of the match, or negative on failure.
 508    This does not clobber the match data.
 509    We assume that STRING contains single-byte characters.  */
 510
 511 extern Lisp_Object Vascii_downcase_table;
 512
 513 int
 514 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
 515 {
 516   int val;
 517   struct re_pattern_buffer *bufp;
 518   int len = strlen (string);
 519
 520   regexp = string_make_unibyte (regexp);
 521   re_match_object = Qt;
 522   bufp = compile_pattern (regexp, 0,
 523                           Vascii_canon_table, 0,
 524                           0);
 525   immediate_quit = 1;
 526   val = re_search (bufp, string, len, 0, len, 0);
 527   immediate_quit = 0;
 528   return val;
 529 }
 530
 531 /* Like fast_string_match but ignore case.  */
 532
 533 int
 534 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 535 {
 536   int val;
 537   struct re_pattern_buffer *bufp;
 538
 539   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 540                           0, STRING_MULTIBYTE (string));
 541   immediate_quit = 1;
 542   re_match_object = string;
 543
 544   val = re_search (bufp, (char *) SDATA (string),
 545                    SBYTES (string), 0,
 546                    SBYTES (string), 0);
 547   immediate_quit = 0;
 548   return val;
 549 }
 550 \f
 551 /* Match REGEXP against the characters after POS to LIMIT, and return
 552    the number of matched characters.  If STRING is non-nil, match
 553    against the characters in it.  In that case, POS and LIMIT are
 554    indices into the string.  This function doesn't modify the match
 555    data.  */
 556
 557 EMACS_INT
 558 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
 559 {
 560   int multibyte;
 561   struct re_pattern_buffer *buf;
 562   unsigned char *p1, *p2;
 563   EMACS_INT s1, s2;
 564   EMACS_INT len;
 565
 566   if (STRINGP (string))
 567     {
 568       if (pos_byte < 0)
 569         pos_byte = string_char_to_byte (string, pos);
 570       if (limit_byte < 0)
 571         limit_byte = string_char_to_byte (string, limit);
 572       p1 = NULL;
 573       s1 = 0;
 574       p2 = SDATA (string);
 575       s2 = SBYTES (string);
 576       re_match_object = string;
 577       multibyte = STRING_MULTIBYTE (string);
 578     }
 579   else
 580     {
 581       if (pos_byte < 0)
 582         pos_byte = CHAR_TO_BYTE (pos);
 583       if (limit_byte < 0)
 584         limit_byte = CHAR_TO_BYTE (limit);
 585       pos_byte -= BEGV_BYTE;
 586       limit_byte -= BEGV_BYTE;
 587       p1 = BEGV_ADDR;
 588       s1 = GPT_BYTE - BEGV_BYTE;
 589       p2 = GAP_END_ADDR;
 590       s2 = ZV_BYTE - GPT_BYTE;
 591       if (s1 < 0)
 592         {
 593           p2 = p1;
 594           s2 = ZV_BYTE - BEGV_BYTE;
 595           s1 = 0;
 596         }
 597       if (s2 < 0)
 598         {
 599           s1 = ZV_BYTE - BEGV_BYTE;
 600           s2 = 0;
 601         }
 602       re_match_object = Qnil;
 603       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 604     }
 605
 606   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 607   immediate_quit = 1;
 608   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 609                     pos_byte, NULL, limit_byte);
 610   immediate_quit = 0;
 611
 612   return len;
 613 }
 614
 615 \f
 616 /* The newline cache: remembering which sections of text have no newlines.  */
 617
 618 /* If the user has requested newline caching, make sure it's on.
 619    Otherwise, make sure it's off.
 620    This is our cheezy way of associating an action with the change of
 621    state of a buffer-local variable.  */
 622 static void
 623 newline_cache_on_off (struct buffer *buf)
 624 {
 625   if (NILP (buf->cache_long_line_scans))
 626     {
 627       /* It should be off.  */
 628       if (buf->newline_cache)
 629         {
 630           free_region_cache (buf->newline_cache);
 631           buf->newline_cache = 0;
 632         }
 633     }
 634   else
 635     {
 636       /* It should be on.  */
 637       if (buf->newline_cache == 0)
 638         buf->newline_cache = new_region_cache ();
 639     }
 640 }
 641
 642 \f
 643 /* Search for COUNT instances of the character TARGET between START and END.
 644
 645    If COUNT is positive, search forwards; END must be >= START.
 646    If COUNT is negative, search backwards for the -COUNTth instance;
 647       END must be <= START.
 648    If COUNT is zero, do anything you please; run rogue, for all I care.
 649
 650    If END is zero, use BEGV or ZV instead, as appropriate for the
 651    direction indicated by COUNT.
 652
 653    If we find COUNT instances, set *SHORTAGE to zero, and return the
 654    position past the COUNTth match.  Note that for reverse motion
 655    this is not the same as the usual convention for Emacs motion commands.
 656
 657    If we don't find COUNT instances before reaching END, set *SHORTAGE
 658    to the number of TARGETs left unfound, and return END.
 659
 660    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 661    except when inside redisplay.  */
 662
 663 int
 664 scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
 665 {
 666   struct region_cache *newline_cache;
 667   int direction;
 668
 669   if (count > 0)
 670     {
 671       direction = 1;
 672       if (! end) end = ZV;
 673     }
 674   else
 675     {
 676       direction = -1;
 677       if (! end) end = BEGV;
 678     }
 679
 680   newline_cache_on_off (current_buffer);
 681   newline_cache = current_buffer->newline_cache;
 682
 683   if (shortage != 0)
 684     *shortage = 0;
 685
 686   immediate_quit = allow_quit;
 687
 688   if (count > 0)
 689     while (start != end)
 690       {
 691         /* Our innermost scanning loop is very simple; it doesn't know
 692            about gaps, buffer ends, or the newline cache.  ceiling is
 693            the position of the last character before the next such
 694            obstacle --- the last character the dumb search loop should
 695            examine.  */
 696         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
 697         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 698         EMACS_INT tem;
 699
 700         /* If we're looking for a newline, consult the newline cache
 701            to see where we can avoid some scanning.  */
 702         if (target == '\n' && newline_cache)
 703           {
 704             int next_change;
 705             immediate_quit = 0;
 706             while (region_cache_forward
 707                    (current_buffer, newline_cache, start_byte, &next_change))
 708               start_byte = next_change;
 709             immediate_quit = allow_quit;
 710
 711             /* START should never be after END.  */
 712             if (start_byte > ceiling_byte)
 713               start_byte = ceiling_byte;
 714
 715             /* Now the text after start is an unknown region, and
 716                next_change is the position of the next known region. */
 717             ceiling_byte = min (next_change - 1, ceiling_byte);
 718           }
 719
 720         /* The dumb loop can only scan text stored in contiguous
 721            bytes. BUFFER_CEILING_OF returns the last character
 722            position that is contiguous, so the ceiling is the
 723            position after that.  */
 724         tem = BUFFER_CEILING_OF (start_byte);
 725         ceiling_byte = min (tem, ceiling_byte);
 726
 727         {
 728           /* The termination address of the dumb loop.  */
 729           register unsigned char *ceiling_addr
 730             = BYTE_POS_ADDR (ceiling_byte) + 1;
 731           register unsigned char *cursor
 732             = BYTE_POS_ADDR (start_byte);
 733           unsigned char *base = cursor;
 734
 735           while (cursor < ceiling_addr)
 736             {
 737               unsigned char *scan_start = cursor;
 738
 739               /* The dumb loop.  */
 740               while (*cursor != target && ++cursor < ceiling_addr)
 741                 ;
 742
 743               /* If we're looking for newlines, cache the fact that
 744                  the region from start to cursor is free of them. */
 745               if (target == '\n' && newline_cache)
 746                 know_region_cache (current_buffer, newline_cache,
 747                                    start_byte + scan_start - base,
 748                                    start_byte + cursor - base);
 749
 750               /* Did we find the target character?  */
 751               if (cursor < ceiling_addr)
 752                 {
 753                   if (--count == 0)
 754                     {
 755                       immediate_quit = 0;
 756                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 757                     }
 758                   cursor++;
 759                 }
 760             }
 761
 762           start = BYTE_TO_CHAR (start_byte + cursor - base);
 763         }
 764       }
 765   else
 766     while (start > end)
 767       {
 768         /* The last character to check before the next obstacle.  */
 769         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
 770         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 771         EMACS_INT tem;
 772
 773         /* Consult the newline cache, if appropriate.  */
 774         if (target == '\n' && newline_cache)
 775           {
 776             int next_change;
 777             immediate_quit = 0;
 778             while (region_cache_backward
 779                    (current_buffer, newline_cache, start_byte, &next_change))
 780               start_byte = next_change;
 781             immediate_quit = allow_quit;
 782
 783             /* Start should never be at or before end.  */
 784             if (start_byte <= ceiling_byte)
 785               start_byte = ceiling_byte + 1;
 786
 787             /* Now the text before start is an unknown region, and
 788                next_change is the position of the next known region. */
 789             ceiling_byte = max (next_change, ceiling_byte);
 790           }
 791
 792         /* Stop scanning before the gap.  */
 793         tem = BUFFER_FLOOR_OF (start_byte - 1);
 794         ceiling_byte = max (tem, ceiling_byte);
 795
 796         {
 797           /* The termination address of the dumb loop.  */
 798           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 799           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 800           unsigned char *base = cursor;
 801
 802           while (cursor >= ceiling_addr)
 803             {
 804               unsigned char *scan_start = cursor;
 805
 806               while (*cursor != target && --cursor >= ceiling_addr)
 807                 ;
 808
 809               /* If we're looking for newlines, cache the fact that
 810                  the region from after the cursor to start is free of them.  */
 811               if (target == '\n' && newline_cache)
 812                 know_region_cache (current_buffer, newline_cache,
 813                                    start_byte + cursor - base,
 814                                    start_byte + scan_start - base);
 815
 816               /* Did we find the target character?  */
 817               if (cursor >= ceiling_addr)
 818                 {
 819                   if (++count >= 0)
 820                     {
 821                       immediate_quit = 0;
 822                       return BYTE_TO_CHAR (start_byte + cursor - base);
 823                     }
 824                   cursor--;
 825                 }
 826             }
 827
 828           start = BYTE_TO_CHAR (start_byte + cursor - base);
 829         }
 830       }
 831
 832   immediate_quit = 0;
 833   if (shortage != 0)
 834     *shortage = count * direction;
 835   return start;
 836 }
 837 \f
 838 /* Search for COUNT instances of a line boundary, which means either a
 839    newline or (if selective display enabled) a carriage return.
 840    Start at START.  If COUNT is negative, search backwards.
 841
 842    We report the resulting position by calling TEMP_SET_PT_BOTH.
 843
 844    If we find COUNT instances. we position after (always after,
 845    even if scanning backwards) the COUNTth match, and return 0.
 846
 847    If we don't find COUNT instances before reaching the end of the
 848    buffer (or the beginning, if scanning backwards), we return
 849    the number of line boundaries left unfound, and position at
 850    the limit we bumped up against.
 851
 852    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 853    except in special cases.  */
 854
 855 int
 856 scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
 857 {
 858   int direction = ((count > 0) ? 1 : -1);
 859
 860   register unsigned char *cursor;
 861   unsigned char *base;
 862
 863   EMACS_INT ceiling;
 864   register unsigned char *ceiling_addr;
 865
 866   int old_immediate_quit = immediate_quit;
 867
 868   /* The code that follows is like scan_buffer
 869      but checks for either newline or carriage return.  */
 870
 871   if (allow_quit)
 872     immediate_quit++;
 873
 874   start_byte = CHAR_TO_BYTE (start);
 875
 876   if (count > 0)
 877     {
 878       while (start_byte < limit_byte)
 879         {
 880           ceiling =  BUFFER_CEILING_OF (start_byte);
 881           ceiling = min (limit_byte - 1, ceiling);
 882           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 883           base = (cursor = BYTE_POS_ADDR (start_byte));
 884           while (1)
 885             {
 886               while (*cursor != '\n' && ++cursor != ceiling_addr)
 887                 ;
 888
 889               if (cursor != ceiling_addr)
 890                 {
 891                   if (--count == 0)
 892                     {
 893                       immediate_quit = old_immediate_quit;
 894                       start_byte = start_byte + cursor - base + 1;
 895                       start = BYTE_TO_CHAR (start_byte);
 896                       TEMP_SET_PT_BOTH (start, start_byte);
 897                       return 0;
 898                     }
 899                   else
 900                     if (++cursor == ceiling_addr)
 901                       break;
 902                 }
 903               else
 904                 break;
 905             }
 906           start_byte += cursor - base;
 907         }
 908     }
 909   else
 910     {
 911       while (start_byte > limit_byte)
 912         {
 913           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 914           ceiling = max (limit_byte, ceiling);
 915           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 916           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 917           while (1)
 918             {
 919               while (--cursor != ceiling_addr && *cursor != '\n')
 920                 ;
 921
 922               if (cursor != ceiling_addr)
 923                 {
 924                   if (++count == 0)
 925                     {
 926                       immediate_quit = old_immediate_quit;
 927                       /* Return the position AFTER the match we found.  */
 928                       start_byte = start_byte + cursor - base + 1;
 929                       start = BYTE_TO_CHAR (start_byte);
 930                       TEMP_SET_PT_BOTH (start, start_byte);
 931                       return 0;
 932                     }
 933                 }
 934               else
 935                 break;
 936             }
 937           /* Here we add 1 to compensate for the last decrement
 938              of CURSOR, which took it past the valid range.  */
 939           start_byte += cursor - base + 1;
 940         }
 941     }
 942
 943   TEMP_SET_PT_BOTH (limit, limit_byte);
 944   immediate_quit = old_immediate_quit;
 945
 946   return count * direction;
 947 }
 948
 949 int
 950 find_next_newline_no_quit (EMACS_INT from, int cnt)
 951 {
 952   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 953 }
 954
 955 /* Like find_next_newline, but returns position before the newline,
 956    not after, and only search up to TO.  This isn't just
 957    find_next_newline (...)-1, because you might hit TO.  */
 958
 959 int
 960 find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
 961 {
 962   int shortage;
 963   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 964
 965   if (shortage == 0)
 966     pos--;
 967
 968   return pos;
 969 }
 970 \f
 971 /* Subroutines of Lisp buffer search functions. */
 972
 973 static Lisp_Object
 974 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
 975 {
 976   register int np;
 977   int lim, lim_byte;
 978   int n = direction;
 979
 980   if (!NILP (count))
 981     {
 982       CHECK_NUMBER (count);
 983       n *= XINT (count);
 984     }
 985
 986   CHECK_STRING (string);
 987   if (NILP (bound))
 988     {
 989       if (n > 0)
 990         lim = ZV, lim_byte = ZV_BYTE;
 991       else
 992         lim = BEGV, lim_byte = BEGV_BYTE;
 993     }
 994   else
 995     {
 996       CHECK_NUMBER_COERCE_MARKER (bound);
 997       lim = XINT (bound);
 998       if (n > 0 ? lim < PT : lim > PT)
 999         error ("Invalid search bound (wrong side of point)");
1000       if (lim > ZV)
1001         lim = ZV, lim_byte = ZV_BYTE;
1002       else if (lim < BEGV)
1003         lim = BEGV, lim_byte = BEGV_BYTE;
1004       else
1005         lim_byte = CHAR_TO_BYTE (lim);
1006     }
1007
1008   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1009   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1010     = current_buffer->case_eqv_table;
1011
1012   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1013                       (!NILP (current_buffer->case_fold_search)
1014                        ? current_buffer->case_canon_table
1015                        : Qnil),
1016                       (!NILP (current_buffer->case_fold_search)
1017                        ? current_buffer->case_eqv_table
1018                        : Qnil),
1019                       posix);
1020   if (np <= 0)
1021     {
1022       if (NILP (noerror))
1023         xsignal1 (Qsearch_failed, string);
1024
1025       if (!EQ (noerror, Qt))
1026         {
1027           if (lim < BEGV || lim > ZV)
1028             abort ();
1029           SET_PT_BOTH (lim, lim_byte);
1030           return Qnil;
1031 #if 0 /* This would be clean, but maybe programs depend on
1032          a value of nil here.  */
1033           np = lim;
1034 #endif
1035         }
1036       else
1037         return Qnil;
1038     }
1039
1040   if (np < BEGV || np > ZV)
1041     abort ();
1042
1043   SET_PT (np);
1044
1045   return make_number (np);
1046 }
1047 \f
1048 /* Return 1 if REGEXP it matches just one constant string.  */
1049
1050 static int
1051 trivial_regexp_p (Lisp_Object regexp)
1052 {
1053   int len = SBYTES (regexp);
1054   unsigned char *s = SDATA (regexp);
1055   while (--len >= 0)
1056     {
1057       switch (*s++)
1058         {
1059         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1060           return 0;
1061         case '\\':
1062           if (--len < 0)
1063             return 0;
1064           switch (*s++)
1065             {
1066             case '|': case '(': case ')': case '`': case '\'': case 'b':
1067             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1068             case 'S': case '=': case '{': case '}': case '_':
1069             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1070             case '1': case '2': case '3': case '4': case '5':
1071             case '6': case '7': case '8': case '9':
1072               return 0;
1073             }
1074         }
1075     }
1076   return 1;
1077 }
1078
1079 /* Search for the n'th occurrence of STRING in the current buffer,
1080    starting at position POS and stopping at position LIM,
1081    treating STRING as a literal string if RE is false or as
1082    a regular expression if RE is true.
1083
1084    If N is positive, searching is forward and LIM must be greater than POS.
1085    If N is negative, searching is backward and LIM must be less than POS.
1086
1087    Returns -x if x occurrences remain to be found (x > 0),
1088    or else the position at the beginning of the Nth occurrence
1089    (if searching backward) or the end (if searching forward).
1090
1091    POSIX is nonzero if we want full backtracking (POSIX style)
1092    for this pattern.  0 means backtrack only enough to get a valid match.  */
1093
1094 #define TRANSLATE(out, trt, d)                  \
1095 do                                              \
1096   {                                             \
1097     if (! NILP (trt))                           \
1098       {                                         \
1099         Lisp_Object temp;                       \
1100         temp = Faref (trt, make_number (d));    \
1101         if (INTEGERP (temp))                    \
1102           out = XINT (temp);                    \
1103         else                                    \
1104           out = d;                              \
1105       }                                         \
1106     else                                        \
1107       out = d;                                  \
1108   }                                             \
1109 while (0)
1110
1111 /* Only used in search_buffer, to record the end position of the match
1112    when searching regexps and SEARCH_REGS should not be changed
1113    (i.e. Vinhibit_changing_match_data is non-nil).  */
1114 static struct re_registers search_regs_1;
1115
1116 static EMACS_INT
1117 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1118                RE, trt, inverse_trt, posix)
1119      Lisp_Object string;
1120      EMACS_INT pos;
1121      EMACS_INT pos_byte;
1122      EMACS_INT lim;
1123      EMACS_INT lim_byte;
1124      int n;
1125      int RE;
1126      Lisp_Object trt;
1127      Lisp_Object inverse_trt;
1128      int posix;
1129 {
1130   int len = SCHARS (string);
1131   int len_byte = SBYTES (string);
1132   register int i;
1133
1134   if (running_asynch_code)
1135     save_search_regs ();
1136
1137   /* Searching 0 times means don't move.  */
1138   /* Null string is found at starting position.  */
1139   if (len == 0 || n == 0)
1140     {
1141       set_search_regs (pos_byte, 0);
1142       return pos;
1143     }
1144
1145   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1146     {
1147       unsigned char *p1, *p2;
1148       int s1, s2;
1149       struct re_pattern_buffer *bufp;
1150
1151       bufp = compile_pattern (string,
1152                               (NILP (Vinhibit_changing_match_data)
1153                                ? &search_regs : &search_regs_1),
1154                               trt, posix,
1155                               !NILP (current_buffer->enable_multibyte_characters));
1156
1157       immediate_quit = 1;       /* Quit immediately if user types ^G,
1158                                    because letting this function finish
1159                                    can take too long. */
1160       QUIT;                     /* Do a pending quit right away,
1161                                    to avoid paradoxical behavior */
1162       /* Get pointers and sizes of the two strings
1163          that make up the visible portion of the buffer. */
1164
1165       p1 = BEGV_ADDR;
1166       s1 = GPT_BYTE - BEGV_BYTE;
1167       p2 = GAP_END_ADDR;
1168       s2 = ZV_BYTE - GPT_BYTE;
1169       if (s1 < 0)
1170         {
1171           p2 = p1;
1172           s2 = ZV_BYTE - BEGV_BYTE;
1173           s1 = 0;
1174         }
1175       if (s2 < 0)
1176         {
1177           s1 = ZV_BYTE - BEGV_BYTE;
1178           s2 = 0;
1179         }
1180       re_match_object = Qnil;
1181
1182       while (n < 0)
1183         {
1184           int val;
1185           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1186                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1187                              (NILP (Vinhibit_changing_match_data)
1188                               ? &search_regs : &search_regs_1),
1189                              /* Don't allow match past current point */
1190                              pos_byte - BEGV_BYTE);
1191           if (val == -2)
1192             {
1193               matcher_overflow ();
1194             }
1195           if (val >= 0)
1196             {
1197               if (NILP (Vinhibit_changing_match_data))
1198                 {
1199                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1200                   for (i = 0; i < search_regs.num_regs; i++)
1201                     if (search_regs.start[i] >= 0)
1202                       {
1203                         search_regs.start[i]
1204                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1205                         search_regs.end[i]
1206                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1207                       }
1208                   XSETBUFFER (last_thing_searched, current_buffer);
1209                   /* Set pos to the new position. */
1210                   pos = search_regs.start[0];
1211                 }
1212               else
1213                 {
1214                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1215                   /* Set pos to the new position.  */
1216                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1217                 }
1218             }
1219           else
1220             {
1221               immediate_quit = 0;
1222               return (n);
1223             }
1224           n++;
1225         }
1226       while (n > 0)
1227         {
1228           int val;
1229           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1230                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1231                              (NILP (Vinhibit_changing_match_data)
1232                               ? &search_regs : &search_regs_1),
1233                              lim_byte - BEGV_BYTE);
1234           if (val == -2)
1235             {
1236               matcher_overflow ();
1237             }
1238           if (val >= 0)
1239             {
1240               if (NILP (Vinhibit_changing_match_data))
1241                 {
1242                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1243                   for (i = 0; i < search_regs.num_regs; i++)
1244                     if (search_regs.start[i] >= 0)
1245                       {
1246                         search_regs.start[i]
1247                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1248                         search_regs.end[i]
1249                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1250                       }
1251                   XSETBUFFER (last_thing_searched, current_buffer);
1252                   pos = search_regs.end[0];
1253                 }
1254               else
1255                 {
1256                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1257                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1258                 }
1259             }
1260           else
1261             {
1262               immediate_quit = 0;
1263               return (0 - n);
1264             }
1265           n--;
1266         }
1267       immediate_quit = 0;
1268       return (pos);
1269     }
1270   else                          /* non-RE case */
1271     {
1272       unsigned char *raw_pattern, *pat;
1273       int raw_pattern_size;
1274       int raw_pattern_size_byte;
1275       unsigned char *patbuf;
1276       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1277       unsigned char *base_pat;
1278       /* Set to positive if we find a non-ASCII char that need
1279          translation.  Otherwise set to zero later.  */
1280       int char_base = -1;
1281       int boyer_moore_ok = 1;
1282
1283       /* MULTIBYTE says whether the text to be searched is multibyte.
1284          We must convert PATTERN to match that, or we will not really
1285          find things right.  */
1286
1287       if (multibyte == STRING_MULTIBYTE (string))
1288         {
1289           raw_pattern = (unsigned char *) SDATA (string);
1290           raw_pattern_size = SCHARS (string);
1291           raw_pattern_size_byte = SBYTES (string);
1292         }
1293       else if (multibyte)
1294         {
1295           raw_pattern_size = SCHARS (string);
1296           raw_pattern_size_byte
1297             = count_size_as_multibyte (SDATA (string),
1298                                        raw_pattern_size);
1299           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1300           copy_text (SDATA (string), raw_pattern,
1301                      SCHARS (string), 0, 1);
1302         }
1303       else
1304         {
1305           /* Converting multibyte to single-byte.
1306
1307              ??? Perhaps this conversion should be done in a special way
1308              by subtracting nonascii-insert-offset from each non-ASCII char,
1309              so that only the multibyte chars which really correspond to
1310              the chosen single-byte character set can possibly match.  */
1311           raw_pattern_size = SCHARS (string);
1312           raw_pattern_size_byte = SCHARS (string);
1313           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1314           copy_text (SDATA (string), raw_pattern,
1315                      SBYTES (string), 1, 0);
1316         }
1317
1318       /* Copy and optionally translate the pattern.  */
1319       len = raw_pattern_size;
1320       len_byte = raw_pattern_size_byte;
1321       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1322       pat = patbuf;
1323       base_pat = raw_pattern;
1324       if (multibyte)
1325         {
1326           /* Fill patbuf by translated characters in STRING while
1327              checking if we can use boyer-moore search.  If TRT is
1328              non-nil, we can use boyer-moore search only if TRT can be
1329              represented by the byte array of 256 elements.  For that,
1330              all non-ASCII case-equivalents of all case-senstive
1331              characters in STRING must belong to the same charset and
1332              row.  */
1333
1334           while (--len >= 0)
1335             {
1336               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1337               int c, translated, inverse;
1338               int in_charlen, charlen;
1339
1340               /* If we got here and the RE flag is set, it's because we're
1341                  dealing with a regexp known to be trivial, so the backslash
1342                  just quotes the next character.  */
1343               if (RE && *base_pat == '\\')
1344                 {
1345                   len--;
1346                   raw_pattern_size--;
1347                   len_byte--;
1348                   base_pat++;
1349                 }
1350
1351               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1352
1353               if (NILP (trt))
1354                 {
1355                   str = base_pat;
1356                   charlen = in_charlen;
1357                 }
1358               else
1359                 {
1360                   /* Translate the character.  */
1361                   TRANSLATE (translated, trt, c);
1362                   charlen = CHAR_STRING (translated, str_base);
1363                   str = str_base;
1364
1365                   /* Check if C has any other case-equivalents.  */
1366                   TRANSLATE (inverse, inverse_trt, c);
1367                   /* If so, check if we can use boyer-moore.  */
1368                   if (c != inverse && boyer_moore_ok)
1369                     {
1370                       /* Check if all equivalents belong to the same
1371                          group of characters.  Note that the check of C
1372                          itself is done by the last iteration.  */
1373                       int this_char_base = -1;
1374
1375                       while (boyer_moore_ok)
1376                         {
1377                           if (ASCII_BYTE_P (inverse))
1378                             {
1379                               if (this_char_base > 0)
1380                                 boyer_moore_ok = 0;
1381                               else
1382                                 this_char_base = 0;
1383                             }
1384                           else if (CHAR_BYTE8_P (inverse))
1385                             /* Boyer-moore search can't handle a
1386                                translation of an eight-bit
1387                                character.  */
1388                             boyer_moore_ok = 0;
1389                           else if (this_char_base < 0)
1390                             {
1391                               this_char_base = inverse & ~0x3F;
1392                               if (char_base < 0)
1393                                 char_base = this_char_base;
1394                               else if (this_char_base != char_base)
1395                                 boyer_moore_ok = 0;
1396                             }
1397                           else if ((inverse & ~0x3F) != this_char_base)
1398                             boyer_moore_ok = 0;
1399                           if (c == inverse)
1400                             break;
1401                           TRANSLATE (inverse, inverse_trt, inverse);
1402                         }
1403                     }
1404                 }
1405
1406               /* Store this character into the translated pattern.  */
1407               bcopy (str, pat, charlen);
1408               pat += charlen;
1409               base_pat += in_charlen;
1410               len_byte -= in_charlen;
1411             }
1412
1413           /* If char_base is still negative we didn't find any translated
1414              non-ASCII characters.  */
1415           if (char_base < 0)
1416             char_base = 0;
1417         }
1418       else
1419         {
1420           /* Unibyte buffer.  */
1421           char_base = 0;
1422           while (--len >= 0)
1423             {
1424               int c, translated;
1425
1426               /* If we got here and the RE flag is set, it's because we're
1427                  dealing with a regexp known to be trivial, so the backslash
1428                  just quotes the next character.  */
1429               if (RE && *base_pat == '\\')
1430                 {
1431                   len--;
1432                   raw_pattern_size--;
1433                   base_pat++;
1434                 }
1435               c = *base_pat++;
1436               TRANSLATE (translated, trt, c);
1437               *pat++ = translated;
1438             }
1439         }
1440
1441       len_byte = pat - patbuf;
1442       len = raw_pattern_size;
1443       pat = base_pat = patbuf;
1444
1445       if (boyer_moore_ok)
1446         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1447                             pos, pos_byte, lim, lim_byte,
1448                             char_base);
1449       else
1450         return simple_search (n, pat, len, len_byte, trt,
1451                               pos, pos_byte, lim, lim_byte);
1452     }
1453 }
1454 \f
1455 /* Do a simple string search N times for the string PAT,
1456    whose length is LEN/LEN_BYTE,
1457    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1458    TRT is the translation table.
1459
1460    Return the character position where the match is found.
1461    Otherwise, if M matches remained to be found, return -M.
1462
1463    This kind of search works regardless of what is in PAT and
1464    regardless of what is in TRT.  It is used in cases where
1465    boyer_moore cannot work.  */
1466
1467 static EMACS_INT
1468 simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
1469 {
1470   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1471   int forward = n > 0;
1472   /* Number of buffer bytes matched.  Note that this may be different
1473      from len_byte in a multibyte buffer.  */
1474   int match_byte;
1475
1476   if (lim > pos && multibyte)
1477     while (n > 0)
1478       {
1479         while (1)
1480           {
1481             /* Try matching at position POS.  */
1482             EMACS_INT this_pos = pos;
1483             EMACS_INT this_pos_byte = pos_byte;
1484             int this_len = len;
1485             unsigned char *p = pat;
1486             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1487               goto stop;
1488
1489             while (this_len > 0)
1490               {
1491                 int charlen, buf_charlen;
1492                 int pat_ch, buf_ch;
1493
1494                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1495                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1496                                                  buf_charlen);
1497                 TRANSLATE (buf_ch, trt, buf_ch);
1498
1499                 if (buf_ch != pat_ch)
1500                   break;
1501
1502                 this_len--;
1503                 p += charlen;
1504
1505                 this_pos_byte += buf_charlen;
1506                 this_pos++;
1507               }
1508
1509             if (this_len == 0)
1510               {
1511                 match_byte = this_pos_byte - pos_byte;
1512                 pos += len;
1513                 pos_byte += match_byte;
1514                 break;
1515               }
1516
1517             INC_BOTH (pos, pos_byte);
1518           }
1519
1520         n--;
1521       }
1522   else if (lim > pos)
1523     while (n > 0)
1524       {
1525         while (1)
1526           {
1527             /* Try matching at position POS.  */
1528             EMACS_INT this_pos = pos;
1529             int this_len = len;
1530             unsigned char *p = pat;
1531
1532             if (pos + len > lim)
1533               goto stop;
1534
1535             while (this_len > 0)
1536               {
1537                 int pat_ch = *p++;
1538                 int buf_ch = FETCH_BYTE (this_pos);
1539                 TRANSLATE (buf_ch, trt, buf_ch);
1540
1541                 if (buf_ch != pat_ch)
1542                   break;
1543
1544                 this_len--;
1545                 this_pos++;
1546               }
1547
1548             if (this_len == 0)
1549               {
1550                 match_byte = len;
1551                 pos += len;
1552                 break;
1553               }
1554
1555             pos++;
1556           }
1557
1558         n--;
1559       }
1560   /* Backwards search.  */
1561   else if (lim < pos && multibyte)
1562     while (n < 0)
1563       {
1564         while (1)
1565           {
1566             /* Try matching at position POS.  */
1567             EMACS_INT this_pos = pos;
1568             EMACS_INT this_pos_byte = pos_byte;
1569             int this_len = len;
1570             const unsigned char *p = pat + len_byte;
1571
1572             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1573               goto stop;
1574
1575             while (this_len > 0)
1576               {
1577                 int charlen;
1578                 int pat_ch, buf_ch;
1579
1580                 DEC_BOTH (this_pos, this_pos_byte);
1581                 PREV_CHAR_BOUNDARY (p, pat);
1582                 pat_ch = STRING_CHAR (p);
1583                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1584                 TRANSLATE (buf_ch, trt, buf_ch);
1585
1586                 if (buf_ch != pat_ch)
1587                   break;
1588
1589                 this_len--;
1590               }
1591
1592             if (this_len == 0)
1593               {
1594                 match_byte = pos_byte - this_pos_byte;
1595                 pos = this_pos;
1596                 pos_byte = this_pos_byte;
1597                 break;
1598               }
1599
1600             DEC_BOTH (pos, pos_byte);
1601           }
1602
1603         n++;
1604       }
1605   else if (lim < pos)
1606     while (n < 0)
1607       {
1608         while (1)
1609           {
1610             /* Try matching at position POS.  */
1611             EMACS_INT this_pos = pos - len;
1612             int this_len = len;
1613             unsigned char *p = pat;
1614
1615             if (this_pos < lim)
1616               goto stop;
1617
1618             while (this_len > 0)
1619               {
1620                 int pat_ch = *p++;
1621                 int buf_ch = FETCH_BYTE (this_pos);
1622                 TRANSLATE (buf_ch, trt, buf_ch);
1623
1624                 if (buf_ch != pat_ch)
1625                   break;
1626                 this_len--;
1627                 this_pos++;
1628               }
1629
1630             if (this_len == 0)
1631               {
1632                 match_byte = len;
1633                 pos -= len;
1634                 break;
1635               }
1636
1637             pos--;
1638           }
1639
1640         n++;
1641       }
1642
1643  stop:
1644   if (n == 0)
1645     {
1646       if (forward)
1647         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1648       else
1649         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1650
1651       return pos;
1652     }
1653   else if (n > 0)
1654     return -n;
1655   else
1656     return n;
1657 }
1658 \f
1659 /* Do Boyer-Moore search N times for the string BASE_PAT,
1660    whose length is LEN/LEN_BYTE,
1661    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1662    DIRECTION says which direction we search in.
1663    TRT and INVERSE_TRT are translation tables.
1664    Characters in PAT are already translated by TRT.
1665
1666    This kind of search works if all the characters in BASE_PAT that
1667    have nontrivial translation are the same aside from the last byte.
1668    This makes it possible to translate just the last byte of a
1669    character, and do so after just a simple test of the context.
1670    CHAR_BASE is nonzero if there is such a non-ASCII character.
1671
1672    If that criterion is not satisfied, do not call this function.  */
1673
1674 static EMACS_INT
1675 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1676              pos, pos_byte, lim, lim_byte, char_base)
1677      int n;
1678      unsigned char *base_pat;
1679      int len, len_byte;
1680      Lisp_Object trt;
1681      Lisp_Object inverse_trt;
1682      EMACS_INT pos, pos_byte;
1683      EMACS_INT lim, lim_byte;
1684      int char_base;
1685 {
1686   int direction = ((n > 0) ? 1 : -1);
1687   register int dirlen;
1688   EMACS_INT limit;
1689   int stride_for_teases = 0;
1690   int BM_tab[0400];
1691   register unsigned char *cursor, *p_limit;
1692   register int i, j;
1693   unsigned char *pat, *pat_end;
1694   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1695
1696   unsigned char simple_translate[0400];
1697   /* These are set to the preceding bytes of a byte to be translated
1698      if char_base is nonzero.  As the maximum byte length of a
1699      multibyte character is 5, we have to check at most four previous
1700      bytes.  */
1701   int translate_prev_byte1 = 0;
1702   int translate_prev_byte2 = 0;
1703   int translate_prev_byte3 = 0;
1704   int translate_prev_byte4 = 0;
1705
1706   /* The general approach is that we are going to maintain that we know
1707      the first (closest to the present position, in whatever direction
1708      we're searching) character that could possibly be the last
1709      (furthest from present position) character of a valid match.  We
1710      advance the state of our knowledge by looking at that character
1711      and seeing whether it indeed matches the last character of the
1712      pattern.  If it does, we take a closer look.  If it does not, we
1713      move our pointer (to putative last characters) as far as is
1714      logically possible.  This amount of movement, which I call a
1715      stride, will be the length of the pattern if the actual character
1716      appears nowhere in the pattern, otherwise it will be the distance
1717      from the last occurrence of that character to the end of the
1718      pattern.  If the amount is zero we have a possible match.  */
1719
1720   /* Here we make a "mickey mouse" BM table.  The stride of the search
1721      is determined only by the last character of the putative match.
1722      If that character does not match, we will stride the proper
1723      distance to propose a match that superimposes it on the last
1724      instance of a character that matches it (per trt), or misses
1725      it entirely if there is none. */
1726
1727   dirlen = len_byte * direction;
1728
1729   /* Record position after the end of the pattern.  */
1730   pat_end = base_pat + len_byte;
1731   /* BASE_PAT points to a character that we start scanning from.
1732      It is the first character in a forward search,
1733      the last character in a backward search.  */
1734   if (direction < 0)
1735     base_pat = pat_end - 1;
1736
1737   /* A character that does not appear in the pattern induces a
1738      stride equal to the pattern length.  */
1739   for (i = 0; i < 0400; i++)
1740     BM_tab[i] = dirlen;
1741
1742   /* We use this for translation, instead of TRT itself.
1743      We fill this in to handle the characters that actually
1744      occur in the pattern.  Others don't matter anyway!  */
1745   for (i = 0; i < 0400; i++)
1746     simple_translate[i] = i;
1747
1748   if (char_base)
1749     {
1750       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1751          byte following them are the target of translation.  */
1752       unsigned char str[MAX_MULTIBYTE_LENGTH];
1753       int len = CHAR_STRING (char_base, str);
1754
1755       translate_prev_byte1 = str[len - 2];
1756       if (len > 2)
1757         {
1758           translate_prev_byte2 = str[len - 3];
1759           if (len > 3)
1760             {
1761               translate_prev_byte3 = str[len - 4];
1762               if (len > 4)
1763                 translate_prev_byte4 = str[len - 5];
1764             }
1765         }
1766     }
1767
1768   i = 0;
1769   while (i != dirlen)
1770     {
1771       unsigned char *ptr = base_pat + i;
1772       i += direction;
1773       if (! NILP (trt))
1774         {
1775           /* If the byte currently looking at is the last of a
1776              character to check case-equivalents, set CH to that
1777              character.  An ASCII character and a non-ASCII character
1778              matching with CHAR_BASE are to be checked.  */
1779           int ch = -1;
1780
1781           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1782             ch = *ptr;
1783           else if (char_base
1784                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1785             {
1786               unsigned char *charstart = ptr - 1;
1787
1788               while (! (CHAR_HEAD_P (*charstart)))
1789                 charstart--;
1790               ch = STRING_CHAR (charstart);
1791               if (char_base != (ch & ~0x3F))
1792                 ch = -1;
1793             }
1794
1795           if (ch >= 0200)
1796             j = (ch & 0x3F) | 0200;
1797           else
1798             j = *ptr;
1799
1800           if (i == dirlen)
1801             stride_for_teases = BM_tab[j];
1802
1803           BM_tab[j] = dirlen - i;
1804           /* A translation table is accompanied by its inverse -- see */
1805           /* comment following downcase_table for details */
1806           if (ch >= 0)
1807             {
1808               int starting_ch = ch;
1809               int starting_j = j;
1810
1811               while (1)
1812                 {
1813                   TRANSLATE (ch, inverse_trt, ch);
1814                   if (ch >= 0200)
1815                     j = (ch & 0x3F) | 0200;
1816                   else
1817                     j = ch;
1818
1819                   /* For all the characters that map into CH,
1820                      set up simple_translate to map the last byte
1821                      into STARTING_J.  */
1822                   simple_translate[j] = starting_j;
1823                   if (ch == starting_ch)
1824                     break;
1825                   BM_tab[j] = dirlen - i;
1826                 }
1827             }
1828         }
1829       else
1830         {
1831           j = *ptr;
1832
1833           if (i == dirlen)
1834             stride_for_teases = BM_tab[j];
1835           BM_tab[j] = dirlen - i;
1836         }
1837       /* stride_for_teases tells how much to stride if we get a
1838          match on the far character but are subsequently
1839          disappointed, by recording what the stride would have been
1840          for that character if the last character had been
1841          different.  */
1842     }
1843   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1844   /* loop invariant - POS_BYTE points at where last char (first
1845      char if reverse) of pattern would align in a possible match.  */
1846   while (n != 0)
1847     {
1848       EMACS_INT tail_end;
1849       unsigned char *tail_end_ptr;
1850
1851       /* It's been reported that some (broken) compiler thinks that
1852          Boolean expressions in an arithmetic context are unsigned.
1853          Using an explicit ?1:0 prevents this.  */
1854       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1855           < 0)
1856         return (n * (0 - direction));
1857       /* First we do the part we can by pointers (maybe nothing) */
1858       QUIT;
1859       pat = base_pat;
1860       limit = pos_byte - dirlen + direction;
1861       if (direction > 0)
1862         {
1863           limit = BUFFER_CEILING_OF (limit);
1864           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1865              can take on without hitting edge of buffer or the gap.  */
1866           limit = min (limit, pos_byte + 20000);
1867           limit = min (limit, lim_byte - 1);
1868         }
1869       else
1870         {
1871           limit = BUFFER_FLOOR_OF (limit);
1872           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1873              can take on without hitting edge of buffer or the gap.  */
1874           limit = max (limit, pos_byte - 20000);
1875           limit = max (limit, lim_byte);
1876         }
1877       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1878       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1879
1880       if ((limit - pos_byte) * direction > 20)
1881         {
1882           unsigned char *p2;
1883
1884           p_limit = BYTE_POS_ADDR (limit);
1885           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1886           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1887           while (1)             /* use one cursor setting as long as i can */
1888             {
1889               if (direction > 0) /* worth duplicating */
1890                 {
1891                   while (cursor <= p_limit)
1892                     {
1893                       if (BM_tab[*cursor] == 0)
1894                         goto hit;
1895                       cursor += BM_tab[*cursor];
1896                     }
1897                 }
1898               else
1899                 {
1900                   while (cursor >= p_limit)
1901                     {
1902                       if (BM_tab[*cursor] == 0)
1903                         goto hit;
1904                       cursor += BM_tab[*cursor];
1905                     }
1906                 }
1907               /* If you are here, cursor is beyond the end of the
1908                  searched region.  You fail to match within the
1909                  permitted region and would otherwise try a character
1910                  beyond that region.  */
1911               break;
1912
1913             hit:
1914               i = dirlen - direction;
1915               if (! NILP (trt))
1916                 {
1917                   while ((i -= direction) + direction != 0)
1918                     {
1919                       int ch;
1920                       cursor -= direction;
1921                       /* Translate only the last byte of a character.  */
1922                       if (! multibyte
1923                           || ((cursor == tail_end_ptr
1924                                || CHAR_HEAD_P (cursor[1]))
1925                               && (CHAR_HEAD_P (cursor[0])
1926                                   /* Check if this is the last byte of
1927                                      a translable character.  */
1928                                   || (translate_prev_byte1 == cursor[-1]
1929                                       && (CHAR_HEAD_P (translate_prev_byte1)
1930                                           || (translate_prev_byte2 == cursor[-2]
1931                                               && (CHAR_HEAD_P (translate_prev_byte2)
1932                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1933                         ch = simple_translate[*cursor];
1934                       else
1935                         ch = *cursor;
1936                       if (pat[i] != ch)
1937                         break;
1938                     }
1939                 }
1940               else
1941                 {
1942                   while ((i -= direction) + direction != 0)
1943                     {
1944                       cursor -= direction;
1945                       if (pat[i] != *cursor)
1946                         break;
1947                     }
1948                 }
1949               cursor += dirlen - i - direction; /* fix cursor */
1950               if (i + direction == 0)
1951                 {
1952                   EMACS_INT position, start, end;
1953
1954                   cursor -= direction;
1955
1956                   position = pos_byte + cursor - p2 + ((direction > 0)
1957                                                        ? 1 - len_byte : 0);
1958                   set_search_regs (position, len_byte);
1959
1960                   if (NILP (Vinhibit_changing_match_data))
1961                     {
1962                       start = search_regs.start[0];
1963                       end = search_regs.end[0];
1964                     }
1965                   else
1966                     /* If Vinhibit_changing_match_data is non-nil,
1967                        search_regs will not be changed.  So let's
1968                        compute start and end here.  */
1969                     {
1970                       start = BYTE_TO_CHAR (position);
1971                       end = BYTE_TO_CHAR (position + len_byte);
1972                     }
1973
1974                   if ((n -= direction) != 0)
1975                     cursor += dirlen; /* to resume search */
1976                   else
1977                     return direction > 0 ? end : start;
1978                 }
1979               else
1980                 cursor += stride_for_teases; /* <sigh> we lose -  */
1981             }
1982           pos_byte += cursor - p2;
1983         }
1984       else
1985         /* Now we'll pick up a clump that has to be done the hard
1986            way because it covers a discontinuity.  */
1987         {
1988           limit = ((direction > 0)
1989                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1990                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1991           limit = ((direction > 0)
1992                    ? min (limit + len_byte, lim_byte - 1)
1993                    : max (limit - len_byte, lim_byte));
1994           /* LIMIT is now the last value POS_BYTE can have
1995              and still be valid for a possible match.  */
1996           while (1)
1997             {
1998               /* This loop can be coded for space rather than
1999                  speed because it will usually run only once.
2000                  (the reach is at most len + 21, and typically
2001                  does not exceed len).  */
2002               while ((limit - pos_byte) * direction >= 0)
2003                 {
2004                   int ch = FETCH_BYTE (pos_byte);
2005                   if (BM_tab[ch] == 0)
2006                     goto hit2;
2007                   pos_byte += BM_tab[ch];
2008                 }
2009               break;    /* ran off the end */
2010
2011             hit2:
2012               /* Found what might be a match.  */
2013               i = dirlen - direction;
2014               while ((i -= direction) + direction != 0)
2015                 {
2016                   int ch;
2017                   unsigned char *ptr;
2018                   pos_byte -= direction;
2019                   ptr = BYTE_POS_ADDR (pos_byte);
2020                   /* Translate only the last byte of a character.  */
2021                   if (! multibyte
2022                       || ((ptr == tail_end_ptr
2023                            || CHAR_HEAD_P (ptr[1]))
2024                           && (CHAR_HEAD_P (ptr[0])
2025                               /* Check if this is the last byte of a
2026                                  translable character.  */
2027                               || (translate_prev_byte1 == ptr[-1]
2028                                   && (CHAR_HEAD_P (translate_prev_byte1)
2029                                       || (translate_prev_byte2 == ptr[-2]
2030                                           && (CHAR_HEAD_P (translate_prev_byte2)
2031                                               || translate_prev_byte3 == ptr[-3])))))))
2032                     ch = simple_translate[*ptr];
2033                   else
2034                     ch = *ptr;
2035                   if (pat[i] != ch)
2036                     break;
2037                 }
2038               /* Above loop has moved POS_BYTE part or all the way
2039                  back to the first pos (last pos if reverse).
2040                  Set it once again at the last (first if reverse) char.  */
2041               pos_byte += dirlen - i - direction;
2042               if (i + direction == 0)
2043                 {
2044                   EMACS_INT position, start, end;
2045                   pos_byte -= direction;
2046
2047                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2048                   set_search_regs (position, len_byte);
2049
2050                   if (NILP (Vinhibit_changing_match_data))
2051                     {
2052                       start = search_regs.start[0];
2053                       end = search_regs.end[0];
2054                     }
2055                   else
2056                     /* If Vinhibit_changing_match_data is non-nil,
2057                        search_regs will not be changed.  So let's
2058                        compute start and end here.  */
2059                     {
2060                       start = BYTE_TO_CHAR (position);
2061                       end = BYTE_TO_CHAR (position + len_byte);
2062                     }
2063
2064                   if ((n -= direction) != 0)
2065                     pos_byte += dirlen; /* to resume search */
2066                   else
2067                     return direction > 0 ? end : start;
2068                 }
2069               else
2070                 pos_byte += stride_for_teases;
2071             }
2072           }
2073       /* We have done one clump.  Can we continue? */
2074       if ((lim_byte - pos_byte) * direction < 0)
2075         return ((0 - n) * direction);
2076     }
2077   return BYTE_TO_CHAR (pos_byte);
2078 }
2079
2080 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2081    for the overall match just found in the current buffer.
2082    Also clear out the match data for registers 1 and up.  */
2083
2084 static void
2085 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2086 {
2087   int i;
2088
2089   if (!NILP (Vinhibit_changing_match_data))
2090     return;
2091
2092   /* Make sure we have registers in which to store
2093      the match position.  */
2094   if (search_regs.num_regs == 0)
2095     {
2096       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2097       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2098       search_regs.num_regs = 2;
2099     }
2100
2101   /* Clear out the other registers.  */
2102   for (i = 1; i < search_regs.num_regs; i++)
2103     {
2104       search_regs.start[i] = -1;
2105       search_regs.end[i] = -1;
2106     }
2107
2108   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2109   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2110   XSETBUFFER (last_thing_searched, current_buffer);
2111 }
2112 \f
2113 /* Given STRING, a string of words separated by word delimiters,
2114    compute a regexp that matches those exact words separated by
2115    arbitrary punctuation.  If LAX is nonzero, the end of the string
2116    need not match a word boundary unless it ends in whitespace.  */
2117
2118 static Lisp_Object
2119 wordify (Lisp_Object string, int lax)
2120 {
2121   register unsigned char *p, *o;
2122   register int i, i_byte, len, punct_count = 0, word_count = 0;
2123   Lisp_Object val;
2124   int prev_c = 0;
2125   int adjust, whitespace_at_end;
2126
2127   CHECK_STRING (string);
2128   p = SDATA (string);
2129   len = SCHARS (string);
2130
2131   for (i = 0, i_byte = 0; i < len; )
2132     {
2133       int c;
2134
2135       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2136
2137       if (SYNTAX (c) != Sword)
2138         {
2139           punct_count++;
2140           if (i > 0 && SYNTAX (prev_c) == Sword)
2141             word_count++;
2142         }
2143
2144       prev_c = c;
2145     }
2146
2147   if (SYNTAX (prev_c) == Sword)
2148     {
2149       word_count++;
2150       whitespace_at_end = 0;
2151     }
2152   else
2153     whitespace_at_end = 1;
2154
2155   if (!word_count)
2156     return empty_unibyte_string;
2157
2158   adjust = - punct_count + 5 * (word_count - 1)
2159     + ((lax && !whitespace_at_end) ? 2 : 4);
2160   if (STRING_MULTIBYTE (string))
2161     val = make_uninit_multibyte_string (len + adjust,
2162                                         SBYTES (string)
2163                                         + adjust);
2164   else
2165     val = make_uninit_string (len + adjust);
2166
2167   o = SDATA (val);
2168   *o++ = '\\';
2169   *o++ = 'b';
2170   prev_c = 0;
2171
2172   for (i = 0, i_byte = 0; i < len; )
2173     {
2174       int c;
2175       int i_byte_orig = i_byte;
2176
2177       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2178
2179       if (SYNTAX (c) == Sword)
2180         {
2181           bcopy (SDATA (string) + i_byte_orig, o,
2182                  i_byte - i_byte_orig);
2183           o += i_byte - i_byte_orig;
2184         }
2185       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2186         {
2187           *o++ = '\\';
2188           *o++ = 'W';
2189           *o++ = '\\';
2190           *o++ = 'W';
2191           *o++ = '*';
2192         }
2193
2194       prev_c = c;
2195     }
2196
2197   if (!lax || whitespace_at_end)
2198     {
2199       *o++ = '\\';
2200       *o++ = 'b';
2201     }
2202
2203   return val;
2204 }
2205 \f
2206 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2207        "MSearch backward: ",
2208        doc: /* Search backward from point for STRING.
2209 Set point to the beginning of the occurrence found, and return point.
2210 An optional second argument bounds the search; it is a buffer position.
2211 The match found must not extend before that position.
2212 Optional third argument, if t, means if fail just return nil (no error).
2213  If not nil and not t, position at limit of search and return nil.
2214 Optional fourth argument is repeat count--search for successive occurrences.
2215
2216 Search case-sensitivity is determined by the value of the variable
2217 `case-fold-search', which see.
2218
2219 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2220      (string, bound, noerror, count)
2221      Lisp_Object string, bound, noerror, count;
2222 {
2223   return search_command (string, bound, noerror, count, -1, 0, 0);
2224 }
2225
2226 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2227        doc: /* Search forward from point for STRING.
2228 Set point to the end of the occurrence found, and return point.
2229 An optional second argument bounds the search; it is a buffer position.
2230 The match found must not extend after that position.  A value of nil is
2231   equivalent to (point-max).
2232 Optional third argument, if t, means if fail just return nil (no error).
2233   If not nil and not t, move to limit of search and return nil.
2234 Optional fourth argument is repeat count--search for successive occurrences.
2235
2236 Search case-sensitivity is determined by the value of the variable
2237 `case-fold-search', which see.
2238
2239 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2240      (string, bound, noerror, count)
2241      Lisp_Object string, bound, noerror, count;
2242 {
2243   return search_command (string, bound, noerror, count, 1, 0, 0);
2244 }
2245
2246 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2247        "sWord search backward: ",
2248        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2249 Set point to the beginning of the occurrence found, and return point.
2250 An optional second argument bounds the search; it is a buffer position.
2251 The match found must not extend before that position.
2252 Optional third argument, if t, means if fail just return nil (no error).
2253   If not nil and not t, move to limit of search and return nil.
2254 Optional fourth argument is repeat count--search for successive occurrences.  */)
2255      (string, bound, noerror, count)
2256      Lisp_Object string, bound, noerror, count;
2257 {
2258   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2259 }
2260
2261 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2262        "sWord search: ",
2263        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2264 Set point to the end of the occurrence found, and return point.
2265 An optional second argument bounds the search; it is a buffer position.
2266 The match found must not extend after that position.
2267 Optional third argument, if t, means if fail just return nil (no error).
2268   If not nil and not t, move to limit of search and return nil.
2269 Optional fourth argument is repeat count--search for successive occurrences.  */)
2270      (string, bound, noerror, count)
2271      Lisp_Object string, bound, noerror, count;
2272 {
2273   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2274 }
2275
2276 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2277        "sWord search backward: ",
2278        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2279 Set point to the beginning of the occurrence found, and return point.
2280
2281 Unlike `word-search-backward', the end of STRING need not match a word
2282 boundary unless it ends in whitespace.
2283
2284 An optional second argument bounds the search; it is a buffer position.
2285 The match found must not extend before that position.
2286 Optional third argument, if t, means if fail just return nil (no error).
2287   If not nil and not t, move to limit of search and return nil.
2288 Optional fourth argument is repeat count--search for successive occurrences.  */)
2289      (string, bound, noerror, count)
2290      Lisp_Object string, bound, noerror, count;
2291 {
2292   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2293 }
2294
2295 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2296        "sWord search: ",
2297        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2298 Set point to the end of the occurrence found, and return point.
2299
2300 Unlike `word-search-forward', the end of STRING need not match a word
2301 boundary unless it ends in whitespace.
2302
2303 An optional second argument bounds the search; it is a buffer position.
2304 The match found must not extend after that position.
2305 Optional third argument, if t, means if fail just return nil (no error).
2306   If not nil and not t, move to limit of search and return nil.
2307 Optional fourth argument is repeat count--search for successive occurrences.  */)
2308      (string, bound, noerror, count)
2309      Lisp_Object string, bound, noerror, count;
2310 {
2311   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2312 }
2313
2314 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2315        "sRE search backward: ",
2316        doc: /* Search backward from point for match for regular expression REGEXP.
2317 Set point to the beginning of the match, and return point.
2318 The match found is the one starting last in the buffer
2319 and yet ending before the origin of the search.
2320 An optional second argument bounds the search; it is a buffer position.
2321 The match found must start at or after that position.
2322 Optional third argument, if t, means if fail just return nil (no error).
2323   If not nil and not t, move to limit of search and return nil.
2324 Optional fourth argument is repeat count--search for successive occurrences.
2325 See also the functions `match-beginning', `match-end', `match-string',
2326 and `replace-match'.  */)
2327      (regexp, bound, noerror, count)
2328      Lisp_Object regexp, bound, noerror, count;
2329 {
2330   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2331 }
2332
2333 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2334        "sRE search: ",
2335        doc: /* Search forward from point for regular expression REGEXP.
2336 Set point to the end of the occurrence found, and return point.
2337 An optional second argument bounds the search; it is a buffer position.
2338 The match found must not extend after that position.
2339 Optional third argument, if t, means if fail just return nil (no error).
2340   If not nil and not t, move to limit of search and return nil.
2341 Optional fourth argument is repeat count--search for successive occurrences.
2342 See also the functions `match-beginning', `match-end', `match-string',
2343 and `replace-match'.  */)
2344      (regexp, bound, noerror, count)
2345      Lisp_Object regexp, bound, noerror, count;
2346 {
2347   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2348 }
2349
2350 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2351        "sPosix search backward: ",
2352        doc: /* Search backward from point for match for regular expression REGEXP.
2353 Find the longest match in accord with Posix regular expression rules.
2354 Set point to the beginning of the match, and return point.
2355 The match found is the one starting last in the buffer
2356 and yet ending before the origin of the search.
2357 An optional second argument bounds the search; it is a buffer position.
2358 The match found must start at or after that position.
2359 Optional third argument, if t, means if fail just return nil (no error).
2360   If not nil and not t, move to limit of search and return nil.
2361 Optional fourth argument is repeat count--search for successive occurrences.
2362 See also the functions `match-beginning', `match-end', `match-string',
2363 and `replace-match'.  */)
2364      (regexp, bound, noerror, count)
2365      Lisp_Object regexp, bound, noerror, count;
2366 {
2367   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2368 }
2369
2370 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2371        "sPosix search: ",
2372        doc: /* Search forward from point for regular expression REGEXP.
2373 Find the longest match in accord with Posix regular expression rules.
2374 Set point to the end of the occurrence found, and return point.
2375 An optional second argument bounds the search; it is a buffer position.
2376 The match found must not extend after that position.
2377 Optional third argument, if t, means if fail just return nil (no error).
2378   If not nil and not t, move to limit of search and return nil.
2379 Optional fourth argument is repeat count--search for successive occurrences.
2380 See also the functions `match-beginning', `match-end', `match-string',
2381 and `replace-match'.  */)
2382      (regexp, bound, noerror, count)
2383      Lisp_Object regexp, bound, noerror, count;
2384 {
2385   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2386 }
2387 \f
2388 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2389        doc: /* Replace text matched by last search with NEWTEXT.
2390 Leave point at the end of the replacement text.
2391
2392 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2393 Otherwise maybe capitalize the whole text, or maybe just word initials,
2394 based on the replaced text.
2395 If the replaced text has only capital letters
2396 and has at least one multiletter word, convert NEWTEXT to all caps.
2397 Otherwise if all words are capitalized in the replaced text,
2398 capitalize each word in NEWTEXT.
2399
2400 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2401 Otherwise treat `\\' as special:
2402   `\\&' in NEWTEXT means substitute original matched text.
2403   `\\N' means substitute what matched the Nth `\\(...\\)'.
2404        If Nth parens didn't match, substitute nothing.
2405   `\\\\' means insert one `\\'.
2406 Case conversion does not apply to these substitutions.
2407
2408 FIXEDCASE and LITERAL are optional arguments.
2409
2410 The optional fourth argument STRING can be a string to modify.
2411 This is meaningful when the previous match was done against STRING,
2412 using `string-match'.  When used this way, `replace-match'
2413 creates and returns a new string made by copying STRING and replacing
2414 the part of STRING that was matched.
2415
2416 The optional fifth argument SUBEXP specifies a subexpression;
2417 it says to replace just that subexpression with NEWTEXT,
2418 rather than replacing the entire matched text.
2419 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2420 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2421 NEWTEXT in place of subexp N.
2422 This is useful only after a regular expression search or match,
2423 since only regular expressions have distinguished subexpressions.  */)
2424      (newtext, fixedcase, literal, string, subexp)
2425      Lisp_Object newtext, fixedcase, literal, string, subexp;
2426 {
2427   enum { nochange, all_caps, cap_initial } case_action;
2428   register int pos, pos_byte;
2429   int some_multiletter_word;
2430   int some_lowercase;
2431   int some_uppercase;
2432   int some_nonuppercase_initial;
2433   register int c, prevc;
2434   int sub;
2435   EMACS_INT opoint, newpoint;
2436
2437   CHECK_STRING (newtext);
2438
2439   if (! NILP (string))
2440     CHECK_STRING (string);
2441
2442   case_action = nochange;       /* We tried an initialization */
2443                                 /* but some C compilers blew it */
2444
2445   if (search_regs.num_regs <= 0)
2446     error ("`replace-match' called before any match found");
2447
2448   if (NILP (subexp))
2449     sub = 0;
2450   else
2451     {
2452       CHECK_NUMBER (subexp);
2453       sub = XINT (subexp);
2454       if (sub < 0 || sub >= search_regs.num_regs)
2455         args_out_of_range (subexp, make_number (search_regs.num_regs));
2456     }
2457
2458   if (NILP (string))
2459     {
2460       if (search_regs.start[sub] < BEGV
2461           || search_regs.start[sub] > search_regs.end[sub]
2462           || search_regs.end[sub] > ZV)
2463         args_out_of_range (make_number (search_regs.start[sub]),
2464                            make_number (search_regs.end[sub]));
2465     }
2466   else
2467     {
2468       if (search_regs.start[sub] < 0
2469           || search_regs.start[sub] > search_regs.end[sub]
2470           || search_regs.end[sub] > SCHARS (string))
2471         args_out_of_range (make_number (search_regs.start[sub]),
2472                            make_number (search_regs.end[sub]));
2473     }
2474
2475   if (NILP (fixedcase))
2476     {
2477       /* Decide how to casify by examining the matched text. */
2478       EMACS_INT last;
2479
2480       pos = search_regs.start[sub];
2481       last = search_regs.end[sub];
2482
2483       if (NILP (string))
2484         pos_byte = CHAR_TO_BYTE (pos);
2485       else
2486         pos_byte = string_char_to_byte (string, pos);
2487
2488       prevc = '\n';
2489       case_action = all_caps;
2490
2491       /* some_multiletter_word is set nonzero if any original word
2492          is more than one letter long. */
2493       some_multiletter_word = 0;
2494       some_lowercase = 0;
2495       some_nonuppercase_initial = 0;
2496       some_uppercase = 0;
2497
2498       while (pos < last)
2499         {
2500           if (NILP (string))
2501             {
2502               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2503               INC_BOTH (pos, pos_byte);
2504             }
2505           else
2506             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2507
2508           if (LOWERCASEP (c))
2509             {
2510               /* Cannot be all caps if any original char is lower case */
2511
2512               some_lowercase = 1;
2513               if (SYNTAX (prevc) != Sword)
2514                 some_nonuppercase_initial = 1;
2515               else
2516                 some_multiletter_word = 1;
2517             }
2518           else if (UPPERCASEP (c))
2519             {
2520               some_uppercase = 1;
2521               if (SYNTAX (prevc) != Sword)
2522                 ;
2523               else
2524                 some_multiletter_word = 1;
2525             }
2526           else
2527             {
2528               /* If the initial is a caseless word constituent,
2529                  treat that like a lowercase initial.  */
2530               if (SYNTAX (prevc) != Sword)
2531                 some_nonuppercase_initial = 1;
2532             }
2533
2534           prevc = c;
2535         }
2536
2537       /* Convert to all caps if the old text is all caps
2538          and has at least one multiletter word.  */
2539       if (! some_lowercase && some_multiletter_word)
2540         case_action = all_caps;
2541       /* Capitalize each word, if the old text has all capitalized words.  */
2542       else if (!some_nonuppercase_initial && some_multiletter_word)
2543         case_action = cap_initial;
2544       else if (!some_nonuppercase_initial && some_uppercase)
2545         /* Should x -> yz, operating on X, give Yz or YZ?
2546            We'll assume the latter.  */
2547         case_action = all_caps;
2548       else
2549         case_action = nochange;
2550     }
2551
2552   /* Do replacement in a string.  */
2553   if (!NILP (string))
2554     {
2555       Lisp_Object before, after;
2556
2557       before = Fsubstring (string, make_number (0),
2558                            make_number (search_regs.start[sub]));
2559       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2560
2561       /* Substitute parts of the match into NEWTEXT
2562          if desired.  */
2563       if (NILP (literal))
2564         {
2565           EMACS_INT lastpos = 0;
2566           EMACS_INT lastpos_byte = 0;
2567           /* We build up the substituted string in ACCUM.  */
2568           Lisp_Object accum;
2569           Lisp_Object middle;
2570           int length = SBYTES (newtext);
2571
2572           accum = Qnil;
2573
2574           for (pos_byte = 0, pos = 0; pos_byte < length;)
2575             {
2576               int substart = -1;
2577               int subend = 0;
2578               int delbackslash = 0;
2579
2580               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2581
2582               if (c == '\\')
2583                 {
2584                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2585
2586                   if (c == '&')
2587                     {
2588                       substart = search_regs.start[sub];
2589                       subend = search_regs.end[sub];
2590                     }
2591                   else if (c >= '1' && c <= '9')
2592                     {
2593                       if (search_regs.start[c - '0'] >= 0
2594                           && c <= search_regs.num_regs + '0')
2595                         {
2596                           substart = search_regs.start[c - '0'];
2597                           subend = search_regs.end[c - '0'];
2598                         }
2599                       else
2600                         {
2601                           /* If that subexp did not match,
2602                              replace \\N with nothing.  */
2603                           substart = 0;
2604                           subend = 0;
2605                         }
2606                     }
2607                   else if (c == '\\')
2608                     delbackslash = 1;
2609                   else
2610                     error ("Invalid use of `\\' in replacement text");
2611                 }
2612               if (substart >= 0)
2613                 {
2614                   if (pos - 2 != lastpos)
2615                     middle = substring_both (newtext, lastpos,
2616                                              lastpos_byte,
2617                                              pos - 2, pos_byte - 2);
2618                   else
2619                     middle = Qnil;
2620                   accum = concat3 (accum, middle,
2621                                    Fsubstring (string,
2622                                                make_number (substart),
2623                                                make_number (subend)));
2624                   lastpos = pos;
2625                   lastpos_byte = pos_byte;
2626                 }
2627               else if (delbackslash)
2628                 {
2629                   middle = substring_both (newtext, lastpos,
2630                                            lastpos_byte,
2631                                            pos - 1, pos_byte - 1);
2632
2633                   accum = concat2 (accum, middle);
2634                   lastpos = pos;
2635                   lastpos_byte = pos_byte;
2636                 }
2637             }
2638
2639           if (pos != lastpos)
2640             middle = substring_both (newtext, lastpos,
2641                                      lastpos_byte,
2642                                      pos, pos_byte);
2643           else
2644             middle = Qnil;
2645
2646           newtext = concat2 (accum, middle);
2647         }
2648
2649       /* Do case substitution in NEWTEXT if desired.  */
2650       if (case_action == all_caps)
2651         newtext = Fupcase (newtext);
2652       else if (case_action == cap_initial)
2653         newtext = Fupcase_initials (newtext);
2654
2655       return concat3 (before, newtext, after);
2656     }
2657
2658   /* Record point, then move (quietly) to the start of the match.  */
2659   if (PT >= search_regs.end[sub])
2660     opoint = PT - ZV;
2661   else if (PT > search_regs.start[sub])
2662     opoint = search_regs.end[sub] - ZV;
2663   else
2664     opoint = PT;
2665
2666   /* If we want non-literal replacement,
2667      perform substitution on the replacement string.  */
2668   if (NILP (literal))
2669     {
2670       int length = SBYTES (newtext);
2671       unsigned char *substed;
2672       int substed_alloc_size, substed_len;
2673       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2674       int str_multibyte = STRING_MULTIBYTE (newtext);
2675       Lisp_Object rev_tbl;
2676       int really_changed = 0;
2677
2678       rev_tbl = Qnil;
2679
2680       substed_alloc_size = length * 2 + 100;
2681       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2682       substed_len = 0;
2683
2684       /* Go thru NEWTEXT, producing the actual text to insert in
2685          SUBSTED while adjusting multibyteness to that of the current
2686          buffer.  */
2687
2688       for (pos_byte = 0, pos = 0; pos_byte < length;)
2689         {
2690           unsigned char str[MAX_MULTIBYTE_LENGTH];
2691           unsigned char *add_stuff = NULL;
2692           int add_len = 0;
2693           int idx = -1;
2694
2695           if (str_multibyte)
2696             {
2697               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2698               if (!buf_multibyte)
2699                 c = multibyte_char_to_unibyte (c, rev_tbl);
2700             }
2701           else
2702             {
2703               /* Note that we don't have to increment POS.  */
2704               c = SREF (newtext, pos_byte++);
2705               if (buf_multibyte)
2706                 MAKE_CHAR_MULTIBYTE (c);
2707             }
2708
2709           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2710              or set IDX to a match index, which means put that part
2711              of the buffer text into SUBSTED.  */
2712
2713           if (c == '\\')
2714             {
2715               really_changed = 1;
2716
2717               if (str_multibyte)
2718                 {
2719                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2720                                                       pos, pos_byte);
2721                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2722                     c = multibyte_char_to_unibyte (c, rev_tbl);
2723                 }
2724               else
2725                 {
2726                   c = SREF (newtext, pos_byte++);
2727                   if (buf_multibyte)
2728                     MAKE_CHAR_MULTIBYTE (c);
2729                 }
2730
2731               if (c == '&')
2732                 idx = sub;
2733               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2734                 {
2735                   if (search_regs.start[c - '0'] >= 1)
2736                     idx = c - '0';
2737                 }
2738               else if (c == '\\')
2739                 add_len = 1, add_stuff = "\\";
2740               else
2741                 {
2742                   xfree (substed);
2743                   error ("Invalid use of `\\' in replacement text");
2744                 }
2745             }
2746           else
2747             {
2748               add_len = CHAR_STRING (c, str);
2749               add_stuff = str;
2750             }
2751
2752           /* If we want to copy part of a previous match,
2753              set up ADD_STUFF and ADD_LEN to point to it.  */
2754           if (idx >= 0)
2755             {
2756               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2757               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2758               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2759                 move_gap (search_regs.start[idx]);
2760               add_stuff = BYTE_POS_ADDR (begbyte);
2761             }
2762
2763           /* Now the stuff we want to add to SUBSTED
2764              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2765
2766           /* Make sure SUBSTED is big enough.  */
2767           if (substed_len + add_len >= substed_alloc_size)
2768             {
2769               substed_alloc_size = substed_len + add_len + 500;
2770               substed = (unsigned char *) xrealloc (substed,
2771                                                     substed_alloc_size + 1);
2772             }
2773
2774           /* Now add to the end of SUBSTED.  */
2775           if (add_stuff)
2776             {
2777               bcopy (add_stuff, substed + substed_len, add_len);
2778               substed_len += add_len;
2779             }
2780         }
2781
2782       if (really_changed)
2783         {
2784           if (buf_multibyte)
2785             {
2786               int nchars = multibyte_chars_in_text (substed, substed_len);
2787
2788               newtext = make_multibyte_string (substed, nchars, substed_len);
2789             }
2790           else
2791             newtext = make_unibyte_string (substed, substed_len);
2792         }
2793       xfree (substed);
2794     }
2795
2796   /* Replace the old text with the new in the cleanest possible way.  */
2797   replace_range (search_regs.start[sub], search_regs.end[sub],
2798                  newtext, 1, 0, 1);
2799   newpoint = search_regs.start[sub] + SCHARS (newtext);
2800
2801   if (case_action == all_caps)
2802     Fupcase_region (make_number (search_regs.start[sub]),
2803                     make_number (newpoint));
2804   else if (case_action == cap_initial)
2805     Fupcase_initials_region (make_number (search_regs.start[sub]),
2806                              make_number (newpoint));
2807
2808   /* Adjust search data for this change.  */
2809   {
2810     EMACS_INT oldend = search_regs.end[sub];
2811     EMACS_INT oldstart = search_regs.start[sub];
2812     EMACS_INT change = newpoint - search_regs.end[sub];
2813     int i;
2814
2815     for (i = 0; i < search_regs.num_regs; i++)
2816       {
2817         if (search_regs.start[i] >= oldend)
2818           search_regs.start[i] += change;
2819         else if (search_regs.start[i] > oldstart)
2820           search_regs.start[i] = oldstart;
2821         if (search_regs.end[i] >= oldend)
2822           search_regs.end[i] += change;
2823         else if (search_regs.end[i] > oldstart)
2824           search_regs.end[i] = oldstart;
2825       }
2826   }
2827
2828   /* Put point back where it was in the text.  */
2829   if (opoint <= 0)
2830     TEMP_SET_PT (opoint + ZV);
2831   else
2832     TEMP_SET_PT (opoint);
2833
2834   /* Now move point "officially" to the start of the inserted replacement.  */
2835   move_if_not_intangible (newpoint);
2836
2837   return Qnil;
2838 }
2839 \f
2840 static Lisp_Object
2841 match_limit (Lisp_Object num, int beginningp)
2842 {
2843   register int n;
2844
2845   CHECK_NUMBER (num);
2846   n = XINT (num);
2847   if (n < 0)
2848     args_out_of_range (num, make_number (0));
2849   if (search_regs.num_regs <= 0)
2850     error ("No match data, because no search succeeded");
2851   if (n >= search_regs.num_regs
2852       || search_regs.start[n] < 0)
2853     return Qnil;
2854   return (make_number ((beginningp) ? search_regs.start[n]
2855                                     : search_regs.end[n]));
2856 }
2857
2858 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2859        doc: /* Return position of start of text matched by last search.
2860 SUBEXP, a number, specifies which parenthesized expression in the last
2861   regexp.
2862 Value is nil if SUBEXPth pair didn't match, or there were less than
2863   SUBEXP pairs.
2864 Zero means the entire text matched by the whole regexp or whole string.  */)
2865      (subexp)
2866      Lisp_Object subexp;
2867 {
2868   return match_limit (subexp, 1);
2869 }
2870
2871 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2872        doc: /* Return position of end of text matched by last search.
2873 SUBEXP, a number, specifies which parenthesized expression in the last
2874   regexp.
2875 Value is nil if SUBEXPth pair didn't match, or there were less than
2876   SUBEXP pairs.
2877 Zero means the entire text matched by the whole regexp or whole string.  */)
2878      (subexp)
2879      Lisp_Object subexp;
2880 {
2881   return match_limit (subexp, 0);
2882 }
2883
2884 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2885        doc: /* Return a list containing all info on what the last search matched.
2886 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2887 All the elements are markers or nil (nil if the Nth pair didn't match)
2888 if the last match was on a buffer; integers or nil if a string was matched.
2889 Use `set-match-data' to reinstate the data in this list.
2890
2891 If INTEGERS (the optional first argument) is non-nil, always use
2892 integers \(rather than markers) to represent buffer positions.  In
2893 this case, and if the last match was in a buffer, the buffer will get
2894 stored as one additional element at the end of the list.
2895
2896 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2897 enough to hold all the values, and if INTEGERS is non-nil, no consing
2898 is done.
2899
2900 If optional third arg RESEAT is non-nil, any previous markers on the
2901 REUSE list will be modified to point to nowhere.
2902
2903 Return value is undefined if the last search failed.  */)
2904   (integers, reuse, reseat)
2905      Lisp_Object integers, reuse, reseat;
2906 {
2907   Lisp_Object tail, prev;
2908   Lisp_Object *data;
2909   int i, len;
2910
2911   if (!NILP (reseat))
2912     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2913       if (MARKERP (XCAR (tail)))
2914         {
2915           unchain_marker (XMARKER (XCAR (tail)));
2916           XSETCAR (tail, Qnil);
2917         }
2918
2919   if (NILP (last_thing_searched))
2920     return Qnil;
2921
2922   prev = Qnil;
2923
2924   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2925                                  * sizeof (Lisp_Object));
2926
2927   len = 0;
2928   for (i = 0; i < search_regs.num_regs; i++)
2929     {
2930       int start = search_regs.start[i];
2931       if (start >= 0)
2932         {
2933           if (EQ (last_thing_searched, Qt)
2934               || ! NILP (integers))
2935             {
2936               XSETFASTINT (data[2 * i], start);
2937               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2938             }
2939           else if (BUFFERP (last_thing_searched))
2940             {
2941               data[2 * i] = Fmake_marker ();
2942               Fset_marker (data[2 * i],
2943                            make_number (start),
2944                            last_thing_searched);
2945               data[2 * i + 1] = Fmake_marker ();
2946               Fset_marker (data[2 * i + 1],
2947                            make_number (search_regs.end[i]),
2948                            last_thing_searched);
2949             }
2950           else
2951             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2952             abort ();
2953
2954           len = 2 * i + 2;
2955         }
2956       else
2957         data[2 * i] = data[2 * i + 1] = Qnil;
2958     }
2959
2960   if (BUFFERP (last_thing_searched) && !NILP (integers))
2961     {
2962       data[len] = last_thing_searched;
2963       len++;
2964     }
2965
2966   /* If REUSE is not usable, cons up the values and return them.  */
2967   if (! CONSP (reuse))
2968     return Flist (len, data);
2969
2970   /* If REUSE is a list, store as many value elements as will fit
2971      into the elements of REUSE.  */
2972   for (i = 0, tail = reuse; CONSP (tail);
2973        i++, tail = XCDR (tail))
2974     {
2975       if (i < len)
2976         XSETCAR (tail, data[i]);
2977       else
2978         XSETCAR (tail, Qnil);
2979       prev = tail;
2980     }
2981
2982   /* If we couldn't fit all value elements into REUSE,
2983      cons up the rest of them and add them to the end of REUSE.  */
2984   if (i < len)
2985     XSETCDR (prev, Flist (len - i, data + i));
2986
2987   return reuse;
2988 }
2989
2990 /* We used to have an internal use variant of `reseat' described as:
2991
2992       If RESEAT is `evaporate', put the markers back on the free list
2993       immediately.  No other references to the markers must exist in this
2994       case, so it is used only internally on the unwind stack and
2995       save-match-data from Lisp.
2996
2997    But it was ill-conceived: those supposedly-internal markers get exposed via
2998    the undo-list, so freeing them here is unsafe.  */
2999
3000 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3001        doc: /* Set internal data on last search match from elements of LIST.
3002 LIST should have been created by calling `match-data' previously.
3003
3004 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
3005     (list, reseat)
3006      register Lisp_Object list, reseat;
3007 {
3008   register int i;
3009   register Lisp_Object marker;
3010
3011   if (running_asynch_code)
3012     save_search_regs ();
3013
3014   CHECK_LIST (list);
3015
3016   /* Unless we find a marker with a buffer or an explicit buffer
3017      in LIST, assume that this match data came from a string.  */
3018   last_thing_searched = Qt;
3019
3020   /* Allocate registers if they don't already exist.  */
3021   {
3022     int length = XFASTINT (Flength (list)) / 2;
3023
3024     if (length > search_regs.num_regs)
3025       {
3026         if (search_regs.num_regs == 0)
3027           {
3028             search_regs.start
3029               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3030             search_regs.end
3031               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3032           }
3033         else
3034           {
3035             search_regs.start
3036               = (regoff_t *) xrealloc (search_regs.start,
3037                                        length * sizeof (regoff_t));
3038             search_regs.end
3039               = (regoff_t *) xrealloc (search_regs.end,
3040                                        length * sizeof (regoff_t));
3041           }
3042
3043         for (i = search_regs.num_regs; i < length; i++)
3044           search_regs.start[i] = -1;
3045
3046         search_regs.num_regs = length;
3047       }
3048
3049     for (i = 0; CONSP (list); i++)
3050       {
3051         marker = XCAR (list);
3052         if (BUFFERP (marker))
3053           {
3054             last_thing_searched = marker;
3055             break;
3056           }
3057         if (i >= length)
3058           break;
3059         if (NILP (marker))
3060           {
3061             search_regs.start[i] = -1;
3062             list = XCDR (list);
3063           }
3064         else
3065           {
3066             EMACS_INT from;
3067             Lisp_Object m;
3068
3069             m = marker;
3070             if (MARKERP (marker))
3071               {
3072                 if (XMARKER (marker)->buffer == 0)
3073                   XSETFASTINT (marker, 0);
3074                 else
3075                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3076               }
3077
3078             CHECK_NUMBER_COERCE_MARKER (marker);
3079             from = XINT (marker);
3080
3081             if (!NILP (reseat) && MARKERP (m))
3082               {
3083                 unchain_marker (XMARKER (m));
3084                 XSETCAR (list, Qnil);
3085               }
3086
3087             if ((list = XCDR (list), !CONSP (list)))
3088               break;
3089
3090             m = marker = XCAR (list);
3091
3092             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3093               XSETFASTINT (marker, 0);
3094
3095             CHECK_NUMBER_COERCE_MARKER (marker);
3096             search_regs.start[i] = from;
3097             search_regs.end[i] = XINT (marker);
3098
3099             if (!NILP (reseat) && MARKERP (m))
3100               {
3101                 unchain_marker (XMARKER (m));
3102                 XSETCAR (list, Qnil);
3103               }
3104           }
3105         list = XCDR (list);
3106       }
3107
3108     for (; i < search_regs.num_regs; i++)
3109       search_regs.start[i] = -1;
3110   }
3111
3112   return Qnil;
3113 }
3114
3115 /* If non-zero the match data have been saved in saved_search_regs
3116    during the execution of a sentinel or filter. */
3117 static int search_regs_saved;
3118 static struct re_registers saved_search_regs;
3119 static Lisp_Object saved_last_thing_searched;
3120
3121 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3122    if asynchronous code (filter or sentinel) is running. */
3123 static void
3124 save_search_regs (void)
3125 {
3126   if (!search_regs_saved)
3127     {
3128       saved_search_regs.num_regs = search_regs.num_regs;
3129       saved_search_regs.start = search_regs.start;
3130       saved_search_regs.end = search_regs.end;
3131       saved_last_thing_searched = last_thing_searched;
3132       last_thing_searched = Qnil;
3133       search_regs.num_regs = 0;
3134       search_regs.start = 0;
3135       search_regs.end = 0;
3136
3137       search_regs_saved = 1;
3138     }
3139 }
3140
3141 /* Called upon exit from filters and sentinels. */
3142 void
3143 restore_search_regs (void)
3144 {
3145   if (search_regs_saved)
3146     {
3147       if (search_regs.num_regs > 0)
3148         {
3149           xfree (search_regs.start);
3150           xfree (search_regs.end);
3151         }
3152       search_regs.num_regs = saved_search_regs.num_regs;
3153       search_regs.start = saved_search_regs.start;
3154       search_regs.end = saved_search_regs.end;
3155       last_thing_searched = saved_last_thing_searched;
3156       saved_last_thing_searched = Qnil;
3157       search_regs_saved = 0;
3158     }
3159 }
3160
3161 static Lisp_Object
3162 unwind_set_match_data (Lisp_Object list)
3163 {
3164   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3165   return Fset_match_data (list, Qt);
3166 }
3167
3168 /* Called to unwind protect the match data.  */
3169 void
3170 record_unwind_save_match_data (void)
3171 {
3172   record_unwind_protect (unwind_set_match_data,
3173                          Fmatch_data (Qnil, Qnil, Qnil));
3174 }
3175
3176 /* Quote a string to inactivate reg-expr chars */
3177
3178 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3179        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3180      (string)
3181      Lisp_Object string;
3182 {
3183   register unsigned char *in, *out, *end;
3184   register unsigned char *temp;
3185   int backslashes_added = 0;
3186
3187   CHECK_STRING (string);
3188
3189   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3190
3191   /* Now copy the data into the new string, inserting escapes. */
3192
3193   in = SDATA (string);
3194   end = in + SBYTES (string);
3195   out = temp;
3196
3197   for (; in != end; in++)
3198     {
3199       if (*in == '['
3200           || *in == '*' || *in == '.' || *in == '\\'
3201           || *in == '?' || *in == '+'
3202           || *in == '^' || *in == '$')
3203         *out++ = '\\', backslashes_added++;
3204       *out++ = *in;
3205     }
3206
3207   return make_specified_string (temp,
3208                                 SCHARS (string) + backslashes_added,
3209                                 out - temp,
3210                                 STRING_MULTIBYTE (string));
3211 }
3212 \f
3213 void
3214 syms_of_search (void)
3215 {
3216   register int i;
3217
3218   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3219     {
3220       searchbufs[i].buf.allocated = 100;
3221       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3222       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3223       searchbufs[i].regexp = Qnil;
3224       searchbufs[i].whitespace_regexp = Qnil;
3225       searchbufs[i].syntax_table = Qnil;
3226       staticpro (&searchbufs[i].regexp);
3227       staticpro (&searchbufs[i].whitespace_regexp);
3228       staticpro (&searchbufs[i].syntax_table);
3229       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3230     }
3231   searchbuf_head = &searchbufs[0];
3232
3233   Qsearch_failed = intern_c_string ("search-failed");
3234   staticpro (&Qsearch_failed);
3235   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3236   staticpro (&Qinvalid_regexp);
3237
3238   Fput (Qsearch_failed, Qerror_conditions,
3239         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3240   Fput (Qsearch_failed, Qerror_message,
3241         make_pure_c_string ("Search failed"));
3242
3243   Fput (Qinvalid_regexp, Qerror_conditions,
3244         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3245   Fput (Qinvalid_regexp, Qerror_message,
3246         make_pure_c_string ("Invalid regexp"));
3247
3248   last_thing_searched = Qnil;
3249   staticpro (&last_thing_searched);
3250
3251   saved_last_thing_searched = Qnil;
3252   staticpro (&saved_last_thing_searched);
3253
3254   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3255       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3256 Some commands use this for user-specified regexps.
3257 Spaces that occur inside character classes or repetition operators
3258 or other such regexp constructs are not replaced with this.
3259 A value of nil (which is the normal value) means treat spaces literally.  */);
3260   Vsearch_spaces_regexp = Qnil;
3261
3262   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3263       doc: /* Internal use only.
3264 If non-nil, the primitive searching and matching functions
3265 such as `looking-at', `string-match', `re-search-forward', etc.,
3266 do not set the match data.  The proper way to use this variable
3267 is to bind it with `let' around a small expression.  */);
3268   Vinhibit_changing_match_data = Qnil;
3269
3270   defsubr (&Slooking_at);
3271   defsubr (&Sposix_looking_at);
3272   defsubr (&Sstring_match);
3273   defsubr (&Sposix_string_match);
3274   defsubr (&Ssearch_forward);
3275   defsubr (&Ssearch_backward);
3276   defsubr (&Sword_search_forward);
3277   defsubr (&Sword_search_backward);
3278   defsubr (&Sword_search_forward_lax);
3279   defsubr (&Sword_search_backward_lax);
3280   defsubr (&Sre_search_forward);
3281   defsubr (&Sre_search_backward);
3282   defsubr (&Sposix_search_forward);
3283   defsubr (&Sposix_search_backward);
3284   defsubr (&Sreplace_match);
3285   defsubr (&Smatch_beginning);
3286   defsubr (&Smatch_end);
3287   defsubr (&Smatch_data);
3288   defsubr (&Sset_match_data);
3289   defsubr (&Sregexp_quote);
3290 }
3291
3292 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3293    (do not change this comment) */