src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 /* If non-nil, the match data will not be changed during call to
  97    searching or matching functions.  This variable is for internal use
  98    only.  */
  99 Lisp_Object Vinhibit_changing_match_data;
 100
 101 static void set_search_regs (EMACS_INT, EMACS_INT);
 102 static void save_search_regs (void);
 103 static EMACS_INT simple_search (int, unsigned char *, int, int,
 104                                 Lisp_Object, EMACS_INT, EMACS_INT,
 105                                 EMACS_INT, EMACS_INT);
 106 static EMACS_INT boyer_moore (int, unsigned char *, int, int,
 107                               Lisp_Object, Lisp_Object,
 108                               EMACS_INT, EMACS_INT,
 109                               EMACS_INT, EMACS_INT, int);
 110 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
 111                                 EMACS_INT, EMACS_INT, int, int,
 112                                 Lisp_Object, Lisp_Object, int);
 113 static void matcher_overflow (void) NO_RETURN;
 114
 115 static void
 116 matcher_overflow (void)
 117 {
 118   error ("Stack overflow in regexp matcher");
 119 }
 120
 121 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 122    PATTERN is the pattern to compile.
 123    CP is the place to put the result.
 124    TRANSLATE is a translation table for ignoring case, or nil for none.
 125    REGP is the structure that says where to store the "register"
 126    values that will result from matching this pattern.
 127    If it is 0, we should compile the pattern not to record any
 128    subexpression bounds.
 129    POSIX is nonzero if we want full backtracking (POSIX style)
 130    for this pattern.  0 means backtrack only enough to get a valid match.
 131
 132    The behavior also depends on Vsearch_spaces_regexp.  */
 133
 134 static void
 135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
 136 {
 137   char *val;
 138   reg_syntax_t old;
 139
 140   cp->regexp = Qnil;
 141   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 142   cp->posix = posix;
 143   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 144   cp->buf.charset_unibyte = charset_unibyte;
 145   if (STRINGP (Vsearch_spaces_regexp))
 146     cp->whitespace_regexp = Vsearch_spaces_regexp;
 147   else
 148     cp->whitespace_regexp = Qnil;
 149
 150   /* rms: I think BLOCK_INPUT is not needed here any more,
 151      because regex.c defines malloc to call xmalloc.
 152      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 153      So let's turn it off.  */
 154   /*  BLOCK_INPUT;  */
 155   old = re_set_syntax (RE_SYNTAX_EMACS
 156                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 157
 158   if (STRINGP (Vsearch_spaces_regexp))
 159     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 160   else
 161     re_set_whitespace_regexp (NULL);
 162
 163   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 164                                      SBYTES (pattern), &cp->buf);
 165
 166   /* If the compiled pattern hard codes some of the contents of the
 167      syntax-table, it can only be reused with *this* syntax table.  */
 168   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 169
 170   re_set_whitespace_regexp (NULL);
 171
 172   re_set_syntax (old);
 173   /* UNBLOCK_INPUT;  */
 174   if (val)
 175     xsignal1 (Qinvalid_regexp, build_string (val));
 176
 177   cp->regexp = Fcopy_sequence (pattern);
 178 }
 179
 180 /* Shrink each compiled regexp buffer in the cache
 181    to the size actually used right now.
 182    This is called from garbage collection.  */
 183
 184 void
 185 shrink_regexp_cache (void)
 186 {
 187   struct regexp_cache *cp;
 188
 189   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 190     {
 191       cp->buf.allocated = cp->buf.used;
 192       cp->buf.buffer
 193         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 194     }
 195 }
 196
 197 /* Clear the regexp cache w.r.t. a particular syntax table,
 198    because it was changed.
 199    There is no danger of memory leak here because re_compile_pattern
 200    automagically manages the memory in each re_pattern_buffer struct,
 201    based on its `allocated' and `buffer' values.  */
 202 void
 203 clear_regexp_cache (void)
 204 {
 205   int i;
 206
 207   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 208     /* It's tempting to compare with the syntax-table we've actually changed,
 209        but it's not sufficient because char-table inheritance means that
 210        modifying one syntax-table can change others at the same time.  */
 211     if (!EQ (searchbufs[i].syntax_table, Qt))
 212       searchbufs[i].regexp = Qnil;
 213 }
 214
 215 /* Compile a regexp if necessary, but first check to see if there's one in
 216    the cache.
 217    PATTERN is the pattern to compile.
 218    TRANSLATE is a translation table for ignoring case, or nil for none.
 219    REGP is the structure that says where to store the "register"
 220    values that will result from matching this pattern.
 221    If it is 0, we should compile the pattern not to record any
 222    subexpression bounds.
 223    POSIX is nonzero if we want full backtracking (POSIX style)
 224    for this pattern.  0 means backtrack only enough to get a valid match.  */
 225
 226 struct re_pattern_buffer *
 227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 228 {
 229   struct regexp_cache *cp, **cpp;
 230
 231   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 232     {
 233       cp = *cpp;
 234       /* Entries are initialized to nil, and may be set to nil by
 235          compile_pattern_1 if the pattern isn't valid.  Don't apply
 236          string accessors in those cases.  However, compile_pattern_1
 237          is only applied to the cache entry we pick here to reuse.  So
 238          nil should never appear before a non-nil entry.  */
 239       if (NILP (cp->regexp))
 240         goto compile_it;
 241       if (SCHARS (cp->regexp) == SCHARS (pattern)
 242           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 243           && !NILP (Fstring_equal (cp->regexp, pattern))
 244           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 245           && cp->posix == posix
 246           && (EQ (cp->syntax_table, Qt)
 247               || EQ (cp->syntax_table, current_buffer->syntax_table))
 248           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 249           && cp->buf.charset_unibyte == charset_unibyte)
 250         break;
 251
 252       /* If we're at the end of the cache, compile into the nil cell
 253          we found, or the last (least recently used) cell with a
 254          string value.  */
 255       if (cp->next == 0)
 256         {
 257         compile_it:
 258           compile_pattern_1 (cp, pattern, translate, regp, posix);
 259           break;
 260         }
 261     }
 262
 263   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 264      either because we found it in the cache or because we just compiled it.
 265      Move it to the front of the queue to mark it as most recently used.  */
 266   *cpp = cp->next;
 267   cp->next = searchbuf_head;
 268   searchbuf_head = cp;
 269
 270   /* Advise the searching functions about the space we have allocated
 271      for register data.  */
 272   if (regp)
 273     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 274
 275   /* The compiled pattern can be used both for multibyte and unibyte
 276      target.  But, we have to tell which the pattern is used for. */
 277   cp->buf.target_multibyte = multibyte;
 278
 279   return &cp->buf;
 280 }
 281
 282 \f
 283 static Lisp_Object
 284 looking_at_1 (Lisp_Object string, int posix)
 285 {
 286   Lisp_Object val;
 287   unsigned char *p1, *p2;
 288   EMACS_INT s1, s2;
 289   register int i;
 290   struct re_pattern_buffer *bufp;
 291
 292   if (running_asynch_code)
 293     save_search_regs ();
 294
 295   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 296   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 297     = current_buffer->case_eqv_table;
 298
 299   CHECK_STRING (string);
 300   bufp = compile_pattern (string,
 301                           (NILP (Vinhibit_changing_match_data)
 302                            ? &search_regs : NULL),
 303                           (!NILP (current_buffer->case_fold_search)
 304                            ? current_buffer->case_canon_table : Qnil),
 305                           posix,
 306                           !NILP (current_buffer->enable_multibyte_characters));
 307
 308   immediate_quit = 1;
 309   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 310
 311   /* Get pointers and sizes of the two strings
 312      that make up the visible portion of the buffer. */
 313
 314   p1 = BEGV_ADDR;
 315   s1 = GPT_BYTE - BEGV_BYTE;
 316   p2 = GAP_END_ADDR;
 317   s2 = ZV_BYTE - GPT_BYTE;
 318   if (s1 < 0)
 319     {
 320       p2 = p1;
 321       s2 = ZV_BYTE - BEGV_BYTE;
 322       s1 = 0;
 323     }
 324   if (s2 < 0)
 325     {
 326       s1 = ZV_BYTE - BEGV_BYTE;
 327       s2 = 0;
 328     }
 329
 330   re_match_object = Qnil;
 331
 332   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 333                   PT_BYTE - BEGV_BYTE,
 334                   (NILP (Vinhibit_changing_match_data)
 335                    ? &search_regs : NULL),
 336                   ZV_BYTE - BEGV_BYTE);
 337   immediate_quit = 0;
 338
 339   if (i == -2)
 340     matcher_overflow ();
 341
 342   val = (0 <= i ? Qt : Qnil);
 343   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 344     for (i = 0; i < search_regs.num_regs; i++)
 345       if (search_regs.start[i] >= 0)
 346         {
 347           search_regs.start[i]
 348             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 349           search_regs.end[i]
 350             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 351         }
 352
 353   /* Set last_thing_searched only when match data is changed.  */
 354   if (NILP (Vinhibit_changing_match_data))
 355     XSETBUFFER (last_thing_searched, current_buffer);
 356
 357   return val;
 358 }
 359
 360 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 361        doc: /* Return t if text after point matches regular expression REGEXP.
 362 This function modifies the match data that `match-beginning',
 363 `match-end' and `match-data' access; save and restore the match
 364 data if you want to preserve them.  */)
 365      (regexp)
 366      Lisp_Object regexp;
 367 {
 368   return looking_at_1 (regexp, 0);
 369 }
 370
 371 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 372        doc: /* Return t if text after point matches regular expression REGEXP.
 373 Find the longest match, in accord with Posix regular expression rules.
 374 This function modifies the match data that `match-beginning',
 375 `match-end' and `match-data' access; save and restore the match
 376 data if you want to preserve them.  */)
 377      (regexp)
 378      Lisp_Object regexp;
 379 {
 380   return looking_at_1 (regexp, 1);
 381 }
 382 \f
 383 static Lisp_Object
 384 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 385 {
 386   int val;
 387   struct re_pattern_buffer *bufp;
 388   EMACS_INT pos, pos_byte;
 389   int i;
 390
 391   if (running_asynch_code)
 392     save_search_regs ();
 393
 394   CHECK_STRING (regexp);
 395   CHECK_STRING (string);
 396
 397   if (NILP (start))
 398     pos = 0, pos_byte = 0;
 399   else
 400     {
 401       int len = SCHARS (string);
 402
 403       CHECK_NUMBER (start);
 404       pos = XINT (start);
 405       if (pos < 0 && -pos <= len)
 406         pos = len + pos;
 407       else if (0 > pos || pos > len)
 408         args_out_of_range (string, start);
 409       pos_byte = string_char_to_byte (string, pos);
 410     }
 411
 412   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 413   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 414     = current_buffer->case_eqv_table;
 415
 416   bufp = compile_pattern (regexp,
 417                           (NILP (Vinhibit_changing_match_data)
 418                            ? &search_regs : NULL),
 419                           (!NILP (current_buffer->case_fold_search)
 420                            ? current_buffer->case_canon_table : Qnil),
 421                           posix,
 422                           STRING_MULTIBYTE (string));
 423   immediate_quit = 1;
 424   re_match_object = string;
 425
 426   val = re_search (bufp, (char *) SDATA (string),
 427                    SBYTES (string), pos_byte,
 428                    SBYTES (string) - pos_byte,
 429                    (NILP (Vinhibit_changing_match_data)
 430                     ? &search_regs : NULL));
 431   immediate_quit = 0;
 432
 433   /* Set last_thing_searched only when match data is changed.  */
 434   if (NILP (Vinhibit_changing_match_data))
 435     last_thing_searched = Qt;
 436
 437   if (val == -2)
 438     matcher_overflow ();
 439   if (val < 0) return Qnil;
 440
 441   if (NILP (Vinhibit_changing_match_data))
 442     for (i = 0; i < search_regs.num_regs; i++)
 443       if (search_regs.start[i] >= 0)
 444         {
 445           search_regs.start[i]
 446             = string_byte_to_char (string, search_regs.start[i]);
 447           search_regs.end[i]
 448             = string_byte_to_char (string, search_regs.end[i]);
 449         }
 450
 451   return make_number (string_byte_to_char (string, val));
 452 }
 453
 454 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 455        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 456 Matching ignores case if `case-fold-search' is non-nil.
 457 If third arg START is non-nil, start search at that index in STRING.
 458 For index of first char beyond the match, do (match-end 0).
 459 `match-end' and `match-beginning' also give indices of substrings
 460 matched by parenthesis constructs in the pattern.
 461
 462 You can use the function `match-string' to extract the substrings
 463 matched by the parenthesis constructions in REGEXP. */)
 464      (regexp, string, start)
 465      Lisp_Object regexp, string, start;
 466 {
 467   return string_match_1 (regexp, string, start, 0);
 468 }
 469
 470 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 471        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 472 Find the longest match, in accord with Posix regular expression rules.
 473 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 474 If third arg START is non-nil, start search at that index in STRING.
 475 For index of first char beyond the match, do (match-end 0).
 476 `match-end' and `match-beginning' also give indices of substrings
 477 matched by parenthesis constructs in the pattern.  */)
 478      (regexp, string, start)
 479      Lisp_Object regexp, string, start;
 480 {
 481   return string_match_1 (regexp, string, start, 1);
 482 }
 483
 484 /* Match REGEXP against STRING, searching all of STRING,
 485    and return the index of the match, or negative on failure.
 486    This does not clobber the match data.  */
 487
 488 int
 489 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 490 {
 491   int val;
 492   struct re_pattern_buffer *bufp;
 493
 494   bufp = compile_pattern (regexp, 0, Qnil,
 495                           0, STRING_MULTIBYTE (string));
 496   immediate_quit = 1;
 497   re_match_object = string;
 498
 499   val = re_search (bufp, (char *) SDATA (string),
 500                    SBYTES (string), 0,
 501                    SBYTES (string), 0);
 502   immediate_quit = 0;
 503   return val;
 504 }
 505
 506 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 507    and return the index of the match, or negative on failure.
 508    This does not clobber the match data.
 509    We assume that STRING contains single-byte characters.  */
 510
 511 extern Lisp_Object Vascii_downcase_table;
 512
 513 int
 514 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
 515 {
 516   int val;
 517   struct re_pattern_buffer *bufp;
 518   int len = strlen (string);
 519
 520   regexp = string_make_unibyte (regexp);
 521   re_match_object = Qt;
 522   bufp = compile_pattern (regexp, 0,
 523                           Vascii_canon_table, 0,
 524                           0);
 525   immediate_quit = 1;
 526   val = re_search (bufp, string, len, 0, len, 0);
 527   immediate_quit = 0;
 528   return val;
 529 }
 530
 531 /* Like fast_string_match but ignore case.  */
 532
 533 int
 534 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 535 {
 536   int val;
 537   struct re_pattern_buffer *bufp;
 538
 539   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 540                           0, STRING_MULTIBYTE (string));
 541   immediate_quit = 1;
 542   re_match_object = string;
 543
 544   val = re_search (bufp, (char *) SDATA (string),
 545                    SBYTES (string), 0,
 546                    SBYTES (string), 0);
 547   immediate_quit = 0;
 548   return val;
 549 }
 550 \f
 551 /* Match REGEXP against the characters after POS to LIMIT, and return
 552    the number of matched characters.  If STRING is non-nil, match
 553    against the characters in it.  In that case, POS and LIMIT are
 554    indices into the string.  This function doesn't modify the match
 555    data.  */
 556
 557 EMACS_INT
 558 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
 559 {
 560   int multibyte;
 561   struct re_pattern_buffer *buf;
 562   unsigned char *p1, *p2;
 563   EMACS_INT s1, s2;
 564   EMACS_INT len;
 565
 566   if (STRINGP (string))
 567     {
 568       if (pos_byte < 0)
 569         pos_byte = string_char_to_byte (string, pos);
 570       if (limit_byte < 0)
 571         limit_byte = string_char_to_byte (string, limit);
 572       p1 = NULL;
 573       s1 = 0;
 574       p2 = SDATA (string);
 575       s2 = SBYTES (string);
 576       re_match_object = string;
 577       multibyte = STRING_MULTIBYTE (string);
 578     }
 579   else
 580     {
 581       if (pos_byte < 0)
 582         pos_byte = CHAR_TO_BYTE (pos);
 583       if (limit_byte < 0)
 584         limit_byte = CHAR_TO_BYTE (limit);
 585       pos_byte -= BEGV_BYTE;
 586       limit_byte -= BEGV_BYTE;
 587       p1 = BEGV_ADDR;
 588       s1 = GPT_BYTE - BEGV_BYTE;
 589       p2 = GAP_END_ADDR;
 590       s2 = ZV_BYTE - GPT_BYTE;
 591       if (s1 < 0)
 592         {
 593           p2 = p1;
 594           s2 = ZV_BYTE - BEGV_BYTE;
 595           s1 = 0;
 596         }
 597       if (s2 < 0)
 598         {
 599           s1 = ZV_BYTE - BEGV_BYTE;
 600           s2 = 0;
 601         }
 602       re_match_object = Qnil;
 603       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 604     }
 605
 606   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 607   immediate_quit = 1;
 608   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 609                     pos_byte, NULL, limit_byte);
 610   immediate_quit = 0;
 611
 612   return len;
 613 }
 614
 615 \f
 616 /* The newline cache: remembering which sections of text have no newlines.  */
 617
 618 /* If the user has requested newline caching, make sure it's on.
 619    Otherwise, make sure it's off.
 620    This is our cheezy way of associating an action with the change of
 621    state of a buffer-local variable.  */
 622 static void
 623 newline_cache_on_off (struct buffer *buf)
 624 {
 625   if (NILP (buf->cache_long_line_scans))
 626     {
 627       /* It should be off.  */
 628       if (buf->newline_cache)
 629         {
 630           free_region_cache (buf->newline_cache);
 631           buf->newline_cache = 0;
 632         }
 633     }
 634   else
 635     {
 636       /* It should be on.  */
 637       if (buf->newline_cache == 0)
 638         buf->newline_cache = new_region_cache ();
 639     }
 640 }
 641
 642 \f
 643 /* Search for COUNT instances of the character TARGET between START and END.
 644
 645    If COUNT is positive, search forwards; END must be >= START.
 646    If COUNT is negative, search backwards for the -COUNTth instance;
 647       END must be <= START.
 648    If COUNT is zero, do anything you please; run rogue, for all I care.
 649
 650    If END is zero, use BEGV or ZV instead, as appropriate for the
 651    direction indicated by COUNT.
 652
 653    If we find COUNT instances, set *SHORTAGE to zero, and return the
 654    position past the COUNTth match.  Note that for reverse motion
 655    this is not the same as the usual convention for Emacs motion commands.
 656
 657    If we don't find COUNT instances before reaching END, set *SHORTAGE
 658    to the number of TARGETs left unfound, and return END.
 659
 660    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 661    except when inside redisplay.  */
 662
 663 int
 664 scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
 665 {
 666   struct region_cache *newline_cache;
 667   int direction;
 668
 669   if (count > 0)
 670     {
 671       direction = 1;
 672       if (! end) end = ZV;
 673     }
 674   else
 675     {
 676       direction = -1;
 677       if (! end) end = BEGV;
 678     }
 679
 680   newline_cache_on_off (current_buffer);
 681   newline_cache = current_buffer->newline_cache;
 682
 683   if (shortage != 0)
 684     *shortage = 0;
 685
 686   immediate_quit = allow_quit;
 687
 688   if (count > 0)
 689     while (start != end)
 690       {
 691         /* Our innermost scanning loop is very simple; it doesn't know
 692            about gaps, buffer ends, or the newline cache.  ceiling is
 693            the position of the last character before the next such
 694            obstacle --- the last character the dumb search loop should
 695            examine.  */
 696         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
 697         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 698         EMACS_INT tem;
 699
 700         /* If we're looking for a newline, consult the newline cache
 701            to see where we can avoid some scanning.  */
 702         if (target == '\n' && newline_cache)
 703           {
 704             int next_change;
 705             immediate_quit = 0;
 706             while (region_cache_forward
 707                    (current_buffer, newline_cache, start_byte, &next_change))
 708               start_byte = next_change;
 709             immediate_quit = allow_quit;
 710
 711             /* START should never be after END.  */
 712             if (start_byte > ceiling_byte)
 713               start_byte = ceiling_byte;
 714
 715             /* Now the text after start is an unknown region, and
 716                next_change is the position of the next known region. */
 717             ceiling_byte = min (next_change - 1, ceiling_byte);
 718           }
 719
 720         /* The dumb loop can only scan text stored in contiguous
 721            bytes. BUFFER_CEILING_OF returns the last character
 722            position that is contiguous, so the ceiling is the
 723            position after that.  */
 724         tem = BUFFER_CEILING_OF (start_byte);
 725         ceiling_byte = min (tem, ceiling_byte);
 726
 727         {
 728           /* The termination address of the dumb loop.  */
 729           register unsigned char *ceiling_addr
 730             = BYTE_POS_ADDR (ceiling_byte) + 1;
 731           register unsigned char *cursor
 732             = BYTE_POS_ADDR (start_byte);
 733           unsigned char *base = cursor;
 734
 735           while (cursor < ceiling_addr)
 736             {
 737               unsigned char *scan_start = cursor;
 738
 739               /* The dumb loop.  */
 740               while (*cursor != target && ++cursor < ceiling_addr)
 741                 ;
 742
 743               /* If we're looking for newlines, cache the fact that
 744                  the region from start to cursor is free of them. */
 745               if (target == '\n' && newline_cache)
 746                 know_region_cache (current_buffer, newline_cache,
 747                                    start_byte + scan_start - base,
 748                                    start_byte + cursor - base);
 749
 750               /* Did we find the target character?  */
 751               if (cursor < ceiling_addr)
 752                 {
 753                   if (--count == 0)
 754                     {
 755                       immediate_quit = 0;
 756                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 757                     }
 758                   cursor++;
 759                 }
 760             }
 761
 762           start = BYTE_TO_CHAR (start_byte + cursor - base);
 763         }
 764       }
 765   else
 766     while (start > end)
 767       {
 768         /* The last character to check before the next obstacle.  */
 769         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
 770         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 771         EMACS_INT tem;
 772
 773         /* Consult the newline cache, if appropriate.  */
 774         if (target == '\n' && newline_cache)
 775           {
 776             int next_change;
 777             immediate_quit = 0;
 778             while (region_cache_backward
 779                    (current_buffer, newline_cache, start_byte, &next_change))
 780               start_byte = next_change;
 781             immediate_quit = allow_quit;
 782
 783             /* Start should never be at or before end.  */
 784             if (start_byte <= ceiling_byte)
 785               start_byte = ceiling_byte + 1;
 786
 787             /* Now the text before start is an unknown region, and
 788                next_change is the position of the next known region. */
 789             ceiling_byte = max (next_change, ceiling_byte);
 790           }
 791
 792         /* Stop scanning before the gap.  */
 793         tem = BUFFER_FLOOR_OF (start_byte - 1);
 794         ceiling_byte = max (tem, ceiling_byte);
 795
 796         {
 797           /* The termination address of the dumb loop.  */
 798           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 799           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 800           unsigned char *base = cursor;
 801
 802           while (cursor >= ceiling_addr)
 803             {
 804               unsigned char *scan_start = cursor;
 805
 806               while (*cursor != target && --cursor >= ceiling_addr)
 807                 ;
 808
 809               /* If we're looking for newlines, cache the fact that
 810                  the region from after the cursor to start is free of them.  */
 811               if (target == '\n' && newline_cache)
 812                 know_region_cache (current_buffer, newline_cache,
 813                                    start_byte + cursor - base,
 814                                    start_byte + scan_start - base);
 815
 816               /* Did we find the target character?  */
 817               if (cursor >= ceiling_addr)
 818                 {
 819                   if (++count >= 0)
 820                     {
 821                       immediate_quit = 0;
 822                       return BYTE_TO_CHAR (start_byte + cursor - base);
 823                     }
 824                   cursor--;
 825                 }
 826             }
 827
 828           start = BYTE_TO_CHAR (start_byte + cursor - base);
 829         }
 830       }
 831
 832   immediate_quit = 0;
 833   if (shortage != 0)
 834     *shortage = count * direction;
 835   return start;
 836 }
 837 \f
 838 /* Search for COUNT instances of a line boundary, which means either a
 839    newline or (if selective display enabled) a carriage return.
 840    Start at START.  If COUNT is negative, search backwards.
 841
 842    We report the resulting position by calling TEMP_SET_PT_BOTH.
 843
 844    If we find COUNT instances. we position after (always after,
 845    even if scanning backwards) the COUNTth match, and return 0.
 846
 847    If we don't find COUNT instances before reaching the end of the
 848    buffer (or the beginning, if scanning backwards), we return
 849    the number of line boundaries left unfound, and position at
 850    the limit we bumped up against.
 851
 852    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 853    except in special cases.  */
 854
 855 int
 856 scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
 857 {
 858   int direction = ((count > 0) ? 1 : -1);
 859
 860   register unsigned char *cursor;
 861   unsigned char *base;
 862
 863   EMACS_INT ceiling;
 864   register unsigned char *ceiling_addr;
 865
 866   int old_immediate_quit = immediate_quit;
 867
 868   /* The code that follows is like scan_buffer
 869      but checks for either newline or carriage return.  */
 870
 871   if (allow_quit)
 872     immediate_quit++;
 873
 874   start_byte = CHAR_TO_BYTE (start);
 875
 876   if (count > 0)
 877     {
 878       while (start_byte < limit_byte)
 879         {
 880           ceiling =  BUFFER_CEILING_OF (start_byte);
 881           ceiling = min (limit_byte - 1, ceiling);
 882           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 883           base = (cursor = BYTE_POS_ADDR (start_byte));
 884           while (1)
 885             {
 886               while (*cursor != '\n' && ++cursor != ceiling_addr)
 887                 ;
 888
 889               if (cursor != ceiling_addr)
 890                 {
 891                   if (--count == 0)
 892                     {
 893                       immediate_quit = old_immediate_quit;
 894                       start_byte = start_byte + cursor - base + 1;
 895                       start = BYTE_TO_CHAR (start_byte);
 896                       TEMP_SET_PT_BOTH (start, start_byte);
 897                       return 0;
 898                     }
 899                   else
 900                     if (++cursor == ceiling_addr)
 901                       break;
 902                 }
 903               else
 904                 break;
 905             }
 906           start_byte += cursor - base;
 907         }
 908     }
 909   else
 910     {
 911       while (start_byte > limit_byte)
 912         {
 913           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 914           ceiling = max (limit_byte, ceiling);
 915           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 916           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 917           while (1)
 918             {
 919               while (--cursor != ceiling_addr && *cursor != '\n')
 920                 ;
 921
 922               if (cursor != ceiling_addr)
 923                 {
 924                   if (++count == 0)
 925                     {
 926                       immediate_quit = old_immediate_quit;
 927                       /* Return the position AFTER the match we found.  */
 928                       start_byte = start_byte + cursor - base + 1;
 929                       start = BYTE_TO_CHAR (start_byte);
 930                       TEMP_SET_PT_BOTH (start, start_byte);
 931                       return 0;
 932                     }
 933                 }
 934               else
 935                 break;
 936             }
 937           /* Here we add 1 to compensate for the last decrement
 938              of CURSOR, which took it past the valid range.  */
 939           start_byte += cursor - base + 1;
 940         }
 941     }
 942
 943   TEMP_SET_PT_BOTH (limit, limit_byte);
 944   immediate_quit = old_immediate_quit;
 945
 946   return count * direction;
 947 }
 948
 949 int
 950 find_next_newline_no_quit (EMACS_INT from, int cnt)
 951 {
 952   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 953 }
 954
 955 /* Like find_next_newline, but returns position before the newline,
 956    not after, and only search up to TO.  This isn't just
 957    find_next_newline (...)-1, because you might hit TO.  */
 958
 959 int
 960 find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
 961 {
 962   int shortage;
 963   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 964
 965   if (shortage == 0)
 966     pos--;
 967
 968   return pos;
 969 }
 970 \f
 971 /* Subroutines of Lisp buffer search functions. */
 972
 973 static Lisp_Object
 974 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
 975 {
 976   register int np;
 977   int lim, lim_byte;
 978   int n = direction;
 979
 980   if (!NILP (count))
 981     {
 982       CHECK_NUMBER (count);
 983       n *= XINT (count);
 984     }
 985
 986   CHECK_STRING (string);
 987   if (NILP (bound))
 988     {
 989       if (n > 0)
 990         lim = ZV, lim_byte = ZV_BYTE;
 991       else
 992         lim = BEGV, lim_byte = BEGV_BYTE;
 993     }
 994   else
 995     {
 996       CHECK_NUMBER_COERCE_MARKER (bound);
 997       lim = XINT (bound);
 998       if (n > 0 ? lim < PT : lim > PT)
 999         error ("Invalid search bound (wrong side of point)");
1000       if (lim > ZV)
1001         lim = ZV, lim_byte = ZV_BYTE;
1002       else if (lim < BEGV)
1003         lim = BEGV, lim_byte = BEGV_BYTE;
1004       else
1005         lim_byte = CHAR_TO_BYTE (lim);
1006     }
1007
1008   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1009   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1010     = current_buffer->case_eqv_table;
1011
1012   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1013                       (!NILP (current_buffer->case_fold_search)
1014                        ? current_buffer->case_canon_table
1015                        : Qnil),
1016                       (!NILP (current_buffer->case_fold_search)
1017                        ? current_buffer->case_eqv_table
1018                        : Qnil),
1019                       posix);
1020   if (np <= 0)
1021     {
1022       if (NILP (noerror))
1023         xsignal1 (Qsearch_failed, string);
1024
1025       if (!EQ (noerror, Qt))
1026         {
1027           if (lim < BEGV || lim > ZV)
1028             abort ();
1029           SET_PT_BOTH (lim, lim_byte);
1030           return Qnil;
1031 #if 0 /* This would be clean, but maybe programs depend on
1032          a value of nil here.  */
1033           np = lim;
1034 #endif
1035         }
1036       else
1037         return Qnil;
1038     }
1039
1040   if (np < BEGV || np > ZV)
1041     abort ();
1042
1043   SET_PT (np);
1044
1045   return make_number (np);
1046 }
1047 \f
1048 /* Return 1 if REGEXP it matches just one constant string.  */
1049
1050 static int
1051 trivial_regexp_p (Lisp_Object regexp)
1052 {
1053   int len = SBYTES (regexp);
1054   unsigned char *s = SDATA (regexp);
1055   while (--len >= 0)
1056     {
1057       switch (*s++)
1058         {
1059         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1060           return 0;
1061         case '\\':
1062           if (--len < 0)
1063             return 0;
1064           switch (*s++)
1065             {
1066             case '|': case '(': case ')': case '`': case '\'': case 'b':
1067             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1068             case 'S': case '=': case '{': case '}': case '_':
1069             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1070             case '1': case '2': case '3': case '4': case '5':
1071             case '6': case '7': case '8': case '9':
1072               return 0;
1073             }
1074         }
1075     }
1076   return 1;
1077 }
1078
1079 /* Search for the n'th occurrence of STRING in the current buffer,
1080    starting at position POS and stopping at position LIM,
1081    treating STRING as a literal string if RE is false or as
1082    a regular expression if RE is true.
1083
1084    If N is positive, searching is forward and LIM must be greater than POS.
1085    If N is negative, searching is backward and LIM must be less than POS.
1086
1087    Returns -x if x occurrences remain to be found (x > 0),
1088    or else the position at the beginning of the Nth occurrence
1089    (if searching backward) or the end (if searching forward).
1090
1091    POSIX is nonzero if we want full backtracking (POSIX style)
1092    for this pattern.  0 means backtrack only enough to get a valid match.  */
1093
1094 #define TRANSLATE(out, trt, d)                  \
1095 do                                              \
1096   {                                             \
1097     if (! NILP (trt))                           \
1098       {                                         \
1099         Lisp_Object temp;                       \
1100         temp = Faref (trt, make_number (d));    \
1101         if (INTEGERP (temp))                    \
1102           out = XINT (temp);                    \
1103         else                                    \
1104           out = d;                              \
1105       }                                         \
1106     else                                        \
1107       out = d;                                  \
1108   }                                             \
1109 while (0)
1110
1111 /* Only used in search_buffer, to record the end position of the match
1112    when searching regexps and SEARCH_REGS should not be changed
1113    (i.e. Vinhibit_changing_match_data is non-nil).  */
1114 static struct re_registers search_regs_1;
1115
1116 static EMACS_INT
1117 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1118                RE, trt, inverse_trt, posix)
1119      Lisp_Object string;
1120      EMACS_INT pos;
1121      EMACS_INT pos_byte;
1122      EMACS_INT lim;
1123      EMACS_INT lim_byte;
1124      int n;
1125      int RE;
1126      Lisp_Object trt;
1127      Lisp_Object inverse_trt;
1128      int posix;
1129 {
1130   int len = SCHARS (string);
1131   int len_byte = SBYTES (string);
1132   register int i;
1133
1134   if (running_asynch_code)
1135     save_search_regs ();
1136
1137   /* Searching 0 times means don't move.  */
1138   /* Null string is found at starting position.  */
1139   if (len == 0 || n == 0)
1140     {
1141       set_search_regs (pos_byte, 0);
1142       return pos;
1143     }
1144
1145   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1146     {
1147       unsigned char *p1, *p2;
1148       int s1, s2;
1149       struct re_pattern_buffer *bufp;
1150
1151       bufp = compile_pattern (string,
1152                               (NILP (Vinhibit_changing_match_data)
1153                                ? &search_regs : &search_regs_1),
1154                               trt, posix,
1155                               !NILP (current_buffer->enable_multibyte_characters));
1156
1157       immediate_quit = 1;       /* Quit immediately if user types ^G,
1158                                    because letting this function finish
1159                                    can take too long. */
1160       QUIT;                     /* Do a pending quit right away,
1161                                    to avoid paradoxical behavior */
1162       /* Get pointers and sizes of the two strings
1163          that make up the visible portion of the buffer. */
1164
1165       p1 = BEGV_ADDR;
1166       s1 = GPT_BYTE - BEGV_BYTE;
1167       p2 = GAP_END_ADDR;
1168       s2 = ZV_BYTE - GPT_BYTE;
1169       if (s1 < 0)
1170         {
1171           p2 = p1;
1172           s2 = ZV_BYTE - BEGV_BYTE;
1173           s1 = 0;
1174         }
1175       if (s2 < 0)
1176         {
1177           s1 = ZV_BYTE - BEGV_BYTE;
1178           s2 = 0;
1179         }
1180       re_match_object = Qnil;
1181
1182       while (n < 0)
1183         {
1184           int val;
1185           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1186                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1187                              (NILP (Vinhibit_changing_match_data)
1188                               ? &search_regs : &search_regs_1),
1189                              /* Don't allow match past current point */
1190                              pos_byte - BEGV_BYTE);
1191           if (val == -2)
1192             {
1193               matcher_overflow ();
1194             }
1195           if (val >= 0)
1196             {
1197               if (NILP (Vinhibit_changing_match_data))
1198                 {
1199                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1200                   for (i = 0; i < search_regs.num_regs; i++)
1201                     if (search_regs.start[i] >= 0)
1202                       {
1203                         search_regs.start[i]
1204                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1205                         search_regs.end[i]
1206                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1207                       }
1208                   XSETBUFFER (last_thing_searched, current_buffer);
1209                   /* Set pos to the new position. */
1210                   pos = search_regs.start[0];
1211                 }
1212               else
1213                 {
1214                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1215                   /* Set pos to the new position.  */
1216                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1217                 }
1218             }
1219           else
1220             {
1221               immediate_quit = 0;
1222               return (n);
1223             }
1224           n++;
1225         }
1226       while (n > 0)
1227         {
1228           int val;
1229           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1230                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1231                              (NILP (Vinhibit_changing_match_data)
1232                               ? &search_regs : &search_regs_1),
1233                              lim_byte - BEGV_BYTE);
1234           if (val == -2)
1235             {
1236               matcher_overflow ();
1237             }
1238           if (val >= 0)
1239             {
1240               if (NILP (Vinhibit_changing_match_data))
1241                 {
1242                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1243                   for (i = 0; i < search_regs.num_regs; i++)
1244                     if (search_regs.start[i] >= 0)
1245                       {
1246                         search_regs.start[i]
1247                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1248                         search_regs.end[i]
1249                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1250                       }
1251                   XSETBUFFER (last_thing_searched, current_buffer);
1252                   pos = search_regs.end[0];
1253                 }
1254               else
1255                 {
1256                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1257                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1258                 }
1259             }
1260           else
1261             {
1262               immediate_quit = 0;
1263               return (0 - n);
1264             }
1265           n--;
1266         }
1267       immediate_quit = 0;
1268       return (pos);
1269     }
1270   else                          /* non-RE case */
1271     {
1272       unsigned char *raw_pattern, *pat;
1273       int raw_pattern_size;
1274       int raw_pattern_size_byte;
1275       unsigned char *patbuf;
1276       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1277       unsigned char *base_pat;
1278       /* Set to positive if we find a non-ASCII char that need
1279          translation.  Otherwise set to zero later.  */
1280       int char_base = -1;
1281       int boyer_moore_ok = 1;
1282
1283       /* MULTIBYTE says whether the text to be searched is multibyte.
1284          We must convert PATTERN to match that, or we will not really
1285          find things right.  */
1286
1287       if (multibyte == STRING_MULTIBYTE (string))
1288         {
1289           raw_pattern = (unsigned char *) SDATA (string);
1290           raw_pattern_size = SCHARS (string);
1291           raw_pattern_size_byte = SBYTES (string);
1292         }
1293       else if (multibyte)
1294         {
1295           raw_pattern_size = SCHARS (string);
1296           raw_pattern_size_byte
1297             = count_size_as_multibyte (SDATA (string),
1298                                        raw_pattern_size);
1299           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1300           copy_text (SDATA (string), raw_pattern,
1301                      SCHARS (string), 0, 1);
1302         }
1303       else
1304         {
1305           /* Converting multibyte to single-byte.
1306
1307              ??? Perhaps this conversion should be done in a special way
1308              by subtracting nonascii-insert-offset from each non-ASCII char,
1309              so that only the multibyte chars which really correspond to
1310              the chosen single-byte character set can possibly match.  */
1311           raw_pattern_size = SCHARS (string);
1312           raw_pattern_size_byte = SCHARS (string);
1313           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1314           copy_text (SDATA (string), raw_pattern,
1315                      SBYTES (string), 1, 0);
1316         }
1317
1318       /* Copy and optionally translate the pattern.  */
1319       len = raw_pattern_size;
1320       len_byte = raw_pattern_size_byte;
1321       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1322       pat = patbuf;
1323       base_pat = raw_pattern;
1324       if (multibyte)
1325         {
1326           /* Fill patbuf by translated characters in STRING while
1327              checking if we can use boyer-moore search.  If TRT is
1328              non-nil, we can use boyer-moore search only if TRT can be
1329              represented by the byte array of 256 elements.  For that,
1330              all non-ASCII case-equivalents of all case-senstive
1331              characters in STRING must belong to the same charset and
1332              row.  */
1333
1334           while (--len >= 0)
1335             {
1336               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1337               int c, translated, inverse;
1338               int in_charlen, charlen;
1339
1340               /* If we got here and the RE flag is set, it's because we're
1341                  dealing with a regexp known to be trivial, so the backslash
1342                  just quotes the next character.  */
1343               if (RE && *base_pat == '\\')
1344                 {
1345                   len--;
1346                   raw_pattern_size--;
1347                   len_byte--;
1348                   base_pat++;
1349                 }
1350
1351               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1352
1353               if (NILP (trt))
1354                 {
1355                   str = base_pat;
1356                   charlen = in_charlen;
1357                 }
1358               else
1359                 {
1360                   /* Translate the character.  */
1361                   TRANSLATE (translated, trt, c);
1362                   charlen = CHAR_STRING (translated, str_base);
1363                   str = str_base;
1364
1365                   /* Check if C has any other case-equivalents.  */
1366                   TRANSLATE (inverse, inverse_trt, c);
1367                   /* If so, check if we can use boyer-moore.  */
1368                   if (c != inverse && boyer_moore_ok)
1369                     {
1370                       /* Check if all equivalents belong to the same
1371                          group of characters.  Note that the check of C
1372                          itself is done by the last iteration.  */
1373                       int this_char_base = -1;
1374
1375                       while (boyer_moore_ok)
1376                         {
1377                           if (ASCII_BYTE_P (inverse))
1378                             {
1379                               if (this_char_base > 0)
1380                                 boyer_moore_ok = 0;
1381                               else
1382                                 this_char_base = 0;
1383                             }
1384                           else if (CHAR_BYTE8_P (inverse))
1385                             /* Boyer-moore search can't handle a
1386                                translation of an eight-bit
1387                                character.  */
1388                             boyer_moore_ok = 0;
1389                           else if (this_char_base < 0)
1390                             {
1391                               this_char_base = inverse & ~0x3F;
1392                               if (char_base < 0)
1393                                 char_base = this_char_base;
1394                               else if (this_char_base != char_base)
1395                                 boyer_moore_ok = 0;
1396                             }
1397                           else if ((inverse & ~0x3F) != this_char_base)
1398                             boyer_moore_ok = 0;
1399                           if (c == inverse)
1400                             break;
1401                           TRANSLATE (inverse, inverse_trt, inverse);
1402                         }
1403                     }
1404                 }
1405
1406               /* Store this character into the translated pattern.  */
1407               memcpy (pat, str, charlen);
1408               pat += charlen;
1409               base_pat += in_charlen;
1410               len_byte -= in_charlen;
1411             }
1412
1413           /* If char_base is still negative we didn't find any translated
1414              non-ASCII characters.  */
1415           if (char_base < 0)
1416             char_base = 0;
1417         }
1418       else
1419         {
1420           /* Unibyte buffer.  */
1421           char_base = 0;
1422           while (--len >= 0)
1423             {
1424               int c, translated;
1425
1426               /* If we got here and the RE flag is set, it's because we're
1427                  dealing with a regexp known to be trivial, so the backslash
1428                  just quotes the next character.  */
1429               if (RE && *base_pat == '\\')
1430                 {
1431                   len--;
1432                   raw_pattern_size--;
1433                   base_pat++;
1434                 }
1435               c = *base_pat++;
1436               TRANSLATE (translated, trt, c);
1437               *pat++ = translated;
1438             }
1439         }
1440
1441       len_byte = pat - patbuf;
1442       len = raw_pattern_size;
1443       pat = base_pat = patbuf;
1444
1445       if (boyer_moore_ok)
1446         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1447                             pos, pos_byte, lim, lim_byte,
1448                             char_base);
1449       else
1450         return simple_search (n, pat, len, len_byte, trt,
1451                               pos, pos_byte, lim, lim_byte);
1452     }
1453 }
1454 \f
1455 /* Do a simple string search N times for the string PAT,
1456    whose length is LEN/LEN_BYTE,
1457    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1458    TRT is the translation table.
1459
1460    Return the character position where the match is found.
1461    Otherwise, if M matches remained to be found, return -M.
1462
1463    This kind of search works regardless of what is in PAT and
1464    regardless of what is in TRT.  It is used in cases where
1465    boyer_moore cannot work.  */
1466
1467 static EMACS_INT
1468 simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
1469 {
1470   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1471   int forward = n > 0;
1472   /* Number of buffer bytes matched.  Note that this may be different
1473      from len_byte in a multibyte buffer.  */
1474   int match_byte;
1475
1476   if (lim > pos && multibyte)
1477     while (n > 0)
1478       {
1479         while (1)
1480           {
1481             /* Try matching at position POS.  */
1482             EMACS_INT this_pos = pos;
1483             EMACS_INT this_pos_byte = pos_byte;
1484             int this_len = len;
1485             unsigned char *p = pat;
1486             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1487               goto stop;
1488
1489             while (this_len > 0)
1490               {
1491                 int charlen, buf_charlen;
1492                 int pat_ch, buf_ch;
1493
1494                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1495                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1496                                                  buf_charlen);
1497                 TRANSLATE (buf_ch, trt, buf_ch);
1498
1499                 if (buf_ch != pat_ch)
1500                   break;
1501
1502                 this_len--;
1503                 p += charlen;
1504
1505                 this_pos_byte += buf_charlen;
1506                 this_pos++;
1507               }
1508
1509             if (this_len == 0)
1510               {
1511                 match_byte = this_pos_byte - pos_byte;
1512                 pos += len;
1513                 pos_byte += match_byte;
1514                 break;
1515               }
1516
1517             INC_BOTH (pos, pos_byte);
1518           }
1519
1520         n--;
1521       }
1522   else if (lim > pos)
1523     while (n > 0)
1524       {
1525         while (1)
1526           {
1527             /* Try matching at position POS.  */
1528             EMACS_INT this_pos = pos;
1529             int this_len = len;
1530             unsigned char *p = pat;
1531
1532             if (pos + len > lim)
1533               goto stop;
1534
1535             while (this_len > 0)
1536               {
1537                 int pat_ch = *p++;
1538                 int buf_ch = FETCH_BYTE (this_pos);
1539                 TRANSLATE (buf_ch, trt, buf_ch);
1540
1541                 if (buf_ch != pat_ch)
1542                   break;
1543
1544                 this_len--;
1545                 this_pos++;
1546               }
1547
1548             if (this_len == 0)
1549               {
1550                 match_byte = len;
1551                 pos += len;
1552                 break;
1553               }
1554
1555             pos++;
1556           }
1557
1558         n--;
1559       }
1560   /* Backwards search.  */
1561   else if (lim < pos && multibyte)
1562     while (n < 0)
1563       {
1564         while (1)
1565           {
1566             /* Try matching at position POS.  */
1567             EMACS_INT this_pos = pos;
1568             EMACS_INT this_pos_byte = pos_byte;
1569             int this_len = len;
1570             const unsigned char *p = pat + len_byte;
1571
1572             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1573               goto stop;
1574
1575             while (this_len > 0)
1576               {
1577                 int charlen;
1578                 int pat_ch, buf_ch;
1579
1580                 DEC_BOTH (this_pos, this_pos_byte);
1581                 PREV_CHAR_BOUNDARY (p, pat);
1582                 pat_ch = STRING_CHAR (p);
1583                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1584                 TRANSLATE (buf_ch, trt, buf_ch);
1585
1586                 if (buf_ch != pat_ch)
1587                   break;
1588
1589                 this_len--;
1590               }
1591
1592             if (this_len == 0)
1593               {
1594                 match_byte = pos_byte - this_pos_byte;
1595                 pos = this_pos;
1596                 pos_byte = this_pos_byte;
1597                 break;
1598               }
1599
1600             DEC_BOTH (pos, pos_byte);
1601           }
1602
1603         n++;
1604       }
1605   else if (lim < pos)
1606     while (n < 0)
1607       {
1608         while (1)
1609           {
1610             /* Try matching at position POS.  */
1611             EMACS_INT this_pos = pos - len;
1612             int this_len = len;
1613             unsigned char *p = pat;
1614
1615             if (this_pos < lim)
1616               goto stop;
1617
1618             while (this_len > 0)
1619               {
1620                 int pat_ch = *p++;
1621                 int buf_ch = FETCH_BYTE (this_pos);
1622                 TRANSLATE (buf_ch, trt, buf_ch);
1623
1624                 if (buf_ch != pat_ch)
1625                   break;
1626                 this_len--;
1627                 this_pos++;
1628               }
1629
1630             if (this_len == 0)
1631               {
1632                 match_byte = len;
1633                 pos -= len;
1634                 break;
1635               }
1636
1637             pos--;
1638           }
1639
1640         n++;
1641       }
1642
1643  stop:
1644   if (n == 0)
1645     {
1646       if (forward)
1647         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1648       else
1649         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1650
1651       return pos;
1652     }
1653   else if (n > 0)
1654     return -n;
1655   else
1656     return n;
1657 }
1658 \f
1659 /* Do Boyer-Moore search N times for the string BASE_PAT,
1660    whose length is LEN/LEN_BYTE,
1661    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1662    DIRECTION says which direction we search in.
1663    TRT and INVERSE_TRT are translation tables.
1664    Characters in PAT are already translated by TRT.
1665
1666    This kind of search works if all the characters in BASE_PAT that
1667    have nontrivial translation are the same aside from the last byte.
1668    This makes it possible to translate just the last byte of a
1669    character, and do so after just a simple test of the context.
1670    CHAR_BASE is nonzero if there is such a non-ASCII character.
1671
1672    If that criterion is not satisfied, do not call this function.  */
1673
1674 static EMACS_INT
1675 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1676              pos, pos_byte, lim, lim_byte, char_base)
1677      int n;
1678      unsigned char *base_pat;
1679      int len, len_byte;
1680      Lisp_Object trt;
1681      Lisp_Object inverse_trt;
1682      EMACS_INT pos, pos_byte;
1683      EMACS_INT lim, lim_byte;
1684      int char_base;
1685 {
1686   int direction = ((n > 0) ? 1 : -1);
1687   register int dirlen;
1688   EMACS_INT limit;
1689   int stride_for_teases = 0;
1690   int BM_tab[0400];
1691   register unsigned char *cursor, *p_limit;
1692   register int i, j;
1693   unsigned char *pat, *pat_end;
1694   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1695
1696   unsigned char simple_translate[0400];
1697   /* These are set to the preceding bytes of a byte to be translated
1698      if char_base is nonzero.  As the maximum byte length of a
1699      multibyte character is 5, we have to check at most four previous
1700      bytes.  */
1701   int translate_prev_byte1 = 0;
1702   int translate_prev_byte2 = 0;
1703   int translate_prev_byte3 = 0;
1704   int translate_prev_byte4 = 0;
1705
1706   /* The general approach is that we are going to maintain that we know
1707      the first (closest to the present position, in whatever direction
1708      we're searching) character that could possibly be the last
1709      (furthest from present position) character of a valid match.  We
1710      advance the state of our knowledge by looking at that character
1711      and seeing whether it indeed matches the last character of the
1712      pattern.  If it does, we take a closer look.  If it does not, we
1713      move our pointer (to putative last characters) as far as is
1714      logically possible.  This amount of movement, which I call a
1715      stride, will be the length of the pattern if the actual character
1716      appears nowhere in the pattern, otherwise it will be the distance
1717      from the last occurrence of that character to the end of the
1718      pattern.  If the amount is zero we have a possible match.  */
1719
1720   /* Here we make a "mickey mouse" BM table.  The stride of the search
1721      is determined only by the last character of the putative match.
1722      If that character does not match, we will stride the proper
1723      distance to propose a match that superimposes it on the last
1724      instance of a character that matches it (per trt), or misses
1725      it entirely if there is none. */
1726
1727   dirlen = len_byte * direction;
1728
1729   /* Record position after the end of the pattern.  */
1730   pat_end = base_pat + len_byte;
1731   /* BASE_PAT points to a character that we start scanning from.
1732      It is the first character in a forward search,
1733      the last character in a backward search.  */
1734   if (direction < 0)
1735     base_pat = pat_end - 1;
1736
1737   /* A character that does not appear in the pattern induces a
1738      stride equal to the pattern length.  */
1739   for (i = 0; i < 0400; i++)
1740     BM_tab[i] = dirlen;
1741
1742   /* We use this for translation, instead of TRT itself.
1743      We fill this in to handle the characters that actually
1744      occur in the pattern.  Others don't matter anyway!  */
1745   for (i = 0; i < 0400; i++)
1746     simple_translate[i] = i;
1747
1748   if (char_base)
1749     {
1750       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1751          byte following them are the target of translation.  */
1752       unsigned char str[MAX_MULTIBYTE_LENGTH];
1753       int len = CHAR_STRING (char_base, str);
1754
1755       translate_prev_byte1 = str[len - 2];
1756       if (len > 2)
1757         {
1758           translate_prev_byte2 = str[len - 3];
1759           if (len > 3)
1760             {
1761               translate_prev_byte3 = str[len - 4];
1762               if (len > 4)
1763                 translate_prev_byte4 = str[len - 5];
1764             }
1765         }
1766     }
1767
1768   i = 0;
1769   while (i != dirlen)
1770     {
1771       unsigned char *ptr = base_pat + i;
1772       i += direction;
1773       if (! NILP (trt))
1774         {
1775           /* If the byte currently looking at is the last of a
1776              character to check case-equivalents, set CH to that
1777              character.  An ASCII character and a non-ASCII character
1778              matching with CHAR_BASE are to be checked.  */
1779           int ch = -1;
1780
1781           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1782             ch = *ptr;
1783           else if (char_base
1784                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1785             {
1786               unsigned char *charstart = ptr - 1;
1787
1788               while (! (CHAR_HEAD_P (*charstart)))
1789                 charstart--;
1790               ch = STRING_CHAR (charstart);
1791               if (char_base != (ch & ~0x3F))
1792                 ch = -1;
1793             }
1794
1795           if (ch >= 0200)
1796             j = (ch & 0x3F) | 0200;
1797           else
1798             j = *ptr;
1799
1800           if (i == dirlen)
1801             stride_for_teases = BM_tab[j];
1802
1803           BM_tab[j] = dirlen - i;
1804           /* A translation table is accompanied by its inverse -- see */
1805           /* comment following downcase_table for details */
1806           if (ch >= 0)
1807             {
1808               int starting_ch = ch;
1809               int starting_j = j;
1810
1811               while (1)
1812                 {
1813                   TRANSLATE (ch, inverse_trt, ch);
1814                   if (ch >= 0200)
1815                     j = (ch & 0x3F) | 0200;
1816                   else
1817                     j = ch;
1818
1819                   /* For all the characters that map into CH,
1820                      set up simple_translate to map the last byte
1821                      into STARTING_J.  */
1822                   simple_translate[j] = starting_j;
1823                   if (ch == starting_ch)
1824                     break;
1825                   BM_tab[j] = dirlen - i;
1826                 }
1827             }
1828         }
1829       else
1830         {
1831           j = *ptr;
1832
1833           if (i == dirlen)
1834             stride_for_teases = BM_tab[j];
1835           BM_tab[j] = dirlen - i;
1836         }
1837       /* stride_for_teases tells how much to stride if we get a
1838          match on the far character but are subsequently
1839          disappointed, by recording what the stride would have been
1840          for that character if the last character had been
1841          different.  */
1842     }
1843   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1844   /* loop invariant - POS_BYTE points at where last char (first
1845      char if reverse) of pattern would align in a possible match.  */
1846   while (n != 0)
1847     {
1848       EMACS_INT tail_end;
1849       unsigned char *tail_end_ptr;
1850
1851       /* It's been reported that some (broken) compiler thinks that
1852          Boolean expressions in an arithmetic context are unsigned.
1853          Using an explicit ?1:0 prevents this.  */
1854       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1855           < 0)
1856         return (n * (0 - direction));
1857       /* First we do the part we can by pointers (maybe nothing) */
1858       QUIT;
1859       pat = base_pat;
1860       limit = pos_byte - dirlen + direction;
1861       if (direction > 0)
1862         {
1863           limit = BUFFER_CEILING_OF (limit);
1864           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1865              can take on without hitting edge of buffer or the gap.  */
1866           limit = min (limit, pos_byte + 20000);
1867           limit = min (limit, lim_byte - 1);
1868         }
1869       else
1870         {
1871           limit = BUFFER_FLOOR_OF (limit);
1872           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1873              can take on without hitting edge of buffer or the gap.  */
1874           limit = max (limit, pos_byte - 20000);
1875           limit = max (limit, lim_byte);
1876         }
1877       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1878       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1879
1880       if ((limit - pos_byte) * direction > 20)
1881         {
1882           unsigned char *p2;
1883
1884           p_limit = BYTE_POS_ADDR (limit);
1885           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1886           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1887           while (1)             /* use one cursor setting as long as i can */
1888             {
1889               if (direction > 0) /* worth duplicating */
1890                 {
1891                   while (cursor <= p_limit)
1892                     {
1893                       if (BM_tab[*cursor] == 0)
1894                         goto hit;
1895                       cursor += BM_tab[*cursor];
1896                     }
1897                 }
1898               else
1899                 {
1900                   while (cursor >= p_limit)
1901                     {
1902                       if (BM_tab[*cursor] == 0)
1903                         goto hit;
1904                       cursor += BM_tab[*cursor];
1905                     }
1906                 }
1907               /* If you are here, cursor is beyond the end of the
1908                  searched region.  You fail to match within the
1909                  permitted region and would otherwise try a character
1910                  beyond that region.  */
1911               break;
1912
1913             hit:
1914               i = dirlen - direction;
1915               if (! NILP (trt))
1916                 {
1917                   while ((i -= direction) + direction != 0)
1918                     {
1919                       int ch;
1920                       cursor -= direction;
1921                       /* Translate only the last byte of a character.  */
1922                       if (! multibyte
1923                           || ((cursor == tail_end_ptr
1924                                || CHAR_HEAD_P (cursor[1]))
1925                               && (CHAR_HEAD_P (cursor[0])
1926                                   /* Check if this is the last byte of
1927                                      a translable character.  */
1928                                   || (translate_prev_byte1 == cursor[-1]
1929                                       && (CHAR_HEAD_P (translate_prev_byte1)
1930                                           || (translate_prev_byte2 == cursor[-2]
1931                                               && (CHAR_HEAD_P (translate_prev_byte2)
1932                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1933                         ch = simple_translate[*cursor];
1934                       else
1935                         ch = *cursor;
1936                       if (pat[i] != ch)
1937                         break;
1938                     }
1939                 }
1940               else
1941                 {
1942                   while ((i -= direction) + direction != 0)
1943                     {
1944                       cursor -= direction;
1945                       if (pat[i] != *cursor)
1946                         break;
1947                     }
1948                 }
1949               cursor += dirlen - i - direction; /* fix cursor */
1950               if (i + direction == 0)
1951                 {
1952                   EMACS_INT position, start, end;
1953
1954                   cursor -= direction;
1955
1956                   position = pos_byte + cursor - p2 + ((direction > 0)
1957                                                        ? 1 - len_byte : 0);
1958                   set_search_regs (position, len_byte);
1959
1960                   if (NILP (Vinhibit_changing_match_data))
1961                     {
1962                       start = search_regs.start[0];
1963                       end = search_regs.end[0];
1964                     }
1965                   else
1966                     /* If Vinhibit_changing_match_data is non-nil,
1967                        search_regs will not be changed.  So let's
1968                        compute start and end here.  */
1969                     {
1970                       start = BYTE_TO_CHAR (position);
1971                       end = BYTE_TO_CHAR (position + len_byte);
1972                     }
1973
1974                   if ((n -= direction) != 0)
1975                     cursor += dirlen; /* to resume search */
1976                   else
1977                     return direction > 0 ? end : start;
1978                 }
1979               else
1980                 cursor += stride_for_teases; /* <sigh> we lose -  */
1981             }
1982           pos_byte += cursor - p2;
1983         }
1984       else
1985         /* Now we'll pick up a clump that has to be done the hard
1986            way because it covers a discontinuity.  */
1987         {
1988           limit = ((direction > 0)
1989                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1990                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1991           limit = ((direction > 0)
1992                    ? min (limit + len_byte, lim_byte - 1)
1993                    : max (limit - len_byte, lim_byte));
1994           /* LIMIT is now the last value POS_BYTE can have
1995              and still be valid for a possible match.  */
1996           while (1)
1997             {
1998               /* This loop can be coded for space rather than
1999                  speed because it will usually run only once.
2000                  (the reach is at most len + 21, and typically
2001                  does not exceed len).  */
2002               while ((limit - pos_byte) * direction >= 0)
2003                 {
2004                   int ch = FETCH_BYTE (pos_byte);
2005                   if (BM_tab[ch] == 0)
2006                     goto hit2;
2007                   pos_byte += BM_tab[ch];
2008                 }
2009               break;    /* ran off the end */
2010
2011             hit2:
2012               /* Found what might be a match.  */
2013               i = dirlen - direction;
2014               while ((i -= direction) + direction != 0)
2015                 {
2016                   int ch;
2017                   unsigned char *ptr;
2018                   pos_byte -= direction;
2019                   ptr = BYTE_POS_ADDR (pos_byte);
2020                   /* Translate only the last byte of a character.  */
2021                   if (! multibyte
2022                       || ((ptr == tail_end_ptr
2023                            || CHAR_HEAD_P (ptr[1]))
2024                           && (CHAR_HEAD_P (ptr[0])
2025                               /* Check if this is the last byte of a
2026                                  translable character.  */
2027                               || (translate_prev_byte1 == ptr[-1]
2028                                   && (CHAR_HEAD_P (translate_prev_byte1)
2029                                       || (translate_prev_byte2 == ptr[-2]
2030                                           && (CHAR_HEAD_P (translate_prev_byte2)
2031                                               || translate_prev_byte3 == ptr[-3])))))))
2032                     ch = simple_translate[*ptr];
2033                   else
2034                     ch = *ptr;
2035                   if (pat[i] != ch)
2036                     break;
2037                 }
2038               /* Above loop has moved POS_BYTE part or all the way
2039                  back to the first pos (last pos if reverse).
2040                  Set it once again at the last (first if reverse) char.  */
2041               pos_byte += dirlen - i - direction;
2042               if (i + direction == 0)
2043                 {
2044                   EMACS_INT position, start, end;
2045                   pos_byte -= direction;
2046
2047                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2048                   set_search_regs (position, len_byte);
2049
2050                   if (NILP (Vinhibit_changing_match_data))
2051                     {
2052                       start = search_regs.start[0];
2053                       end = search_regs.end[0];
2054                     }
2055                   else
2056                     /* If Vinhibit_changing_match_data is non-nil,
2057                        search_regs will not be changed.  So let's
2058                        compute start and end here.  */
2059                     {
2060                       start = BYTE_TO_CHAR (position);
2061                       end = BYTE_TO_CHAR (position + len_byte);
2062                     }
2063
2064                   if ((n -= direction) != 0)
2065                     pos_byte += dirlen; /* to resume search */
2066                   else
2067                     return direction > 0 ? end : start;
2068                 }
2069               else
2070                 pos_byte += stride_for_teases;
2071             }
2072           }
2073       /* We have done one clump.  Can we continue? */
2074       if ((lim_byte - pos_byte) * direction < 0)
2075         return ((0 - n) * direction);
2076     }
2077   return BYTE_TO_CHAR (pos_byte);
2078 }
2079
2080 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2081    for the overall match just found in the current buffer.
2082    Also clear out the match data for registers 1 and up.  */
2083
2084 static void
2085 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2086 {
2087   int i;
2088
2089   if (!NILP (Vinhibit_changing_match_data))
2090     return;
2091
2092   /* Make sure we have registers in which to store
2093      the match position.  */
2094   if (search_regs.num_regs == 0)
2095     {
2096       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2097       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2098       search_regs.num_regs = 2;
2099     }
2100
2101   /* Clear out the other registers.  */
2102   for (i = 1; i < search_regs.num_regs; i++)
2103     {
2104       search_regs.start[i] = -1;
2105       search_regs.end[i] = -1;
2106     }
2107
2108   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2109   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2110   XSETBUFFER (last_thing_searched, current_buffer);
2111 }
2112 \f
2113 /* Given STRING, a string of words separated by word delimiters,
2114    compute a regexp that matches those exact words separated by
2115    arbitrary punctuation.  If LAX is nonzero, the end of the string
2116    need not match a word boundary unless it ends in whitespace.  */
2117
2118 static Lisp_Object
2119 wordify (Lisp_Object string, int lax)
2120 {
2121   register unsigned char *p, *o;
2122   register int i, i_byte, len, punct_count = 0, word_count = 0;
2123   Lisp_Object val;
2124   int prev_c = 0;
2125   int adjust, whitespace_at_end;
2126
2127   CHECK_STRING (string);
2128   p = SDATA (string);
2129   len = SCHARS (string);
2130
2131   for (i = 0, i_byte = 0; i < len; )
2132     {
2133       int c;
2134
2135       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2136
2137       if (SYNTAX (c) != Sword)
2138         {
2139           punct_count++;
2140           if (i > 0 && SYNTAX (prev_c) == Sword)
2141             word_count++;
2142         }
2143
2144       prev_c = c;
2145     }
2146
2147   if (SYNTAX (prev_c) == Sword)
2148     {
2149       word_count++;
2150       whitespace_at_end = 0;
2151     }
2152   else
2153     whitespace_at_end = 1;
2154
2155   if (!word_count)
2156     return empty_unibyte_string;
2157
2158   adjust = - punct_count + 5 * (word_count - 1)
2159     + ((lax && !whitespace_at_end) ? 2 : 4);
2160   if (STRING_MULTIBYTE (string))
2161     val = make_uninit_multibyte_string (len + adjust,
2162                                         SBYTES (string)
2163                                         + adjust);
2164   else
2165     val = make_uninit_string (len + adjust);
2166
2167   o = SDATA (val);
2168   *o++ = '\\';
2169   *o++ = 'b';
2170   prev_c = 0;
2171
2172   for (i = 0, i_byte = 0; i < len; )
2173     {
2174       int c;
2175       int i_byte_orig = i_byte;
2176
2177       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2178
2179       if (SYNTAX (c) == Sword)
2180         {
2181           memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
2182           o += i_byte - i_byte_orig;
2183         }
2184       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2185         {
2186           *o++ = '\\';
2187           *o++ = 'W';
2188           *o++ = '\\';
2189           *o++ = 'W';
2190           *o++ = '*';
2191         }
2192
2193       prev_c = c;
2194     }
2195
2196   if (!lax || whitespace_at_end)
2197     {
2198       *o++ = '\\';
2199       *o++ = 'b';
2200     }
2201
2202   return val;
2203 }
2204 \f
2205 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2206        "MSearch backward: ",
2207        doc: /* Search backward from point for STRING.
2208 Set point to the beginning of the occurrence found, and return point.
2209 An optional second argument bounds the search; it is a buffer position.
2210 The match found must not extend before that position.
2211 Optional third argument, if t, means if fail just return nil (no error).
2212  If not nil and not t, position at limit of search and return nil.
2213 Optional fourth argument is repeat count--search for successive occurrences.
2214
2215 Search case-sensitivity is determined by the value of the variable
2216 `case-fold-search', which see.
2217
2218 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2219      (string, bound, noerror, count)
2220      Lisp_Object string, bound, noerror, count;
2221 {
2222   return search_command (string, bound, noerror, count, -1, 0, 0);
2223 }
2224
2225 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2226        doc: /* Search forward from point for STRING.
2227 Set point to the end of the occurrence found, and return point.
2228 An optional second argument bounds the search; it is a buffer position.
2229 The match found must not extend after that position.  A value of nil is
2230   equivalent to (point-max).
2231 Optional third argument, if t, means if fail just return nil (no error).
2232   If not nil and not t, move to limit of search and return nil.
2233 Optional fourth argument is repeat count--search for successive occurrences.
2234
2235 Search case-sensitivity is determined by the value of the variable
2236 `case-fold-search', which see.
2237
2238 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2239      (string, bound, noerror, count)
2240      Lisp_Object string, bound, noerror, count;
2241 {
2242   return search_command (string, bound, noerror, count, 1, 0, 0);
2243 }
2244
2245 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2246        "sWord search backward: ",
2247        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2248 Set point to the beginning of the occurrence found, and return point.
2249 An optional second argument bounds the search; it is a buffer position.
2250 The match found must not extend before that position.
2251 Optional third argument, if t, means if fail just return nil (no error).
2252   If not nil and not t, move to limit of search and return nil.
2253 Optional fourth argument is repeat count--search for successive occurrences.  */)
2254      (string, bound, noerror, count)
2255      Lisp_Object string, bound, noerror, count;
2256 {
2257   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2258 }
2259
2260 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2261        "sWord search: ",
2262        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2263 Set point to the end of the occurrence found, and return point.
2264 An optional second argument bounds the search; it is a buffer position.
2265 The match found must not extend after that position.
2266 Optional third argument, if t, means if fail just return nil (no error).
2267   If not nil and not t, move to limit of search and return nil.
2268 Optional fourth argument is repeat count--search for successive occurrences.  */)
2269      (string, bound, noerror, count)
2270      Lisp_Object string, bound, noerror, count;
2271 {
2272   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2273 }
2274
2275 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2276        "sWord search backward: ",
2277        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2278 Set point to the beginning of the occurrence found, and return point.
2279
2280 Unlike `word-search-backward', the end of STRING need not match a word
2281 boundary unless it ends in whitespace.
2282
2283 An optional second argument bounds the search; it is a buffer position.
2284 The match found must not extend before that position.
2285 Optional third argument, if t, means if fail just return nil (no error).
2286   If not nil and not t, move to limit of search and return nil.
2287 Optional fourth argument is repeat count--search for successive occurrences.  */)
2288      (string, bound, noerror, count)
2289      Lisp_Object string, bound, noerror, count;
2290 {
2291   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2292 }
2293
2294 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2295        "sWord search: ",
2296        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2297 Set point to the end of the occurrence found, and return point.
2298
2299 Unlike `word-search-forward', the end of STRING need not match a word
2300 boundary unless it ends in whitespace.
2301
2302 An optional second argument bounds the search; it is a buffer position.
2303 The match found must not extend after that position.
2304 Optional third argument, if t, means if fail just return nil (no error).
2305   If not nil and not t, move to limit of search and return nil.
2306 Optional fourth argument is repeat count--search for successive occurrences.  */)
2307      (string, bound, noerror, count)
2308      Lisp_Object string, bound, noerror, count;
2309 {
2310   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2311 }
2312
2313 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2314        "sRE search backward: ",
2315        doc: /* Search backward from point for match for regular expression REGEXP.
2316 Set point to the beginning of the match, and return point.
2317 The match found is the one starting last in the buffer
2318 and yet ending before the origin of the search.
2319 An optional second argument bounds the search; it is a buffer position.
2320 The match found must start at or after that position.
2321 Optional third argument, if t, means if fail just return nil (no error).
2322   If not nil and not t, move to limit of search and return nil.
2323 Optional fourth argument is repeat count--search for successive occurrences.
2324 See also the functions `match-beginning', `match-end', `match-string',
2325 and `replace-match'.  */)
2326      (regexp, bound, noerror, count)
2327      Lisp_Object regexp, bound, noerror, count;
2328 {
2329   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2330 }
2331
2332 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2333        "sRE search: ",
2334        doc: /* Search forward from point for regular expression REGEXP.
2335 Set point to the end of the occurrence found, and return point.
2336 An optional second argument bounds the search; it is a buffer position.
2337 The match found must not extend after that position.
2338 Optional third argument, if t, means if fail just return nil (no error).
2339   If not nil and not t, move to limit of search and return nil.
2340 Optional fourth argument is repeat count--search for successive occurrences.
2341 See also the functions `match-beginning', `match-end', `match-string',
2342 and `replace-match'.  */)
2343      (regexp, bound, noerror, count)
2344      Lisp_Object regexp, bound, noerror, count;
2345 {
2346   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2347 }
2348
2349 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2350        "sPosix search backward: ",
2351        doc: /* Search backward from point for match for regular expression REGEXP.
2352 Find the longest match in accord with Posix regular expression rules.
2353 Set point to the beginning of the match, and return point.
2354 The match found is the one starting last in the buffer
2355 and yet ending before the origin of the search.
2356 An optional second argument bounds the search; it is a buffer position.
2357 The match found must start at or after that position.
2358 Optional third argument, if t, means if fail just return nil (no error).
2359   If not nil and not t, move to limit of search and return nil.
2360 Optional fourth argument is repeat count--search for successive occurrences.
2361 See also the functions `match-beginning', `match-end', `match-string',
2362 and `replace-match'.  */)
2363      (regexp, bound, noerror, count)
2364      Lisp_Object regexp, bound, noerror, count;
2365 {
2366   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2367 }
2368
2369 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2370        "sPosix search: ",
2371        doc: /* Search forward from point for regular expression REGEXP.
2372 Find the longest match in accord with Posix regular expression rules.
2373 Set point to the end of the occurrence found, and return point.
2374 An optional second argument bounds the search; it is a buffer position.
2375 The match found must not extend after that position.
2376 Optional third argument, if t, means if fail just return nil (no error).
2377   If not nil and not t, move to limit of search and return nil.
2378 Optional fourth argument is repeat count--search for successive occurrences.
2379 See also the functions `match-beginning', `match-end', `match-string',
2380 and `replace-match'.  */)
2381      (regexp, bound, noerror, count)
2382      Lisp_Object regexp, bound, noerror, count;
2383 {
2384   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2385 }
2386 \f
2387 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2388        doc: /* Replace text matched by last search with NEWTEXT.
2389 Leave point at the end of the replacement text.
2390
2391 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2392 Otherwise maybe capitalize the whole text, or maybe just word initials,
2393 based on the replaced text.
2394 If the replaced text has only capital letters
2395 and has at least one multiletter word, convert NEWTEXT to all caps.
2396 Otherwise if all words are capitalized in the replaced text,
2397 capitalize each word in NEWTEXT.
2398
2399 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2400 Otherwise treat `\\' as special:
2401   `\\&' in NEWTEXT means substitute original matched text.
2402   `\\N' means substitute what matched the Nth `\\(...\\)'.
2403        If Nth parens didn't match, substitute nothing.
2404   `\\\\' means insert one `\\'.
2405 Case conversion does not apply to these substitutions.
2406
2407 FIXEDCASE and LITERAL are optional arguments.
2408
2409 The optional fourth argument STRING can be a string to modify.
2410 This is meaningful when the previous match was done against STRING,
2411 using `string-match'.  When used this way, `replace-match'
2412 creates and returns a new string made by copying STRING and replacing
2413 the part of STRING that was matched.
2414
2415 The optional fifth argument SUBEXP specifies a subexpression;
2416 it says to replace just that subexpression with NEWTEXT,
2417 rather than replacing the entire matched text.
2418 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2419 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2420 NEWTEXT in place of subexp N.
2421 This is useful only after a regular expression search or match,
2422 since only regular expressions have distinguished subexpressions.  */)
2423      (newtext, fixedcase, literal, string, subexp)
2424      Lisp_Object newtext, fixedcase, literal, string, subexp;
2425 {
2426   enum { nochange, all_caps, cap_initial } case_action;
2427   register int pos, pos_byte;
2428   int some_multiletter_word;
2429   int some_lowercase;
2430   int some_uppercase;
2431   int some_nonuppercase_initial;
2432   register int c, prevc;
2433   int sub;
2434   EMACS_INT opoint, newpoint;
2435
2436   CHECK_STRING (newtext);
2437
2438   if (! NILP (string))
2439     CHECK_STRING (string);
2440
2441   case_action = nochange;       /* We tried an initialization */
2442                                 /* but some C compilers blew it */
2443
2444   if (search_regs.num_regs <= 0)
2445     error ("`replace-match' called before any match found");
2446
2447   if (NILP (subexp))
2448     sub = 0;
2449   else
2450     {
2451       CHECK_NUMBER (subexp);
2452       sub = XINT (subexp);
2453       if (sub < 0 || sub >= search_regs.num_regs)
2454         args_out_of_range (subexp, make_number (search_regs.num_regs));
2455     }
2456
2457   if (NILP (string))
2458     {
2459       if (search_regs.start[sub] < BEGV
2460           || search_regs.start[sub] > search_regs.end[sub]
2461           || search_regs.end[sub] > ZV)
2462         args_out_of_range (make_number (search_regs.start[sub]),
2463                            make_number (search_regs.end[sub]));
2464     }
2465   else
2466     {
2467       if (search_regs.start[sub] < 0
2468           || search_regs.start[sub] > search_regs.end[sub]
2469           || search_regs.end[sub] > SCHARS (string))
2470         args_out_of_range (make_number (search_regs.start[sub]),
2471                            make_number (search_regs.end[sub]));
2472     }
2473
2474   if (NILP (fixedcase))
2475     {
2476       /* Decide how to casify by examining the matched text. */
2477       EMACS_INT last;
2478
2479       pos = search_regs.start[sub];
2480       last = search_regs.end[sub];
2481
2482       if (NILP (string))
2483         pos_byte = CHAR_TO_BYTE (pos);
2484       else
2485         pos_byte = string_char_to_byte (string, pos);
2486
2487       prevc = '\n';
2488       case_action = all_caps;
2489
2490       /* some_multiletter_word is set nonzero if any original word
2491          is more than one letter long. */
2492       some_multiletter_word = 0;
2493       some_lowercase = 0;
2494       some_nonuppercase_initial = 0;
2495       some_uppercase = 0;
2496
2497       while (pos < last)
2498         {
2499           if (NILP (string))
2500             {
2501               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2502               INC_BOTH (pos, pos_byte);
2503             }
2504           else
2505             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2506
2507           if (LOWERCASEP (c))
2508             {
2509               /* Cannot be all caps if any original char is lower case */
2510
2511               some_lowercase = 1;
2512               if (SYNTAX (prevc) != Sword)
2513                 some_nonuppercase_initial = 1;
2514               else
2515                 some_multiletter_word = 1;
2516             }
2517           else if (UPPERCASEP (c))
2518             {
2519               some_uppercase = 1;
2520               if (SYNTAX (prevc) != Sword)
2521                 ;
2522               else
2523                 some_multiletter_word = 1;
2524             }
2525           else
2526             {
2527               /* If the initial is a caseless word constituent,
2528                  treat that like a lowercase initial.  */
2529               if (SYNTAX (prevc) != Sword)
2530                 some_nonuppercase_initial = 1;
2531             }
2532
2533           prevc = c;
2534         }
2535
2536       /* Convert to all caps if the old text is all caps
2537          and has at least one multiletter word.  */
2538       if (! some_lowercase && some_multiletter_word)
2539         case_action = all_caps;
2540       /* Capitalize each word, if the old text has all capitalized words.  */
2541       else if (!some_nonuppercase_initial && some_multiletter_word)
2542         case_action = cap_initial;
2543       else if (!some_nonuppercase_initial && some_uppercase)
2544         /* Should x -> yz, operating on X, give Yz or YZ?
2545            We'll assume the latter.  */
2546         case_action = all_caps;
2547       else
2548         case_action = nochange;
2549     }
2550
2551   /* Do replacement in a string.  */
2552   if (!NILP (string))
2553     {
2554       Lisp_Object before, after;
2555
2556       before = Fsubstring (string, make_number (0),
2557                            make_number (search_regs.start[sub]));
2558       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2559
2560       /* Substitute parts of the match into NEWTEXT
2561          if desired.  */
2562       if (NILP (literal))
2563         {
2564           EMACS_INT lastpos = 0;
2565           EMACS_INT lastpos_byte = 0;
2566           /* We build up the substituted string in ACCUM.  */
2567           Lisp_Object accum;
2568           Lisp_Object middle;
2569           int length = SBYTES (newtext);
2570
2571           accum = Qnil;
2572
2573           for (pos_byte = 0, pos = 0; pos_byte < length;)
2574             {
2575               int substart = -1;
2576               int subend = 0;
2577               int delbackslash = 0;
2578
2579               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2580
2581               if (c == '\\')
2582                 {
2583                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2584
2585                   if (c == '&')
2586                     {
2587                       substart = search_regs.start[sub];
2588                       subend = search_regs.end[sub];
2589                     }
2590                   else if (c >= '1' && c <= '9')
2591                     {
2592                       if (search_regs.start[c - '0'] >= 0
2593                           && c <= search_regs.num_regs + '0')
2594                         {
2595                           substart = search_regs.start[c - '0'];
2596                           subend = search_regs.end[c - '0'];
2597                         }
2598                       else
2599                         {
2600                           /* If that subexp did not match,
2601                              replace \\N with nothing.  */
2602                           substart = 0;
2603                           subend = 0;
2604                         }
2605                     }
2606                   else if (c == '\\')
2607                     delbackslash = 1;
2608                   else
2609                     error ("Invalid use of `\\' in replacement text");
2610                 }
2611               if (substart >= 0)
2612                 {
2613                   if (pos - 2 != lastpos)
2614                     middle = substring_both (newtext, lastpos,
2615                                              lastpos_byte,
2616                                              pos - 2, pos_byte - 2);
2617                   else
2618                     middle = Qnil;
2619                   accum = concat3 (accum, middle,
2620                                    Fsubstring (string,
2621                                                make_number (substart),
2622                                                make_number (subend)));
2623                   lastpos = pos;
2624                   lastpos_byte = pos_byte;
2625                 }
2626               else if (delbackslash)
2627                 {
2628                   middle = substring_both (newtext, lastpos,
2629                                            lastpos_byte,
2630                                            pos - 1, pos_byte - 1);
2631
2632                   accum = concat2 (accum, middle);
2633                   lastpos = pos;
2634                   lastpos_byte = pos_byte;
2635                 }
2636             }
2637
2638           if (pos != lastpos)
2639             middle = substring_both (newtext, lastpos,
2640                                      lastpos_byte,
2641                                      pos, pos_byte);
2642           else
2643             middle = Qnil;
2644
2645           newtext = concat2 (accum, middle);
2646         }
2647
2648       /* Do case substitution in NEWTEXT if desired.  */
2649       if (case_action == all_caps)
2650         newtext = Fupcase (newtext);
2651       else if (case_action == cap_initial)
2652         newtext = Fupcase_initials (newtext);
2653
2654       return concat3 (before, newtext, after);
2655     }
2656
2657   /* Record point, then move (quietly) to the start of the match.  */
2658   if (PT >= search_regs.end[sub])
2659     opoint = PT - ZV;
2660   else if (PT > search_regs.start[sub])
2661     opoint = search_regs.end[sub] - ZV;
2662   else
2663     opoint = PT;
2664
2665   /* If we want non-literal replacement,
2666      perform substitution on the replacement string.  */
2667   if (NILP (literal))
2668     {
2669       int length = SBYTES (newtext);
2670       unsigned char *substed;
2671       int substed_alloc_size, substed_len;
2672       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2673       int str_multibyte = STRING_MULTIBYTE (newtext);
2674       Lisp_Object rev_tbl;
2675       int really_changed = 0;
2676
2677       rev_tbl = Qnil;
2678
2679       substed_alloc_size = length * 2 + 100;
2680       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2681       substed_len = 0;
2682
2683       /* Go thru NEWTEXT, producing the actual text to insert in
2684          SUBSTED while adjusting multibyteness to that of the current
2685          buffer.  */
2686
2687       for (pos_byte = 0, pos = 0; pos_byte < length;)
2688         {
2689           unsigned char str[MAX_MULTIBYTE_LENGTH];
2690           unsigned char *add_stuff = NULL;
2691           int add_len = 0;
2692           int idx = -1;
2693
2694           if (str_multibyte)
2695             {
2696               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2697               if (!buf_multibyte)
2698                 c = multibyte_char_to_unibyte (c, rev_tbl);
2699             }
2700           else
2701             {
2702               /* Note that we don't have to increment POS.  */
2703               c = SREF (newtext, pos_byte++);
2704               if (buf_multibyte)
2705                 MAKE_CHAR_MULTIBYTE (c);
2706             }
2707
2708           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2709              or set IDX to a match index, which means put that part
2710              of the buffer text into SUBSTED.  */
2711
2712           if (c == '\\')
2713             {
2714               really_changed = 1;
2715
2716               if (str_multibyte)
2717                 {
2718                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2719                                                       pos, pos_byte);
2720                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2721                     c = multibyte_char_to_unibyte (c, rev_tbl);
2722                 }
2723               else
2724                 {
2725                   c = SREF (newtext, pos_byte++);
2726                   if (buf_multibyte)
2727                     MAKE_CHAR_MULTIBYTE (c);
2728                 }
2729
2730               if (c == '&')
2731                 idx = sub;
2732               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2733                 {
2734                   if (search_regs.start[c - '0'] >= 1)
2735                     idx = c - '0';
2736                 }
2737               else if (c == '\\')
2738                 add_len = 1, add_stuff = "\\";
2739               else
2740                 {
2741                   xfree (substed);
2742                   error ("Invalid use of `\\' in replacement text");
2743                 }
2744             }
2745           else
2746             {
2747               add_len = CHAR_STRING (c, str);
2748               add_stuff = str;
2749             }
2750
2751           /* If we want to copy part of a previous match,
2752              set up ADD_STUFF and ADD_LEN to point to it.  */
2753           if (idx >= 0)
2754             {
2755               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2756               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2757               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2758                 move_gap (search_regs.start[idx]);
2759               add_stuff = BYTE_POS_ADDR (begbyte);
2760             }
2761
2762           /* Now the stuff we want to add to SUBSTED
2763              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2764
2765           /* Make sure SUBSTED is big enough.  */
2766           if (substed_len + add_len >= substed_alloc_size)
2767             {
2768               substed_alloc_size = substed_len + add_len + 500;
2769               substed = (unsigned char *) xrealloc (substed,
2770                                                     substed_alloc_size + 1);
2771             }
2772
2773           /* Now add to the end of SUBSTED.  */
2774           if (add_stuff)
2775             {
2776               memcpy (substed + substed_len, add_stuff, add_len);
2777               substed_len += add_len;
2778             }
2779         }
2780
2781       if (really_changed)
2782         {
2783           if (buf_multibyte)
2784             {
2785               int nchars = multibyte_chars_in_text (substed, substed_len);
2786
2787               newtext = make_multibyte_string (substed, nchars, substed_len);
2788             }
2789           else
2790             newtext = make_unibyte_string (substed, substed_len);
2791         }
2792       xfree (substed);
2793     }
2794
2795   /* Replace the old text with the new in the cleanest possible way.  */
2796   replace_range (search_regs.start[sub], search_regs.end[sub],
2797                  newtext, 1, 0, 1);
2798   newpoint = search_regs.start[sub] + SCHARS (newtext);
2799
2800   if (case_action == all_caps)
2801     Fupcase_region (make_number (search_regs.start[sub]),
2802                     make_number (newpoint));
2803   else if (case_action == cap_initial)
2804     Fupcase_initials_region (make_number (search_regs.start[sub]),
2805                              make_number (newpoint));
2806
2807   /* Adjust search data for this change.  */
2808   {
2809     EMACS_INT oldend = search_regs.end[sub];
2810     EMACS_INT oldstart = search_regs.start[sub];
2811     EMACS_INT change = newpoint - search_regs.end[sub];
2812     int i;
2813
2814     for (i = 0; i < search_regs.num_regs; i++)
2815       {
2816         if (search_regs.start[i] >= oldend)
2817           search_regs.start[i] += change;
2818         else if (search_regs.start[i] > oldstart)
2819           search_regs.start[i] = oldstart;
2820         if (search_regs.end[i] >= oldend)
2821           search_regs.end[i] += change;
2822         else if (search_regs.end[i] > oldstart)
2823           search_regs.end[i] = oldstart;
2824       }
2825   }
2826
2827   /* Put point back where it was in the text.  */
2828   if (opoint <= 0)
2829     TEMP_SET_PT (opoint + ZV);
2830   else
2831     TEMP_SET_PT (opoint);
2832
2833   /* Now move point "officially" to the start of the inserted replacement.  */
2834   move_if_not_intangible (newpoint);
2835
2836   return Qnil;
2837 }
2838 \f
2839 static Lisp_Object
2840 match_limit (Lisp_Object num, int beginningp)
2841 {
2842   register int n;
2843
2844   CHECK_NUMBER (num);
2845   n = XINT (num);
2846   if (n < 0)
2847     args_out_of_range (num, make_number (0));
2848   if (search_regs.num_regs <= 0)
2849     error ("No match data, because no search succeeded");
2850   if (n >= search_regs.num_regs
2851       || search_regs.start[n] < 0)
2852     return Qnil;
2853   return (make_number ((beginningp) ? search_regs.start[n]
2854                                     : search_regs.end[n]));
2855 }
2856
2857 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2858        doc: /* Return position of start of text matched by last search.
2859 SUBEXP, a number, specifies which parenthesized expression in the last
2860   regexp.
2861 Value is nil if SUBEXPth pair didn't match, or there were less than
2862   SUBEXP pairs.
2863 Zero means the entire text matched by the whole regexp or whole string.  */)
2864      (subexp)
2865      Lisp_Object subexp;
2866 {
2867   return match_limit (subexp, 1);
2868 }
2869
2870 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2871        doc: /* Return position of end of text matched by last search.
2872 SUBEXP, a number, specifies which parenthesized expression in the last
2873   regexp.
2874 Value is nil if SUBEXPth pair didn't match, or there were less than
2875   SUBEXP pairs.
2876 Zero means the entire text matched by the whole regexp or whole string.  */)
2877      (subexp)
2878      Lisp_Object subexp;
2879 {
2880   return match_limit (subexp, 0);
2881 }
2882
2883 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2884        doc: /* Return a list containing all info on what the last search matched.
2885 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2886 All the elements are markers or nil (nil if the Nth pair didn't match)
2887 if the last match was on a buffer; integers or nil if a string was matched.
2888 Use `set-match-data' to reinstate the data in this list.
2889
2890 If INTEGERS (the optional first argument) is non-nil, always use
2891 integers \(rather than markers) to represent buffer positions.  In
2892 this case, and if the last match was in a buffer, the buffer will get
2893 stored as one additional element at the end of the list.
2894
2895 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2896 enough to hold all the values, and if INTEGERS is non-nil, no consing
2897 is done.
2898
2899 If optional third arg RESEAT is non-nil, any previous markers on the
2900 REUSE list will be modified to point to nowhere.
2901
2902 Return value is undefined if the last search failed.  */)
2903   (integers, reuse, reseat)
2904      Lisp_Object integers, reuse, reseat;
2905 {
2906   Lisp_Object tail, prev;
2907   Lisp_Object *data;
2908   int i, len;
2909
2910   if (!NILP (reseat))
2911     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2912       if (MARKERP (XCAR (tail)))
2913         {
2914           unchain_marker (XMARKER (XCAR (tail)));
2915           XSETCAR (tail, Qnil);
2916         }
2917
2918   if (NILP (last_thing_searched))
2919     return Qnil;
2920
2921   prev = Qnil;
2922
2923   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2924                                  * sizeof (Lisp_Object));
2925
2926   len = 0;
2927   for (i = 0; i < search_regs.num_regs; i++)
2928     {
2929       int start = search_regs.start[i];
2930       if (start >= 0)
2931         {
2932           if (EQ (last_thing_searched, Qt)
2933               || ! NILP (integers))
2934             {
2935               XSETFASTINT (data[2 * i], start);
2936               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2937             }
2938           else if (BUFFERP (last_thing_searched))
2939             {
2940               data[2 * i] = Fmake_marker ();
2941               Fset_marker (data[2 * i],
2942                            make_number (start),
2943                            last_thing_searched);
2944               data[2 * i + 1] = Fmake_marker ();
2945               Fset_marker (data[2 * i + 1],
2946                            make_number (search_regs.end[i]),
2947                            last_thing_searched);
2948             }
2949           else
2950             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2951             abort ();
2952
2953           len = 2 * i + 2;
2954         }
2955       else
2956         data[2 * i] = data[2 * i + 1] = Qnil;
2957     }
2958
2959   if (BUFFERP (last_thing_searched) && !NILP (integers))
2960     {
2961       data[len] = last_thing_searched;
2962       len++;
2963     }
2964
2965   /* If REUSE is not usable, cons up the values and return them.  */
2966   if (! CONSP (reuse))
2967     return Flist (len, data);
2968
2969   /* If REUSE is a list, store as many value elements as will fit
2970      into the elements of REUSE.  */
2971   for (i = 0, tail = reuse; CONSP (tail);
2972        i++, tail = XCDR (tail))
2973     {
2974       if (i < len)
2975         XSETCAR (tail, data[i]);
2976       else
2977         XSETCAR (tail, Qnil);
2978       prev = tail;
2979     }
2980
2981   /* If we couldn't fit all value elements into REUSE,
2982      cons up the rest of them and add them to the end of REUSE.  */
2983   if (i < len)
2984     XSETCDR (prev, Flist (len - i, data + i));
2985
2986   return reuse;
2987 }
2988
2989 /* We used to have an internal use variant of `reseat' described as:
2990
2991       If RESEAT is `evaporate', put the markers back on the free list
2992       immediately.  No other references to the markers must exist in this
2993       case, so it is used only internally on the unwind stack and
2994       save-match-data from Lisp.
2995
2996    But it was ill-conceived: those supposedly-internal markers get exposed via
2997    the undo-list, so freeing them here is unsafe.  */
2998
2999 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3000        doc: /* Set internal data on last search match from elements of LIST.
3001 LIST should have been created by calling `match-data' previously.
3002
3003 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
3004     (list, reseat)
3005      register Lisp_Object list, reseat;
3006 {
3007   register int i;
3008   register Lisp_Object marker;
3009
3010   if (running_asynch_code)
3011     save_search_regs ();
3012
3013   CHECK_LIST (list);
3014
3015   /* Unless we find a marker with a buffer or an explicit buffer
3016      in LIST, assume that this match data came from a string.  */
3017   last_thing_searched = Qt;
3018
3019   /* Allocate registers if they don't already exist.  */
3020   {
3021     int length = XFASTINT (Flength (list)) / 2;
3022
3023     if (length > search_regs.num_regs)
3024       {
3025         if (search_regs.num_regs == 0)
3026           {
3027             search_regs.start
3028               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3029             search_regs.end
3030               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3031           }
3032         else
3033           {
3034             search_regs.start
3035               = (regoff_t *) xrealloc (search_regs.start,
3036                                        length * sizeof (regoff_t));
3037             search_regs.end
3038               = (regoff_t *) xrealloc (search_regs.end,
3039                                        length * sizeof (regoff_t));
3040           }
3041
3042         for (i = search_regs.num_regs; i < length; i++)
3043           search_regs.start[i] = -1;
3044
3045         search_regs.num_regs = length;
3046       }
3047
3048     for (i = 0; CONSP (list); i++)
3049       {
3050         marker = XCAR (list);
3051         if (BUFFERP (marker))
3052           {
3053             last_thing_searched = marker;
3054             break;
3055           }
3056         if (i >= length)
3057           break;
3058         if (NILP (marker))
3059           {
3060             search_regs.start[i] = -1;
3061             list = XCDR (list);
3062           }
3063         else
3064           {
3065             EMACS_INT from;
3066             Lisp_Object m;
3067
3068             m = marker;
3069             if (MARKERP (marker))
3070               {
3071                 if (XMARKER (marker)->buffer == 0)
3072                   XSETFASTINT (marker, 0);
3073                 else
3074                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3075               }
3076
3077             CHECK_NUMBER_COERCE_MARKER (marker);
3078             from = XINT (marker);
3079
3080             if (!NILP (reseat) && MARKERP (m))
3081               {
3082                 unchain_marker (XMARKER (m));
3083                 XSETCAR (list, Qnil);
3084               }
3085
3086             if ((list = XCDR (list), !CONSP (list)))
3087               break;
3088
3089             m = marker = XCAR (list);
3090
3091             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3092               XSETFASTINT (marker, 0);
3093
3094             CHECK_NUMBER_COERCE_MARKER (marker);
3095             search_regs.start[i] = from;
3096             search_regs.end[i] = XINT (marker);
3097
3098             if (!NILP (reseat) && MARKERP (m))
3099               {
3100                 unchain_marker (XMARKER (m));
3101                 XSETCAR (list, Qnil);
3102               }
3103           }
3104         list = XCDR (list);
3105       }
3106
3107     for (; i < search_regs.num_regs; i++)
3108       search_regs.start[i] = -1;
3109   }
3110
3111   return Qnil;
3112 }
3113
3114 /* If non-zero the match data have been saved in saved_search_regs
3115    during the execution of a sentinel or filter. */
3116 static int search_regs_saved;
3117 static struct re_registers saved_search_regs;
3118 static Lisp_Object saved_last_thing_searched;
3119
3120 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3121    if asynchronous code (filter or sentinel) is running. */
3122 static void
3123 save_search_regs (void)
3124 {
3125   if (!search_regs_saved)
3126     {
3127       saved_search_regs.num_regs = search_regs.num_regs;
3128       saved_search_regs.start = search_regs.start;
3129       saved_search_regs.end = search_regs.end;
3130       saved_last_thing_searched = last_thing_searched;
3131       last_thing_searched = Qnil;
3132       search_regs.num_regs = 0;
3133       search_regs.start = 0;
3134       search_regs.end = 0;
3135
3136       search_regs_saved = 1;
3137     }
3138 }
3139
3140 /* Called upon exit from filters and sentinels. */
3141 void
3142 restore_search_regs (void)
3143 {
3144   if (search_regs_saved)
3145     {
3146       if (search_regs.num_regs > 0)
3147         {
3148           xfree (search_regs.start);
3149           xfree (search_regs.end);
3150         }
3151       search_regs.num_regs = saved_search_regs.num_regs;
3152       search_regs.start = saved_search_regs.start;
3153       search_regs.end = saved_search_regs.end;
3154       last_thing_searched = saved_last_thing_searched;
3155       saved_last_thing_searched = Qnil;
3156       search_regs_saved = 0;
3157     }
3158 }
3159
3160 static Lisp_Object
3161 unwind_set_match_data (Lisp_Object list)
3162 {
3163   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3164   return Fset_match_data (list, Qt);
3165 }
3166
3167 /* Called to unwind protect the match data.  */
3168 void
3169 record_unwind_save_match_data (void)
3170 {
3171   record_unwind_protect (unwind_set_match_data,
3172                          Fmatch_data (Qnil, Qnil, Qnil));
3173 }
3174
3175 /* Quote a string to inactivate reg-expr chars */
3176
3177 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3178        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3179      (string)
3180      Lisp_Object string;
3181 {
3182   register unsigned char *in, *out, *end;
3183   register unsigned char *temp;
3184   int backslashes_added = 0;
3185
3186   CHECK_STRING (string);
3187
3188   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3189
3190   /* Now copy the data into the new string, inserting escapes. */
3191
3192   in = SDATA (string);
3193   end = in + SBYTES (string);
3194   out = temp;
3195
3196   for (; in != end; in++)
3197     {
3198       if (*in == '['
3199           || *in == '*' || *in == '.' || *in == '\\'
3200           || *in == '?' || *in == '+'
3201           || *in == '^' || *in == '$')
3202         *out++ = '\\', backslashes_added++;
3203       *out++ = *in;
3204     }
3205
3206   return make_specified_string (temp,
3207                                 SCHARS (string) + backslashes_added,
3208                                 out - temp,
3209                                 STRING_MULTIBYTE (string));
3210 }
3211 \f
3212 void
3213 syms_of_search (void)
3214 {
3215   register int i;
3216
3217   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3218     {
3219       searchbufs[i].buf.allocated = 100;
3220       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3221       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3222       searchbufs[i].regexp = Qnil;
3223       searchbufs[i].whitespace_regexp = Qnil;
3224       searchbufs[i].syntax_table = Qnil;
3225       staticpro (&searchbufs[i].regexp);
3226       staticpro (&searchbufs[i].whitespace_regexp);
3227       staticpro (&searchbufs[i].syntax_table);
3228       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3229     }
3230   searchbuf_head = &searchbufs[0];
3231
3232   Qsearch_failed = intern_c_string ("search-failed");
3233   staticpro (&Qsearch_failed);
3234   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3235   staticpro (&Qinvalid_regexp);
3236
3237   Fput (Qsearch_failed, Qerror_conditions,
3238         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3239   Fput (Qsearch_failed, Qerror_message,
3240         make_pure_c_string ("Search failed"));
3241
3242   Fput (Qinvalid_regexp, Qerror_conditions,
3243         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3244   Fput (Qinvalid_regexp, Qerror_message,
3245         make_pure_c_string ("Invalid regexp"));
3246
3247   last_thing_searched = Qnil;
3248   staticpro (&last_thing_searched);
3249
3250   saved_last_thing_searched = Qnil;
3251   staticpro (&saved_last_thing_searched);
3252
3253   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3254       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3255 Some commands use this for user-specified regexps.
3256 Spaces that occur inside character classes or repetition operators
3257 or other such regexp constructs are not replaced with this.
3258 A value of nil (which is the normal value) means treat spaces literally.  */);
3259   Vsearch_spaces_regexp = Qnil;
3260
3261   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3262       doc: /* Internal use only.
3263 If non-nil, the primitive searching and matching functions
3264 such as `looking-at', `string-match', `re-search-forward', etc.,
3265 do not set the match data.  The proper way to use this variable
3266 is to bind it with `let' around a small expression.  */);
3267   Vinhibit_changing_match_data = Qnil;
3268
3269   defsubr (&Slooking_at);
3270   defsubr (&Sposix_looking_at);
3271   defsubr (&Sstring_match);
3272   defsubr (&Sposix_string_match);
3273   defsubr (&Ssearch_forward);
3274   defsubr (&Ssearch_backward);
3275   defsubr (&Sword_search_forward);
3276   defsubr (&Sword_search_backward);
3277   defsubr (&Sword_search_forward_lax);
3278   defsubr (&Sword_search_backward_lax);
3279   defsubr (&Sre_search_forward);
3280   defsubr (&Sre_search_backward);
3281   defsubr (&Sposix_search_forward);
3282   defsubr (&Sposix_search_backward);
3283   defsubr (&Sreplace_match);
3284   defsubr (&Smatch_beginning);
3285   defsubr (&Smatch_end);
3286   defsubr (&Smatch_data);
3287   defsubr (&Sset_match_data);
3288   defsubr (&Sregexp_quote);
3289 }
3290
3291 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3292    (do not change this comment) */