src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* One entry of the regexp cache.  While REGEXP is non-nil, BUF holds
  41    the compiled form of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp;
  46   Lisp_Object whitespace_regexp;
  47   /* Syntax table the compiled pattern depends on (because of character
  48      classes); t means the pattern is valid for any syntax table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero if the regexp was compiled for full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The cache entries: REGEXP_CACHE_SIZE instances of struct regexp_cache.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list of entries; points to the most recently used one.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* Error condition signaled when compile_pattern_1 fails to compile a regexp.  */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches.  */
  92 Lisp_Object Qsearch_failed;
  93 /* When a string, it is given to re_set_whitespace_regexp during compilation.  */
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 /* If non-nil, the match data will not be changed during call to
  97    searching or matching functions.  This variable is for internal use
  98    only.  */
  99 Lisp_Object Vinhibit_changing_match_data;
 100 /* Prototypes of static (file-local) functions.  */
 101 static void set_search_regs (EMACS_INT, EMACS_INT);
 102 static void save_search_regs (void);
 103 static EMACS_INT simple_search (int, unsigned char *, int, int,
 104                                 Lisp_Object, EMACS_INT, EMACS_INT,
 105                                 EMACS_INT, EMACS_INT);
 106 static EMACS_INT boyer_moore (int, unsigned char *, int, int,
 107                               Lisp_Object, Lisp_Object,
 108                               EMACS_INT, EMACS_INT,
 109                               EMACS_INT, EMACS_INT, int);
 110 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
 111                                 EMACS_INT, EMACS_INT, int, int,
 112                                 Lisp_Object, Lisp_Object, int);
 113 static void matcher_overflow (void) NO_RETURN;
 114 /* Report a stack overflow in the regexp matcher by calling error.  */
 115 static void
 116 matcher_overflow (void)
 117 {
 118   error ("Stack overflow in regexp matcher");
 119 }
 120
 121 /* Compile the regexp PATTERN into the cache entry CP, and signal a
 122    Lisp error (Qinvalid_regexp) if the pattern cannot be compiled.
 123    CP is the place to put the result; on failure CP->regexp is left nil.
 124    TRANSLATE is a translation table for ignoring case, or nil for none.
 125    REGP is the structure that says where to store the "register"
 126    values that will result from matching this pattern; if it is 0,
 127    the pattern should be compiled not to record any subexpression
 128    bounds.  (This function itself never looks at REGP.)
 129    POSIX is nonzero for full backtracking (POSIX style); 0 means
 130    backtrack only enough to get a valid match.
 131
 132    The behavior also depends on Vsearch_spaces_regexp.  */
 133
 134 static void
 135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
 136 {
 137   reg_syntax_t saved_syntax;
 138   char *errmsg;
 139   int spaces_is_string = STRINGP (Vsearch_spaces_regexp);
 140
 141   /* The entry stays invalid (nil) until compilation has succeeded.  */
 142   cp->regexp = Qnil;
 143   cp->buf.translate = NILP (translate) ? make_number (0) : translate;
 144   cp->posix = posix;
 145   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 146   cp->buf.charset_unibyte = charset_unibyte;
 147   cp->whitespace_regexp = spaces_is_string ? Vsearch_spaces_regexp : Qnil;
 148
 149   /* rms: BLOCK_INPUT is thought to be unnecessary here, because
 150      regex.c defines malloc to call xmalloc; using it would also
 151      keep the debugger from running if an error occurs.  So it is
 152      not used.  */
 153   saved_syntax = re_set_syntax (RE_SYNTAX_EMACS
 154                                 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 155   re_set_whitespace_regexp (spaces_is_string
 156                             ? SDATA (Vsearch_spaces_regexp) : NULL);
 157
 158   errmsg = (char *) re_compile_pattern ((char *) SDATA (pattern),
 159                                         SBYTES (pattern), &cp->buf);
 160
 161   /* If the compiled pattern hard codes some of the contents of the
 162      syntax table, it can only be reused with *this* syntax table;
 163      otherwise t marks it as valid for any syntax table.  */
 164   if (cp->buf.used_syntax)
 165     cp->syntax_table = current_buffer->syntax_table;
 166   else
 167     cp->syntax_table = Qt;
 168
 169   /* Clear the whitespace regexp and restore the previous syntax.  */
 170   re_set_whitespace_regexp (NULL);
 171   re_set_syntax (saved_syntax);
 172
 173   if (errmsg)
 174     xsignal1 (Qinvalid_regexp, build_string (errmsg));
 175
 176   /* Success: record a copy of PATTERN to identify this entry.  */
 177   cp->regexp = Fcopy_sequence (pattern);
 178 }
 179
 180 /* Reallocate every compiled regexp buffer in the cache down to the
 181    size it actually uses right now.  Called from garbage collection.  */
 182
 183 void
 184 shrink_regexp_cache (void)
 185 {
 186   struct regexp_cache *entry = searchbuf_head;
 187
 188   while (entry != 0)
 189     {
 190       struct re_pattern_buffer *pat = &entry->buf;
 191       pat->allocated = pat->used;
 192       pat->buffer = (unsigned char *) xrealloc (pat->buffer, pat->used);
 193       entry = entry->next;
 194     }
 195 }
 196
 197 /* Invalidate the cached regexps whose compiled form depends on a
 198    syntax table; this is done because a syntax table was changed.
 199    No memory is leaked by doing so, because re_compile_pattern
 200    automagically manages the memory in each re_pattern_buffer struct,
 201    based on its `allocated' and `buffer' values.  */
 202 void
 203 clear_regexp_cache (void)
 204 {
 205   struct regexp_cache *entry;
 206
 207   /* Entries tied to any syntax table are dropped, not only those tied
 208      to the table that changed: char-table inheritance means that
 209      modifying one syntax-table can change others at the same time.  */
 210   for (entry = searchbufs; entry < searchbufs + REGEXP_CACHE_SIZE; entry++)
 211     if (!EQ (entry->syntax_table, Qt))
 212       entry->regexp = Qnil;
 213 }
 214
 215 /* Return the compiled form of PATTERN, reusing an entry of the regexp
 216    cache when one fits and compiling the pattern otherwise.
 217    PATTERN is the pattern to compile.
 218    TRANSLATE is a translation table for ignoring case, or nil for none.
 219    REGP is the structure that says where to store the "register"
 220    values that will result from matching this pattern.
 221    If it is 0, we should compile the pattern not to record any
 222    subexpression bounds.
 223    POSIX is nonzero if we want full backtracking (POSIX style)
 224    for this pattern.  0 means backtrack only enough to get a valid match.
 225    MULTIBYTE is stored as the target_multibyte flag of the result.  */
 226 struct re_pattern_buffer *
 227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 228 {
 229   struct regexp_cache **link = &searchbuf_head;
 230   struct regexp_cache *entry;
 231
 232   for (;;)
 233     {
 234       entry = *link;
 235       /* Entries start out nil, and compile_pattern_1 leaves an entry nil
 236          when its pattern is invalid; string accessors must not be applied
 237          to those.  compile_pattern_1 is only run on the entry chosen
 238          here, so nil should never appear before a non-nil entry.  */
 239       if (!NILP (entry->regexp))
 240         {
 241           if (SCHARS (entry->regexp) == SCHARS (pattern)
 242               && STRING_MULTIBYTE (entry->regexp) == STRING_MULTIBYTE (pattern)
 243               && !NILP (Fstring_equal (entry->regexp, pattern))
 244               && EQ (entry->buf.translate,
 245                      (NILP (translate) ? make_number (0) : translate))
 246               && entry->posix == posix
 247               && (EQ (entry->syntax_table, Qt)
 248                   || EQ (entry->syntax_table, current_buffer->syntax_table))
 249               && !NILP (Fequal (entry->whitespace_regexp, Vsearch_spaces_regexp))
 250               && entry->buf.charset_unibyte == charset_unibyte)
 251             break;
 252           /* No match; go on to the next entry if there is one.  */
 253           if (entry->next != 0)
 254             {
 255               link = &entry->next;
 256               continue;
 257             }
 258         }
 259
 260       /* Compile into the nil cell we found, or into the last (least
 261          recently used) cell with a string value.  */
 262       compile_pattern_1 (entry, pattern, translate, regp, posix);
 263       break;
 264     }
 265
 266   /* ENTRY (aka *LINK) now holds the compiled pattern, cached or freshly
 267      compiled; move it to the front to mark it most recently used.  */
 268   *link = entry->next;
 269   entry->next = searchbuf_head;
 270   searchbuf_head = entry;
 271
 272   /* Tell the searching functions what register space is allocated.  */
 273   if (regp)
 274     re_set_registers (&entry->buf, regp, regp->num_regs, regp->start, regp->end);
 275
 276   /* The pattern serves multibyte and unibyte targets; say which now.  */
 277   entry->buf.target_multibyte = multibyte;
 278
 279   return &entry->buf;
 280 }
 281
 282 \f
 283 /* Match regexp STRING against the buffer text at point; return t on a
 284    match, else nil.  POSIX nonzero means full POSIX backtracking.  Match
 285    data is updated unless Vinhibit_changing_match_data is non-nil.  */
 286 static Lisp_Object
 287 looking_at_1 (Lisp_Object string, int posix)
 288 {
 289   struct re_pattern_buffer *bufp;
 290   unsigned char *text1, *text2;
 291   EMACS_INT size1, size2;
 292   int matched, reg;
 293
 294   if (running_asynch_code)
 295     save_search_regs ();
 296
 297   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 298   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 299     = current_buffer->case_eqv_table;
 300
 301   CHECK_STRING (string);
 302   bufp = compile_pattern (string,
 303                           (NILP (Vinhibit_changing_match_data)
 304                            ? &search_regs : NULL),
 305                           (NILP (current_buffer->case_fold_search)
 306                            ? Qnil : current_buffer->case_canon_table),
 307                           posix,
 308                           !NILP (current_buffer->enable_multibyte_characters));
 309
 310   immediate_quit = 1;
 311   QUIT;  /* Act on a pending quit right away, to avoid paradoxical behavior.  */
 312
 313   /* Describe the visible portion of the buffer as two strings.  */
 314   text1 = BEGV_ADDR;
 315   size1 = GPT_BYTE - BEGV_BYTE;
 316   text2 = GAP_END_ADDR;
 317   size2 = ZV_BYTE - GPT_BYTE;
 318   if (size1 < 0)
 319     {
 320       text2 = text1;
 321       size2 = ZV_BYTE - BEGV_BYTE;
 322       size1 = 0;
 323     }
 324   if (size2 < 0)
 325     {
 326       size1 = ZV_BYTE - BEGV_BYTE;
 327       size2 = 0;
 328     }
 329
 330   re_match_object = Qnil;
 331   matched = re_match_2 (bufp, (char *) text1, size1, (char *) text2, size2,
 332                         PT_BYTE - BEGV_BYTE,
 333                         (NILP (Vinhibit_changing_match_data)
 334                          ? &search_regs : NULL),
 335                         ZV_BYTE - BEGV_BYTE);
 336   immediate_quit = 0;
 337
 338   if (matched == -2)
 339     matcher_overflow ();
 340
 341   if (NILP (Vinhibit_changing_match_data))
 342     {
 343       /* Registers hold byte offsets from BEGV_BYTE; make them char positions.  */
 344       if (matched >= 0)
 345         for (reg = 0; reg < search_regs.num_regs; reg++)
 346           if (search_regs.start[reg] >= 0)
 347             {
 348               search_regs.start[reg]
 349                 = BYTE_TO_CHAR (search_regs.start[reg] + BEGV_BYTE);
 350               search_regs.end[reg]
 351                 = BYTE_TO_CHAR (search_regs.end[reg] + BEGV_BYTE);
 352             }
 353       /* Set last_thing_searched only when match data is changed.  */
 354       XSETBUFFER (last_thing_searched, current_buffer);
 355     }
 356
 357   return matched >= 0 ? Qt : Qnil;
 358 }
 359
 360 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 361        doc: /* Return t if text after point matches regular expression REGEXP.
 362 This function modifies the match data that `match-beginning',
 363 `match-end' and `match-data' access; save and restore the match
 364 data if you want to preserve them.  */)
 365   (Lisp_Object regexp)
 366 {
 367   return looking_at_1 (regexp, 0);  /* POSIX argument 0: no full POSIX backtracking.  */
 368 }
 369
 370 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 371        doc: /* Return t if text after point matches regular expression REGEXP.
 372 Find the longest match, in accord with Posix regular expression rules.
 373 This function modifies the match data that `match-beginning',
 374 `match-end' and `match-data' access; save and restore the match
 375 data if you want to preserve them.  */)
 376   (Lisp_Object regexp)
 377 {
 378   return looking_at_1 (regexp, 1);  /* POSIX argument 1: full POSIX backtracking.  */
 379 }
 380 \f
 381 /* Search STRING for REGEXP starting at index START (nil means 0; a
 382    negative START counts from the end).  Return the character index of
 383    the match, or nil if none.  POSIX nonzero means POSIX backtracking.  */
 384 static Lisp_Object
 385 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 386 {
 387   int val;
 388   struct re_pattern_buffer *bufp;
 389   EMACS_INT pos, pos_byte;
 390   int i;
 391
 392   if (running_asynch_code)
 393     save_search_regs ();
 394
 395   CHECK_STRING (regexp);
 396   CHECK_STRING (string);
 397
 398   if (NILP (start))
 399     pos = 0, pos_byte = 0;
 400   else
 401     {
 402       /* EMACS_INT, not int: SCHARS must not be truncated for long strings.  */
 403       EMACS_INT len = SCHARS (string);
 404       CHECK_NUMBER (start);
 405       pos = XINT (start);
 406       if (pos < 0 && -pos <= len)
 407         pos = len + pos;
 408       else if (0 > pos || pos > len)
 409         args_out_of_range (string, start);
 410       pos_byte = string_char_to_byte (string, pos);
 411     }
 412
 413   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 414   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 415     = current_buffer->case_eqv_table;
 416
 417   bufp = compile_pattern (regexp,
 418                           (NILP (Vinhibit_changing_match_data)
 419                            ? &search_regs : NULL),
 420                           (!NILP (current_buffer->case_fold_search)
 421                            ? current_buffer->case_canon_table : Qnil),
 422                           posix,
 423                           STRING_MULTIBYTE (string));
 424   immediate_quit = 1;
 425   re_match_object = string;
 426   val = re_search (bufp, (char *) SDATA (string),
 427                    SBYTES (string), pos_byte,
 428                    SBYTES (string) - pos_byte,
 429                    (NILP (Vinhibit_changing_match_data)
 430                     ? &search_regs : NULL));
 431   immediate_quit = 0;
 432   /* Set last_thing_searched only when match data is changed.  */
 433   if (NILP (Vinhibit_changing_match_data))
 434     last_thing_searched = Qt;
 435
 436   if (val == -2)
 437     matcher_overflow ();
 438   if (val < 0) return Qnil;
 439   /* The registers hold byte indices; convert them to character indices.  */
 440   if (NILP (Vinhibit_changing_match_data))
 441     for (i = 0; i < search_regs.num_regs; i++)
 442       if (search_regs.start[i] >= 0)
 443         {
 444           search_regs.start[i]
 445             = string_byte_to_char (string, search_regs.start[i]);
 446           search_regs.end[i]
 447             = string_byte_to_char (string, search_regs.end[i]);
 448         }
 449   return make_number (string_byte_to_char (string, val));
 450 }
 451
 452 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 453        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 454 Matching ignores case if `case-fold-search' is non-nil.
 455 If third arg START is non-nil, start search at that index in STRING.
 456 For index of first char beyond the match, do (match-end 0).
 457 `match-end' and `match-beginning' also give indices of substrings
 458 matched by parenthesis constructs in the pattern.
 459
 460 You can use the function `match-string' to extract the substrings
 461 matched by the parenthesis constructions in REGEXP. */)
 462   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 463 {
 464   return string_match_1 (regexp, string, start, 0);  /* POSIX argument 0.  */
 465 }
 466
 467 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 468        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 469 Find the longest match, in accord with Posix regular expression rules.
 470 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 471 If third arg START is non-nil, start search at that index in STRING.
 472 For index of first char beyond the match, do (match-end 0).
 473 `match-end' and `match-beginning' also give indices of substrings
 474 matched by parenthesis constructs in the pattern.  */)
 475   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 476 {
 477   return string_match_1 (regexp, string, start, 1);  /* POSIX argument 1.  */
 478 }
 479
 480 /* Search all of STRING for a match of REGEXP, with no case translation.
 481    Return the index of the match, or a negative value on failure.
 482    This does not clobber the match data.  */
 483
 484 int
 485 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 486 {
 487   struct re_pattern_buffer *compiled
 488     = compile_pattern (regexp, 0, Qnil, 0, STRING_MULTIBYTE (string));
 489   int found;
 490
 491   immediate_quit = 1;
 492   re_match_object = string;
 493
 494   /* Start at byte 0 with a range covering the whole string; no
 495      register structure is handed to the compiler or the searcher.  */
 496   found = re_search (compiled, (char *) SDATA (string), SBYTES (string),
 497                      0, SBYTES (string), 0);
 498   immediate_quit = 0;
 499   return found;
 500 }
 501
 502 /* Search the C string STRING, ignoring case, for a match of REGEXP.
 503    Return the index of the match, or a negative value on failure.
 504    This does not clobber the match data.
 505    STRING is assumed to contain single-byte characters.  */
 506
 507 extern Lisp_Object Vascii_downcase_table;
 508
 509 int
 510 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
 511 {
 512   struct re_pattern_buffer *compiled;
 513   int nbytes = strlen (string);
 514   int found;
 515
 516   /* The pattern is made unibyte before it is compiled.  */
 517   regexp = string_make_unibyte (regexp);
 518   re_match_object = Qt;
 519   compiled = compile_pattern (regexp, 0, Vascii_canon_table, 0, 0);
 520
 521   immediate_quit = 1;
 522   found = re_search (compiled, string, nbytes, 0, nbytes, 0);
 523   immediate_quit = 0;
 524   return found;
 525 }
 526
 527 /* Same as fast_string_match, except that case is ignored: the pattern
 528    is compiled with Vascii_canon_table as its translation table.  */
 529 int
 530 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 531 {
 532   struct re_pattern_buffer *compiled
 533     = compile_pattern (regexp, 0, Vascii_canon_table, 0,
 534                        STRING_MULTIBYTE (string));
 535   int found;
 536
 537   immediate_quit = 1;
 538   re_match_object = string;
 539
 540   found = re_search (compiled, (char *) SDATA (string), SBYTES (string),
 541                      0, SBYTES (string), 0);
 542
 543   immediate_quit = 0;
 544   return found;
 545 }
 546 \f
 547 /* Match REGEXP against the characters after POS to LIMIT, and return
 548    the number of matched characters.  If STRING is a string, match
 549    against the characters in it, and POS and LIMIT are indices into
 550    it; otherwise match against the current buffer.  A negative
 551    POS_BYTE or LIMIT_BYTE is computed from POS or LIMIT.  This
 552    function doesn't modify the match data.  */
 553 EMACS_INT
 554 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
 555 {
 556   struct re_pattern_buffer *compiled;
 557   unsigned char *text1, *text2;
 558   EMACS_INT size1, size2;
 559   EMACS_INT matched;
 560   int multibyte;
 561
 562   if (! STRINGP (string))
 563     {
 564       if (pos_byte < 0)
 565         pos_byte = CHAR_TO_BYTE (pos);
 566       if (limit_byte < 0)
 567         limit_byte = CHAR_TO_BYTE (limit);
 568       /* Positions handed to the matcher are relative to BEGV_BYTE.  */
 569       pos_byte -= BEGV_BYTE;
 570       limit_byte -= BEGV_BYTE;
 571       text1 = BEGV_ADDR;
 572       size1 = GPT_BYTE - BEGV_BYTE;
 573       text2 = GAP_END_ADDR;
 574       size2 = ZV_BYTE - GPT_BYTE;
 575       if (size1 < 0)
 576         {
 577           text2 = text1;
 578           size2 = ZV_BYTE - BEGV_BYTE;
 579           size1 = 0;
 580         }
 581       if (size2 < 0)
 582         {
 583           size1 = ZV_BYTE - BEGV_BYTE;
 584           size2 = 0;
 585         }
 586       re_match_object = Qnil;
 587       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 588     }
 589   else
 590     {
 591       if (pos_byte < 0)
 592         pos_byte = string_char_to_byte (string, pos);
 593       if (limit_byte < 0)
 594         limit_byte = string_char_to_byte (string, limit);
 595       text1 = NULL;
 596       size1 = 0;
 597       text2 = SDATA (string);
 598       size2 = SBYTES (string);
 599       re_match_object = string;
 600       multibyte = STRING_MULTIBYTE (string);
 601     }
 602
 603   compiled = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 604   immediate_quit = 1;
 605   matched = re_match_2 (compiled, (char *) text1, size1, (char *) text2, size2,
 606                         pos_byte, NULL, limit_byte);
 607   immediate_quit = 0;
 608   return matched;
 609 }
 610
 611 \f
 612 /* The newline cache: remembering which sections of text have no newlines.  */
 613
 614 /* Bring BUF's newline cache into line with its cache_long_line_scans
 615    setting: make sure the cache exists when that is non-nil, and make
 616    sure it does not when that is nil.  This is our cheap way of
 617    associating an action with the change of state of a buffer-local
 618    variable.  */
 619 static void
 620 newline_cache_on_off (struct buffer *buf)
 621 {
 622   int wanted = !NILP (buf->cache_long_line_scans);
 623   int present = buf->newline_cache != 0;
 624
 625   /* Nothing to do when the cache is already in the wanted state.  */
 626   if (wanted == present)
 627     return;
 628
 629   if (wanted)
 630     buf->newline_cache = new_region_cache ();
 631   else
 632     {
 633       free_region_cache (buf->newline_cache);
 634       buf->newline_cache = 0;
 635     }
 636 }
 637
 638 \f
 639 /* Search for COUNT instances of the character TARGET between START and END.
 640    START, END and the value returned are character positions.
 641    If COUNT is positive, search forwards; END must be >= START.
 642    If COUNT is negative, search backwards for the -COUNTth instance;
 643       END must be <= START.
 644    If COUNT is zero, do anything you please; run rogue, for all I care.
 645
 646    If END is zero, use BEGV or ZV instead, as appropriate for the
 647    direction indicated by COUNT.
 648
 649    If we find COUNT instances, set *SHORTAGE to zero, and return the
 650    position past the COUNTth match.  Note that for reverse motion
 651    this is not the same as the usual convention for Emacs motion commands.
 652
 653    If we don't find COUNT instances before reaching END, set *SHORTAGE
 654    to the number of TARGETs left unfound, and return END.
 655    SHORTAGE may be a null pointer, in which case nothing is stored.
 656    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 657    except when inside redisplay.  immediate_quit is 0 again on return.  */
 658
 659 int
 660 scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
 661 {
 662   struct region_cache *newline_cache;
 663   int direction;
 664   /* COUNT's sign selects the direction; a zero END defaults to ZV or BEGV.  */
 665   if (count > 0)
 666     {
 667       direction = 1;
 668       if (! end) end = ZV;
 669     }
 670   else
 671     {
 672       direction = -1;
 673       if (! end) end = BEGV;
 674     }
 675   /* Create or free the newline cache as cache_long_line_scans dictates.  */
 676   newline_cache_on_off (current_buffer);
 677   newline_cache = current_buffer->newline_cache;
 678
 679   if (shortage != 0)
 680     *shortage = 0;
 681
 682   immediate_quit = allow_quit;
 683
 684   if (count > 0)
 685     while (start != end)
 686       {
 687         /* Our innermost scanning loop is very simple; it doesn't know
 688            about gaps, buffer ends, or the newline cache.  ceiling is
 689            the position of the last character before the next such
 690            obstacle --- the last character the dumb search loop should
 691            examine.  */
 692         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
 693         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 694         EMACS_INT tem;
 695
 696         /* If we're looking for a newline, consult the newline cache
 697            to see where we can avoid some scanning.  */
 698         if (target == '\n' && newline_cache)
 699           {
 700             int next_change;
 701             immediate_quit = 0;
 702             while (region_cache_forward
 703                    (current_buffer, newline_cache, start_byte, &next_change))
 704               start_byte = next_change;
 705             immediate_quit = allow_quit;
 706
 707             /* START should never be after END.  */
 708             if (start_byte > ceiling_byte)
 709               start_byte = ceiling_byte;
 710
 711             /* Now the text after start is an unknown region, and
 712                next_change is the position of the next known region. */
 713             ceiling_byte = min (next_change - 1, ceiling_byte);
 714           }
 715
 716         /* The dumb loop can only scan text stored in contiguous
 717            bytes. BUFFER_CEILING_OF returns the last character
 718            position that is contiguous, so the ceiling is the
 719            position after that.  */
 720         tem = BUFFER_CEILING_OF (start_byte);
 721         ceiling_byte = min (tem, ceiling_byte);
 722
 723         {
 724           /* The termination address of the dumb loop: one past CEILING_BYTE.  */
 725           register unsigned char *ceiling_addr
 726             = BYTE_POS_ADDR (ceiling_byte) + 1;
 727           register unsigned char *cursor
 728             = BYTE_POS_ADDR (start_byte);
 729           unsigned char *base = cursor;
 730
 731           while (cursor < ceiling_addr)
 732             {
 733               unsigned char *scan_start = cursor;
 734
 735               /* The dumb loop.  */
 736               while (*cursor != target && ++cursor < ceiling_addr)
 737                 ;
 738
 739               /* If we're looking for newlines, cache the fact that
 740                  the region from start to cursor is free of them. */
 741               if (target == '\n' && newline_cache)
 742                 know_region_cache (current_buffer, newline_cache,
 743                                    start_byte + scan_start - base,
 744                                    start_byte + cursor - base);
 745
 746               /* Did we find the target character?  */
 747               if (cursor < ceiling_addr)
 748                 {
 749                   if (--count == 0)
 750                     {
 751                       immediate_quit = 0;
 752                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 753                     }
 754                   cursor++;
 755                 }
 756             }
 757           /* Continue the outer loop from where this stretch of scanning ended.  */
 758           start = BYTE_TO_CHAR (start_byte + cursor - base);
 759         }
 760       }
 761   else
 762     while (start > end)
 763       {
 764         /* The last character to check before the next obstacle.  */
 765         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
 766         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 767         EMACS_INT tem;
 768
 769         /* Consult the newline cache, if appropriate.  */
 770         if (target == '\n' && newline_cache)
 771           {
 772             int next_change;
 773             immediate_quit = 0;
 774             while (region_cache_backward
 775                    (current_buffer, newline_cache, start_byte, &next_change))
 776               start_byte = next_change;
 777             immediate_quit = allow_quit;
 778
 779             /* Start should never be at or before end.  */
 780             if (start_byte <= ceiling_byte)
 781               start_byte = ceiling_byte + 1;
 782
 783             /* Now the text before start is an unknown region, and
 784                next_change is the position of the next known region. */
 785             ceiling_byte = max (next_change, ceiling_byte);
 786           }
 787
 788         /* Stop scanning before the gap.  */
 789         tem = BUFFER_FLOOR_OF (start_byte - 1);
 790         ceiling_byte = max (tem, ceiling_byte);
 791
 792         {
 793           /* The lowest address the dumb loop examines.  */
 794           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 795           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 796           unsigned char *base = cursor;
 797
 798           while (cursor >= ceiling_addr)
 799             {
 800               unsigned char *scan_start = cursor;
 801
 802               while (*cursor != target && --cursor >= ceiling_addr)
 803                 ;
 804
 805               /* If we're looking for newlines, cache the fact that
 806                  the region from after the cursor to start is free of them.  */
 807               if (target == '\n' && newline_cache)
 808                 know_region_cache (current_buffer, newline_cache,
 809                                    start_byte + cursor - base,
 810                                    start_byte + scan_start - base);
 811
 812               /* Did we find the target character?  */
 813               if (cursor >= ceiling_addr)
 814                 {
 815                   if (++count >= 0)
 816                     {
 817                       immediate_quit = 0;
 818                       return BYTE_TO_CHAR (start_byte + cursor - base);
 819                     }
 820                   cursor--;
 821                 }
 822             }
 823           /* Continue the outer loop from where this stretch of scanning ended.  */
 824           start = BYTE_TO_CHAR (start_byte + cursor - base);
 825         }
 826       }
 827   /* We get here when the loops above ended without returning a match position.  */
 828   immediate_quit = 0;
 829   if (shortage != 0)
 830     *shortage = count * direction;
 831   return start;
 832 }
 833 \f
 834 /* Search for COUNT instances of a line boundary, which means either a
 835    newline or (if selective display enabled) a carriage return.
 836    Start at START.  If COUNT is negative, search backwards.
 837
 838    We report the resulting position by calling TEMP_SET_PT_BOTH.
 839
 840    If we find COUNT instances, we position after (always after,
 841    even if scanning backwards) the COUNTth match, and return 0.
 842
 843    If we don't find COUNT instances before reaching the end of the
 844    buffer (or the beginning, if scanning backwards), we return
 845    the number of line boundaries left unfound, and position at
 846    the limit we bumped up against.
 847
 848    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 849    except in special cases.
        LIMIT and LIMIT_BYTE are the char and byte positions at which the
        scan stops.  The START_BYTE argument is not used as passed: it is
        recomputed from START on entry.  immediate_quit is restored to
        the value it had on entry before every return.
        NOTE(review): the loops below compare bytes against '\n' only; no
        carriage-return test is visible in this function, so the
        selective-display wording above may be out of date.  */
 850
 851 int
 852 scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
 853 {
 854   int direction = ((count > 0) ? 1 : -1);
 855
 856   register unsigned char *cursor;
 857   unsigned char *base;
 858
 859   EMACS_INT ceiling;
 860   register unsigned char *ceiling_addr;
 861
 862   int old_immediate_quit = immediate_quit;
 863
 864   /* The code that follows is like scan_buffer
 865      but checks for either newline or carriage return.  */
 866
 867   if (allow_quit)
 868     immediate_quit++;
 869
       /* Derive the byte position from START; the incoming value of the
          START_BYTE argument is overwritten here.  */
 870   start_byte = CHAR_TO_BYTE (start);
 871
 872   if (count > 0)
 873     {
 874       while (start_byte < limit_byte)
 875         {
               /* Scan one stretch at a time.  CEILING is the last byte
                  position to examine in this stretch, never beyond
                  LIMIT_BYTE - 1, and CEILING_ADDR is one past its address
                  so that the inner loop can stop on pointer equality.  */
 876           ceiling =  BUFFER_CEILING_OF (start_byte);
 877           ceiling = min (limit_byte - 1, ceiling);
 878           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 879           base = (cursor = BYTE_POS_ADDR (start_byte));
 880           while (1)
 881             {
 882               while (*cursor != '\n' && ++cursor != ceiling_addr)
 883                 ;
 884
                   /* Stopping short of CEILING_ADDR means *CURSOR is a
                      newline.  */
 885               if (cursor != ceiling_addr)
 886                 {
 887                   if (--count == 0)
 888                     {
 889                       immediate_quit = old_immediate_quit;
                           /* Position just after the newline found.  */
 890                       start_byte = start_byte + cursor - base + 1;
 891                       start = BYTE_TO_CHAR (start_byte);
 892                       TEMP_SET_PT_BOTH (start, start_byte);
 893                       return 0;
 894                     }
 895                   else
                         /* More newlines are wanted: step past this one, and
                            leave the stretch if that reaches its end.  */
 896                     if (++cursor == ceiling_addr)
 897                       break;
 898                 }
 899               else
 900                 break;
 901             }
               /* Advance by the number of bytes scanned in this stretch.  */
 902           start_byte += cursor - base;
 903         }
 904     }
 905   else
 906     {
 907       while (start_byte > limit_byte)
 908         {
               /* Backward scan.  CEILING is the lowest byte position to
                  examine in this stretch (never below LIMIT_BYTE) and
                  CEILING_ADDR is one before its address.  CURSOR starts one
                  past the byte before START_BYTE because the inner loop
                  decrements before it tests.  */
 909           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 910           ceiling = max (limit_byte, ceiling);
 911           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 912           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 913           while (1)
 914             {
 915               while (--cursor != ceiling_addr && *cursor != '\n')
 916                 ;
 917
 918               if (cursor != ceiling_addr)
 919                 {
 920                   if (++count == 0)
 921                     {
 922                       immediate_quit = old_immediate_quit;
 923                       /* Return the position AFTER the match we found.  */
 924                       start_byte = start_byte + cursor - base + 1;
 925                       start = BYTE_TO_CHAR (start_byte);
 926                       TEMP_SET_PT_BOTH (start, start_byte);
 927                       return 0;
 928                     }
 929                 }
 930               else
 931                 break;
 932             }
 933           /* Here we add 1 to compensate for the last decrement
 934              of CURSOR, which took it past the valid range.  */
 935           start_byte += cursor - base + 1;
 936         }
 937     }
 938
       /* Fewer than COUNT newlines lie before the limit: leave point at the
          limit and report how many were not found (always non-negative,
          since COUNT and DIRECTION have the same sign here).  */
 939   TEMP_SET_PT_BOTH (limit, limit_byte);
 940   immediate_quit = old_immediate_quit;
 941
 942   return count * direction;
 943 }
 944
     /* Return the result of scan_buffer when asked for CNT newlines
        starting at FROM, with 0 as the end bound and a null SHORTAGE
        pointer, so no shortfall is reported to the caller.
        The final 0 is presumably scan_buffer's allow-quit flag, given
        this function's name -- confirm against scan_buffer.  */
 945 int
 946 find_next_newline_no_quit (EMACS_INT from, int cnt)
 947 {
 948   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 949 }
 950
 951 /* Like find_next_newline, but returns position before the newline,
 952    not after, and only search up to TO.  This isn't just
 953    find_next_newline (...)-1, because you might hit TO.
        scan_buffer is asked for CNT newlines between FROM and TO and
        reports through SHORTAGE.  When SHORTAGE is zero (presumably
        meaning every requested newline was found) the position is
        stepped back by one so that it lies before the newline;
        otherwise scan_buffer's result is returned unchanged.  */
 954
 955 int
 956 find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
 957 {
 958   int shortage;
 959   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 960
 961   if (shortage == 0)
 962     pos--;
 963
 964   return pos;
 965 }
 966 \f
 967 /* Subroutines of Lisp buffer search functions. */
 968
     /* Common body of the Lisp buffer search commands.
        Search for STRING (which must be a string) starting at point.
        DIRECTION, multiplied by COUNT when COUNT is non-nil, gives the
        signed repeat count N: positive searches forward, negative
        searches backward.
        BOUND, if non-nil, is a number or marker that limits the search.
        An error is signaled if it lies on the wrong side of point for
        the direction of search, and it is clipped to the range BEGV..ZV.
        With a nil BOUND the limit is ZV when N is positive and BEGV
        otherwise.
        RE and POSIX are handed on to search_buffer unchanged.  The case
        tables of the current buffer are passed as translation tables
        only when its case_fold_search is non-nil.
        On success, set point to the position search_buffer returns and
        return that position as a Lisp integer.
        On failure (search_buffer returns a value <= 0): if NOERROR is
        nil, signal Qsearch_failed with STRING; if NOERROR is t, return
        nil and leave point alone; otherwise move point to the limit and
        return nil.  */
 969 static Lisp_Object
 970 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
 971 {
       /* Positions are EMACS_INT, matching search_buffer's parameters and
          return type; plain int could truncate them where EMACS_INT is
          wider than int.  */
 972   register EMACS_INT np;
 973   EMACS_INT lim, lim_byte;
 974   int n = direction;
 975
 976   if (!NILP (count))
 977     {
 978       CHECK_NUMBER (count);
 979       n *= XINT (count);
 980     }
 981
 982   CHECK_STRING (string);
 983   if (NILP (bound))
 984     {
 985       if (n > 0)
 986         lim = ZV, lim_byte = ZV_BYTE;
 987       else
 988         lim = BEGV, lim_byte = BEGV_BYTE;
 989     }
 990   else
 991     {
 992       CHECK_NUMBER_COERCE_MARKER (bound);
 993       lim = XINT (bound);
 994       if (n > 0 ? lim < PT : lim > PT)
 995         error ("Invalid search bound (wrong side of point)");
           /* Clip the bound to the accessible portion of the buffer.  */
 996       if (lim > ZV)
 997         lim = ZV, lim_byte = ZV_BYTE;
 998       else if (lim < BEGV)
 999         lim = BEGV, lim_byte = BEGV_BYTE;
1000       else
1001         lim_byte = CHAR_TO_BYTE (lim);
1002     }
1003
1004   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1005   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1006     = current_buffer->case_eqv_table;
1007
1008   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1009                       (!NILP (current_buffer->case_fold_search)
1010                        ? current_buffer->case_canon_table
1011                        : Qnil),
1012                       (!NILP (current_buffer->case_fold_search)
1013                        ? current_buffer->case_eqv_table
1014                        : Qnil),
1015                       posix);
       /* A result that is not positive means the search failed.  */
1016   if (np <= 0)
1017     {
1018       if (NILP (noerror))
1019         xsignal1 (Qsearch_failed, string);
1020
1021       if (!EQ (noerror, Qt))
1022         {
1023           if (lim < BEGV || lim > ZV)
1024             abort ();
1025           SET_PT_BOTH (lim, lim_byte);
1026           return Qnil;
1027 #if 0 /* This would be clean, but maybe programs depend on
1028          a value of nil here.  */
1029           np = lim;
1030 #endif
1031         }
1032       else
1033         return Qnil;
1034     }
1035
1036   if (np < BEGV || np > ZV)
1037     abort ();
1038
1039   SET_PT (np);
1040
1041   return make_number (np);
1042 }
1043 \f
1044 /* Return 1 if REGEXP matches just one constant string.
        The bytes of REGEXP are scanned in order.  The result is 0 if an
        unquoted regexp special character from the first case list is
        seen, if a backslash is the last byte, or if a backslash is
        followed by one of the bytes in the inner case list.  A backslash
        followed by any other byte is accepted, and that byte is skipped
        without being tested as a special character.  */
1045
1046 static int
1047 trivial_regexp_p (Lisp_Object regexp)
1048 {
1049   int len = SBYTES (regexp);
1050   unsigned char *s = SDATA (regexp);
1051   while (--len >= 0)
1052     {
1053       switch (*s++)
1054         {
1055         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1056           return 0;
1057         case '\\':
               /* Account for the byte after the backslash; if there is
                  none, the regexp is not treated as trivial.  */
1058           if (--len < 0)
1059             return 0;
1060           switch (*s++)
1061             {
1062             case '|': case '(': case ')': case '`': case '\'': case 'b':
1063             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1064             case 'S': case '=': case '{': case '}': case '_':
1065             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1066             case '1': case '2': case '3': case '4': case '5':
1067             case '6': case '7': case '8': case '9':
1068               return 0;
1069             }
1070         }
1071     }
1072   return 1;
1073 }
1074
1075 /* Search for the n'th occurrence of STRING in the current buffer,
1076    starting at position POS and stopping at position LIM,
1077    treating STRING as a literal string if RE is false or as
1078    a regular expression if RE is true.
1079
1080    If N is positive, searching is forward and LIM must be greater than POS.
1081    If N is negative, searching is backward and LIM must be less than POS.
1082
1083    Returns -x if x occurrences remain to be found (x > 0),
1084    or else the position at the beginning of the Nth occurrence
1085    (if searching backward) or the end (if searching forward).
1086
1087    POSIX is nonzero if we want full backtracking (POSIX style)
1088    for this pattern.  0 means backtrack only enough to get a valid match.  */
1089
     /* TRANSLATE (OUT, TRT, D): store in OUT the translation of D.
        If TRT is non-nil and Faref (TRT, D) is an integer, OUT gets that
        integer; in every other case OUT gets D itself.
        TRT and D each appear more than once in the expansion, so they
        should be free of side effects, and the expansion declares a
        local named temp, so the arguments must not refer to a variable
        of that name.  */
1090 #define TRANSLATE(out, trt, d)                  \
1091 do                                              \
1092   {                                             \
1093     if (! NILP (trt))                           \
1094       {                                         \
1095         Lisp_Object temp;                       \
1096         temp = Faref (trt, make_number (d));    \
1097         if (INTEGERP (temp))                    \
1098           out = XINT (temp);                    \
1099         else                                    \
1100           out = d;                              \
1101       }                                         \
1102     else                                        \
1103       out = d;                                  \
1104   }                                             \
1105 while (0)
1106
1107 /* Only used in search_buffer, to record the position of the match
1108    when searching regexps and SEARCH_REGS should not be changed
1109    (i.e. Vinhibit_changing_match_data is non-nil).
        search_buffer passes it to compile_pattern and re_search_2 in
        place of SEARCH_REGS, then reads start[0] from it when searching
        backward and end[0] when searching forward.  */
1110 static struct re_registers search_regs_1;
1111
     /* Search the current buffer for the N'th occurrence of STRING,
        starting at POS/POS_BYTE and not going beyond LIM/LIM_BYTE.
        N > 0 searches forward and N < 0 searches backward.  If N is 0 or
        STRING is empty, the search registers are set to an empty match
        at POS_BYTE and POS is returned.
        If RE is nonzero STRING is a regexp; however a regexp accepted by
        trivial_regexp_p is searched for as a plain string when
        Vsearch_spaces_regexp is nil.  POSIX is passed to compile_pattern.
        TRT and INVERSE_TRT are the translation tables, or nil.
        Return the char position of the end of the last match (forward)
        or of its start (backward).  If the search runs out, return a
        negative number whose magnitude is the count of occurrences
        still to be found.  */
1112 static EMACS_INT
1113 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1114                RE, trt, inverse_trt, posix)
1115      Lisp_Object string;
1116      EMACS_INT pos;
1117      EMACS_INT pos_byte;
1118      EMACS_INT lim;
1119      EMACS_INT lim_byte;
1120      int n;
1121      int RE;
1122      Lisp_Object trt;
1123      Lisp_Object inverse_trt;
1124      int posix;
1125 {
1126   int len = SCHARS (string);
1127   int len_byte = SBYTES (string);
1128   register int i;
1129
1130   if (running_asynch_code)
1131     save_search_regs ();
1132
1133   /* Searching 0 times means don't move.  */
1134   /* Null string is found at starting position.  */
1135   if (len == 0 || n == 0)
1136     {
1137       set_search_regs (pos_byte, 0);
1138       return pos;
1139     }
1140
1141   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1142     {
1143       unsigned char *p1, *p2;
1144       int s1, s2;
1145       struct re_pattern_buffer *bufp;
1146
           /* Match data goes to search_regs_1 instead of search_regs when
              Vinhibit_changing_match_data is non-nil.  */
1147       bufp = compile_pattern (string,
1148                               (NILP (Vinhibit_changing_match_data)
1149                                ? &search_regs : &search_regs_1),
1150                               trt, posix,
1151                               !NILP (current_buffer->enable_multibyte_characters));
1152
1153       immediate_quit = 1;       /* Quit immediately if user types ^G,
1154                                    because letting this function finish
1155                                    can take too long. */
1156       QUIT;                     /* Do a pending quit right away,
1157                                    to avoid paradoxical behavior */
1158       /* Get pointers and sizes of the two strings
1159          that make up the visible portion of the buffer. */
1160
1161       p1 = BEGV_ADDR;
1162       s1 = GPT_BYTE - BEGV_BYTE;
1163       p2 = GAP_END_ADDR;
1164       s2 = ZV_BYTE - GPT_BYTE;
           /* A negative size means the gap lies outside the visible
              portion; put the whole visible text in one of the two
              strings and make the other one empty.  */
1165       if (s1 < 0)
1166         {
1167           p2 = p1;
1168           s2 = ZV_BYTE - BEGV_BYTE;
1169           s1 = 0;
1170         }
1171       if (s2 < 0)
1172         {
1173           s1 = ZV_BYTE - BEGV_BYTE;
1174           s2 = 0;
1175         }
1176       re_match_object = Qnil;
1177
           /* Backward search: one re_search_2 call per occurrence, each
              starting from the start of the previous match.  */
1178       while (n < 0)
1179         {
1180           int val;
1181           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1182                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1183                              (NILP (Vinhibit_changing_match_data)
1184                               ? &search_regs : &search_regs_1),
1185                              /* Don't allow match past current point */
1186                              pos_byte - BEGV_BYTE);
1187           if (val == -2)
1188             {
1189               matcher_overflow ();
1190             }
1191           if (val >= 0)
1192             {
1193               if (NILP (Vinhibit_changing_match_data))
1194                 {
1195                   pos_byte = search_regs.start[0] + BEGV_BYTE;
                       /* The registers hold byte offsets from BEGV_BYTE;
                          convert each one in use to a char position.  */
1196                   for (i = 0; i < search_regs.num_regs; i++)
1197                     if (search_regs.start[i] >= 0)
1198                       {
1199                         search_regs.start[i]
1200                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1201                         search_regs.end[i]
1202                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1203                       }
1204                   XSETBUFFER (last_thing_searched, current_buffer);
1205                   /* Set pos to the new position. */
1206                   pos = search_regs.start[0];
1207                 }
1208               else
1209                 {
1210                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1211                   /* Set pos to the new position.  */
1212                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1213                 }
1214             }
1215           else
1216             {
                   /* No further match: N is still negative and its
                      magnitude is the number of occurrences not found.  */
1217               immediate_quit = 0;
1218               return (n);
1219             }
1220           n++;
1221         }
           /* Forward search: as above, but continue from the end of each
              match.  The last argument is presumably the offset past
              which no match may extend (LIM_BYTE here) -- see the
              re_search_2 documentation.  */
1222       while (n > 0)
1223         {
1224           int val;
1225           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1226                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1227                              (NILP (Vinhibit_changing_match_data)
1228                               ? &search_regs : &search_regs_1),
1229                              lim_byte - BEGV_BYTE);
1230           if (val == -2)
1231             {
1232               matcher_overflow ();
1233             }
1234           if (val >= 0)
1235             {
1236               if (NILP (Vinhibit_changing_match_data))
1237                 {
1238                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1239                   for (i = 0; i < search_regs.num_regs; i++)
1240                     if (search_regs.start[i] >= 0)
1241                       {
1242                         search_regs.start[i]
1243                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1244                         search_regs.end[i]
1245                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1246                       }
1247                   XSETBUFFER (last_thing_searched, current_buffer);
1248                   pos = search_regs.end[0];
1249                 }
1250               else
1251                 {
1252                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1253                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1254                 }
1255             }
1256           else
1257             {
                   /* No further match: report the remaining count as a
                      negative number.  */
1258               immediate_quit = 0;
1259               return (0 - n);
1260             }
1261           n--;
1262         }
1263       immediate_quit = 0;
1264       return (pos);
1265     }
1266   else                          /* non-RE case */
1267     {
1268       unsigned char *raw_pattern, *pat;
1269       int raw_pattern_size;
1270       int raw_pattern_size_byte;
1271       unsigned char *patbuf;
1272       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1273       unsigned char *base_pat;
1274       /* Set to positive if we find a non-ASCII char that need
1275          translation.  Otherwise set to zero later.  */
1276       int char_base = -1;
1277       int boyer_moore_ok = 1;
1278
1279       /* MULTIBYTE says whether the text to be searched is multibyte.
1280          We must convert PATTERN to match that, or we will not really
1281          find things right.  */
1282
1283       if (multibyte == STRING_MULTIBYTE (string))
1284         {
               /* Same representation: use the string's own bytes.  */
1285           raw_pattern = (unsigned char *) SDATA (string);
1286           raw_pattern_size = SCHARS (string);
1287           raw_pattern_size_byte = SBYTES (string);
1288         }
1289       else if (multibyte)
1290         {
               /* Unibyte string, multibyte buffer: build a multibyte copy
                  of the pattern in stack storage.  */
1291           raw_pattern_size = SCHARS (string);
1292           raw_pattern_size_byte
1293             = count_size_as_multibyte (SDATA (string),
1294                                        raw_pattern_size);
1295           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1296           copy_text (SDATA (string), raw_pattern,
1297                      SCHARS (string), 0, 1);
1298         }
1299       else
1300         {
1301           /* Converting multibyte to single-byte.
1302
1303              ??? Perhaps this conversion should be done in a special way
1304              by subtracting nonascii-insert-offset from each non-ASCII char,
1305              so that only the multibyte chars which really correspond to
1306              the chosen single-byte character set can possibly match.  */
1307           raw_pattern_size = SCHARS (string);
1308           raw_pattern_size_byte = SCHARS (string);
1309           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1310           copy_text (SDATA (string), raw_pattern,
1311                      SBYTES (string), 1, 0);
1312         }
1313
1314       /* Copy and optionally translate the pattern.  */
1315       len = raw_pattern_size;
1316       len_byte = raw_pattern_size_byte;
1317       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1318       pat = patbuf;
1319       base_pat = raw_pattern;
1320       if (multibyte)
1321         {
1322           /* Fill patbuf by translated characters in STRING while
1323              checking if we can use boyer-moore search.  If TRT is
1324              non-nil, we can use boyer-moore search only if TRT can be
1325              represented by the byte array of 256 elements.  For that,
1326              all non-ASCII case-equivalents of all case-sensitive
1327              characters in STRING must belong to the same charset and
1328              row.  */
1329
1330           while (--len >= 0)
1331             {
1332               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1333               int c, translated, inverse;
1334               int in_charlen, charlen;
1335
1336               /* If we got here and the RE flag is set, it's because we're
1337                  dealing with a regexp known to be trivial, so the backslash
1338                  just quotes the next character.  */
1339               if (RE && *base_pat == '\\')
1340                 {
                       /* Drop the backslash from every count so that only
                          the quoted character is copied.  */
1341                   len--;
1342                   raw_pattern_size--;
1343                   len_byte--;
1344                   base_pat++;
1345                 }
1346
1347               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1348
1349               if (NILP (trt))
1350                 {
1351                   str = base_pat;
1352                   charlen = in_charlen;
1353                 }
1354               else
1355                 {
1356                   /* Translate the character.  */
1357                   TRANSLATE (translated, trt, c);
1358                   charlen = CHAR_STRING (translated, str_base);
1359                   str = str_base;
1360
1361                   /* Check if C has any other case-equivalents.  */
1362                   TRANSLATE (inverse, inverse_trt, c);
1363                   /* If so, check if we can use boyer-moore.  */
1364                   if (c != inverse && boyer_moore_ok)
1365                     {
1366                       /* Check if all equivalents belong to the same
1367                          group of characters.  Note that the check of C
1368                          itself is done by the last iteration.  */
1369                       int this_char_base = -1;
1370
                           /* Follow the chain of equivalents through
                              INVERSE_TRT until it comes back to C.  */
1371                       while (boyer_moore_ok)
1372                         {
1373                           if (ASCII_BYTE_P (inverse))
1374                             {
1375                               if (this_char_base > 0)
1376                                 boyer_moore_ok = 0;
1377                               else
1378                                 this_char_base = 0;
1379                             }
1380                           else if (CHAR_BYTE8_P (inverse))
1381                             /* Boyer-moore search can't handle a
1382                                translation of an eight-bit
1383                                character.  */
1384                             boyer_moore_ok = 0;
1385                           else if (this_char_base < 0)
1386                             {
                                   /* The group of a character is its code
                                      with the low six bits cleared.  */
1387                               this_char_base = inverse & ~0x3F;
1388                               if (char_base < 0)
1389                                 char_base = this_char_base;
1390                               else if (this_char_base != char_base)
1391                                 boyer_moore_ok = 0;
1392                             }
1393                           else if ((inverse & ~0x3F) != this_char_base)
1394                             boyer_moore_ok = 0;
1395                           if (c == inverse)
1396                             break;
1397                           TRANSLATE (inverse, inverse_trt, inverse);
1398                         }
1399                     }
1400                 }
1401
1402               /* Store this character into the translated pattern.  */
1403               memcpy (pat, str, charlen);
1404               pat += charlen;
1405               base_pat += in_charlen;
1406               len_byte -= in_charlen;
1407             }
1408
1409           /* If char_base is still negative we didn't find any translated
1410              non-ASCII characters.  */
1411           if (char_base < 0)
1412             char_base = 0;
1413         }
1414       else
1415         {
1416           /* Unibyte buffer.  */
1417           char_base = 0;
1418           while (--len >= 0)
1419             {
1420               int c, translated;
1421
1422               /* If we got here and the RE flag is set, it's because we're
1423                  dealing with a regexp known to be trivial, so the backslash
1424                  just quotes the next character.  */
1425               if (RE && *base_pat == '\\')
1426                 {
1427                   len--;
1428                   raw_pattern_size--;
1429                   base_pat++;
1430                 }
1431               c = *base_pat++;
1432               TRANSLATE (translated, trt, c);
1433               *pat++ = translated;
1434             }
1435         }
1436
           /* From here on LEN and LEN_BYTE describe the translated pattern
              held in PATBUF: LEN_BYTE is the number of bytes written, LEN
              the character count after dropping quoting backslashes.  */
1437       len_byte = pat - patbuf;
1438       len = raw_pattern_size;
1439       pat = base_pat = patbuf;
1440
1441       if (boyer_moore_ok)
1442         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1443                             pos, pos_byte, lim, lim_byte,
1444                             char_base);
1445       else
1446         return simple_search (n, pat, len, len_byte, trt,
1447                               pos, pos_byte, lim, lim_byte);
1448     }
1449 }
1450 \f
1451 /* Do a simple string search N times for the string PAT,
1452    whose length is LEN/LEN_BYTE,
1453    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1454    TRT is the translation table.
1455
1456    Return the character position where the match is found.
1457    Otherwise, if M matches remained to be found, return -M.
1458
1459    This kind of search works regardless of what is in PAT and
1460    regardless of what is in TRT.  It is used in cases where
1461    boyer_moore cannot work.
        Each buffer character is passed through TRT before it is compared;
        the characters of PAT are compared as given.  There is one loop
        for each combination of direction (forward when LIM > POS,
        backward when LIM < POS) and buffer kind (multibyte or unibyte).
        The unibyte loops advance POS only and leave POS_BYTE alone.
        When all N occurrences are found, set_search_regs is called with
        the start of the last match (a byte position in a multibyte
        buffer, otherwise POS) and the number of buffer bytes matched.
        The position returned is the end of that match when searching
        forward and its start when searching backward.  */
1462
1463 static EMACS_INT
1464 simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
1465 {
1466   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1467   int forward = n > 0;
1468   /* Number of buffer bytes matched.  Note that this may be different
1469      from len_byte in a multibyte buffer.  */
1470   int match_byte;
1471
       /* Forward search in a multibyte buffer.  */
1472   if (lim > pos && multibyte)
1473     while (n > 0)
1474       {
1475         while (1)
1476           {
1477             /* Try matching at position POS.  */
1478             EMACS_INT this_pos = pos;
1479             EMACS_INT this_pos_byte = pos_byte;
1480             int this_len = len;
1481             unsigned char *p = pat;
                 /* Give up once the pattern can no longer fit before the
                    limit, in characters or in bytes.  */
1482             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1483               goto stop;
1484
1485             while (this_len > 0)
1486               {
1487                 int charlen, buf_charlen;
1488                 int pat_ch, buf_ch;
1489
1490                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1491                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1492                                                  buf_charlen);
1493                 TRANSLATE (buf_ch, trt, buf_ch);
1494
1495                 if (buf_ch != pat_ch)
1496                   break;
1497
1498                 this_len--;
1499                 p += charlen;
1500
1501                 this_pos_byte += buf_charlen;
1502                 this_pos++;
1503               }
1504
                 /* Every pattern character matched: move past the match
                    and go on to the next occurrence.  */
1505             if (this_len == 0)
1506               {
1507                 match_byte = this_pos_byte - pos_byte;
1508                 pos += len;
1509                 pos_byte += match_byte;
1510                 break;
1511               }
1512
1513             INC_BOTH (pos, pos_byte);
1514           }
1515
1516         n--;
1517       }
       /* Forward search in a unibyte buffer.  */
1518   else if (lim > pos)
1519     while (n > 0)
1520       {
1521         while (1)
1522           {
1523             /* Try matching at position POS.  */
1524             EMACS_INT this_pos = pos;
1525             int this_len = len;
1526             unsigned char *p = pat;
1527
1528             if (pos + len > lim)
1529               goto stop;
1530
1531             while (this_len > 0)
1532               {
1533                 int pat_ch = *p++;
1534                 int buf_ch = FETCH_BYTE (this_pos);
1535                 TRANSLATE (buf_ch, trt, buf_ch);
1536
1537                 if (buf_ch != pat_ch)
1538                   break;
1539
1540                 this_len--;
1541                 this_pos++;
1542               }
1543
1544             if (this_len == 0)
1545               {
1546                 match_byte = len;
1547                 pos += len;
1548                 break;
1549               }
1550
1551             pos++;
1552           }
1553
1554         n--;
1555       }
1556   /* Backwards search.  */
1557   else if (lim < pos && multibyte)
1558     while (n < 0)
1559       {
1560         while (1)
1561           {
1562             /* Try matching at position POS.  */
1563             EMACS_INT this_pos = pos;
1564             EMACS_INT this_pos_byte = pos_byte;
1565             int this_len = len;
                 /* Compare from the end of the pattern toward its start.  */
1566             const unsigned char *p = pat + len_byte;
1567
1568             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1569               goto stop;
1570
1571             while (this_len > 0)
1572               {
                     /* NOTE(review): charlen is declared but not used in
                        this loop.  */
1573                 int charlen;
1574                 int pat_ch, buf_ch;
1575
1576                 DEC_BOTH (this_pos, this_pos_byte);
1577                 PREV_CHAR_BOUNDARY (p, pat);
1578                 pat_ch = STRING_CHAR (p);
1579                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1580                 TRANSLATE (buf_ch, trt, buf_ch);
1581
1582                 if (buf_ch != pat_ch)
1583                   break;
1584
1585                 this_len--;
1586               }
1587
                 /* Matched: the new position is the start of the match.  */
1588             if (this_len == 0)
1589               {
1590                 match_byte = pos_byte - this_pos_byte;
1591                 pos = this_pos;
1592                 pos_byte = this_pos_byte;
1593                 break;
1594               }
1595
1596             DEC_BOTH (pos, pos_byte);
1597           }
1598
1599         n++;
1600       }
       /* Backward search in a unibyte buffer.  The candidate match ends
          at POS, so comparison starts LEN bytes before it.  */
1601   else if (lim < pos)
1602     while (n < 0)
1603       {
1604         while (1)
1605           {
1606             /* Try matching at position POS.  */
1607             EMACS_INT this_pos = pos - len;
1608             int this_len = len;
1609             unsigned char *p = pat;
1610
1611             if (this_pos < lim)
1612               goto stop;
1613
1614             while (this_len > 0)
1615               {
1616                 int pat_ch = *p++;
1617                 int buf_ch = FETCH_BYTE (this_pos);
1618                 TRANSLATE (buf_ch, trt, buf_ch);
1619
1620                 if (buf_ch != pat_ch)
1621                   break;
1622                 this_len--;
1623                 this_pos++;
1624               }
1625
1626             if (this_len == 0)
1627               {
1628                 match_byte = len;
1629                 pos -= len;
1630                 break;
1631               }
1632
1633             pos--;
1634           }
1635
1636         n++;
1637       }
1638
       /* Reached by falling out of a loop with N == 0, by goto when the
          limit is hit, or directly when LIM equals POS.  */
1639  stop:
1640   if (n == 0)
1641     {
           /* After a forward search the position is the end of the match,
              so step back by MATCH_BYTE to get its start.  */
1642       if (forward)
1643         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1644       else
1645         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1646
1647       return pos;
1648     }
1649   else if (n > 0)
1650     return -n;
1651   else
1652     return n;
1653 }
1654 \f
1655 /* Do Boyer-Moore search N times for the string BASE_PAT, whose length
1656    is LEN/LEN_BYTE, from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1657    Search forward if N is positive, else backward.  TRT and INVERSE_TRT
1658    are translation tables; BASE_PAT is already translated by TRT.
1659    When the Nth match is found, record it with set_search_regs and
1660    return the character position of its end (its start if searching
1661    backward).  Otherwise return minus the number of matches still missing.
1662
1663    This kind of search works if all the characters in BASE_PAT that
1664    have nontrivial translation are the same aside from the last byte.
1665    This makes it possible to translate just the last byte of a
1666    character, and do so after just a simple test of the context.
1667    CHAR_BASE is nonzero if there is such a non-ASCII character.
1668    If that criterion is not satisfied, do not call this function.  */
1669
1670 static EMACS_INT
1671 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1672              pos, pos_byte, lim, lim_byte, char_base)
1673      int n;  /* repeat count; its sign selects the direction */
1674      unsigned char *base_pat;
1675      int len, len_byte;  /* NOTE(review): only LEN_BYTE is read in the body */
1676      Lisp_Object trt;
1677      Lisp_Object inverse_trt;
1678      EMACS_INT pos, pos_byte;  /* NOTE(review): only POS_BYTE is read in the body */
1679      EMACS_INT lim, lim_byte;  /* NOTE(review): only LIM_BYTE is read in the body */
1680      int char_base;
1681 {
1682   int direction = ((n > 0) ? 1 : -1);  /* +1 forward, -1 backward */
1683   register int dirlen;  /* LEN_BYTE carrying the sign of DIRECTION */
1684   EMACS_INT limit;
1685   int stride_for_teases = 0;  /* explained at the end of the table-building loop */
1686   int BM_tab[0400];  /* stride to take for each possible byte value */
1687   register unsigned char *cursor, *p_limit;
1688   register int i, j;
1689   unsigned char *pat, *pat_end;
1690   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1691
1692   unsigned char simple_translate[0400];
1693   /* These are set to the preceding bytes of a byte to be translated
1694      if char_base is nonzero.  As the maximum byte length of a
1695      multibyte character is 5, we have to check at most four previous
1696      bytes.  */
1697   int translate_prev_byte1 = 0;
1698   int translate_prev_byte2 = 0;
1699   int translate_prev_byte3 = 0;
1700   int translate_prev_byte4 = 0;  /* NOTE(review): assigned below but never read */
1701
1702   /* The general approach is that we are going to maintain that we know
1703      the first (closest to the present position, in whatever direction
1704      we're searching) character that could possibly be the last
1705      (furthest from present position) character of a valid match.  We
1706      advance the state of our knowledge by looking at that character
1707      and seeing whether it indeed matches the last character of the
1708      pattern.  If it does, we take a closer look.  If it does not, we
1709      move our pointer (to putative last characters) as far as is
1710      logically possible.  This amount of movement, which I call a
1711      stride, will be the length of the pattern if the actual character
1712      appears nowhere in the pattern, otherwise it will be the distance
1713      from the last occurrence of that character to the end of the
1714      pattern.  If the amount is zero we have a possible match.  */
1715
1716   /* Here we make a "mickey mouse" BM table.  The stride of the search
1717      is determined only by the last character of the putative match.
1718      If that character does not match, we will stride the proper
1719      distance to propose a match that superimposes it on the last
1720      instance of a character that matches it (per trt), or misses
1721      it entirely if there is none. */
1722
1723   dirlen = len_byte * direction;
1724
1725   /* Record position after the end of the pattern.  */
1726   pat_end = base_pat + len_byte;
1727   /* BASE_PAT points to a character that we start scanning from.
1728      It is the first character in a forward search,
1729      the last character in a backward search.  */
1730   if (direction < 0)
1731     base_pat = pat_end - 1;
1732
1733   /* A character that does not appear in the pattern induces a
1734      stride equal to the pattern length.  */
1735   for (i = 0; i < 0400; i++)
1736     BM_tab[i] = dirlen;
1737
1738   /* We use this for translation, instead of TRT itself.
1739      We fill this in to handle the characters that actually
1740      occur in the pattern.  Others don't matter anyway!  */
1741   for (i = 0; i < 0400; i++)
1742     simple_translate[i] = i;
1743
1744   if (char_base)
1745     {
1746       /* Set up translate_prev_byte1/2/3/4 from CHAR_BASE.  Only the
1747          byte following them is the target of translation.  */
1748       unsigned char str[MAX_MULTIBYTE_LENGTH];
1749       int len = CHAR_STRING (char_base, str);  /* NOTE(review): shadows the LEN parameter */
1750
1751       translate_prev_byte1 = str[len - 2];  /* assumes len >= 2, i.e. CHAR_BASE is non-ASCII -- TODO confirm */
1752       if (len > 2)
1753         {
1754           translate_prev_byte2 = str[len - 3];
1755           if (len > 3)
1756             {
1757               translate_prev_byte3 = str[len - 4];
1758               if (len > 4)
1759                 translate_prev_byte4 = str[len - 5];  /* NOTE(review): never read afterwards */
1760             }
1761         }
1762     }
1763
1764   i = 0;
1765   while (i != dirlen)  /* one pass per pattern byte, in the search direction */
1766     {
1767       unsigned char *ptr = base_pat + i;
1768       i += direction;  /* I now counts this byte too (signed) */
1769       if (! NILP (trt))
1770         {
1771           /* If the byte currently looking at is the last of a
1772              character to check case-equivalents, set CH to that
1773              character.  An ASCII character and a non-ASCII character
1774              matching with CHAR_BASE are to be checked.  */
1775           int ch = -1;
1776
1777           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1778             ch = *ptr;
1779           else if (char_base
1780                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1781             {
1782               unsigned char *charstart = ptr - 1;
1783
1784               while (! (CHAR_HEAD_P (*charstart)))
1785                 charstart--;
1786               ch = STRING_CHAR (charstart);
1787               if (char_base != (ch & ~0x3F))
1788                 ch = -1;
1789             }
1790
1791           if (ch >= 0200)
1792             j = (ch & 0x3F) | 0200;  /* table index for a non-ASCII CH: its low 6 bits | 0200 */
1793           else
1794             j = *ptr;
1795
1796           if (i == dirlen)  /* this is the last pattern byte visited */
1797             stride_for_teases = BM_tab[j];
1798
1799           BM_tab[j] = dirlen - i;  /* signed distance to the pattern's far end; 0 for the last byte */
1800           /* A translation table is accompanied by its inverse -- see */
1801           /* comment following downcase_table for details */
1802           if (ch >= 0)
1803             {
1804               int starting_ch = ch;
1805               int starting_j = j;
1806
1807               while (1)  /* follow INVERSE_TRT until we are back at STARTING_CH */
1808                 {
1809                   TRANSLATE (ch, inverse_trt, ch);
1810                   if (ch >= 0200)
1811                     j = (ch & 0x3F) | 0200;
1812                   else
1813                     j = ch;
1814
1815                   /* For all the characters that map into CH,
1816                      set up simple_translate to map the last byte
1817                      into STARTING_J.  */
1818                   simple_translate[j] = starting_j;
1819                   if (ch == starting_ch)
1820                     break;
1821                   BM_tab[j] = dirlen - i;
1822                 }
1823             }
1824         }
1825       else
1826         {
1827           j = *ptr;
1828
1829           if (i == dirlen)
1830             stride_for_teases = BM_tab[j];
1831           BM_tab[j] = dirlen - i;
1832         }
1833       /* stride_for_teases tells how much to stride if we get a
1834          match on the far character but are subsequently
1835          disappointed, by recording what the stride would have been
1836          for that character if the last character had been
1837          different.  */
1838     }
1839   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1840   /* loop invariant - POS_BYTE points at where last char (first
1841      char if reverse) of pattern would align in a possible match.  */
1842   while (n != 0)  /* N is the signed count of matches still to be found */
1843     {
1844       EMACS_INT tail_end;
1845       unsigned char *tail_end_ptr;
1846
1847       /* It's been reported that some (broken) compiler thinks that
1848          Boolean expressions in an arithmetic context are unsigned.
1849          Using an explicit ?1:0 prevents this.  */
1850       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1851           < 0)
1852         return (n * (0 - direction));  /* aligned past LIM_BYTE: fail with -(matches still missing) */
1853       /* First we do the part we can by pointers (maybe nothing) */
1854       QUIT;
1855       pat = base_pat;
1856       limit = pos_byte - dirlen + direction;  /* byte position of the candidate's other end */
1857       if (direction > 0)
1858         {
1859           limit = BUFFER_CEILING_OF (limit);
1860           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1861              can take on without hitting edge of buffer or the gap.  */
1862           limit = min (limit, pos_byte + 20000);
1863           limit = min (limit, lim_byte - 1);
1864         }
1865       else
1866         {
1867           limit = BUFFER_FLOOR_OF (limit);
1868           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1869              can take on without hitting edge of buffer or the gap.  */
1870           limit = max (limit, pos_byte - 20000);
1871           limit = max (limit, lim_byte);
1872         }
1873       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1874       tail_end_ptr = BYTE_POS_ADDR (tail_end);  /* a byte at this address is not checked against its successor below */
1875
1876       if ((limit - pos_byte) * direction > 20)  /* more than 20 bytes of room: scan with pointers */
1877         {
1878           unsigned char *p2;
1879
1880           p_limit = BYTE_POS_ADDR (limit);
1881           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1882           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1883           while (1)             /* use one cursor setting as long as i can */
1884             {
1885               if (direction > 0) /* worth duplicating */
1886                 {
1887                   while (cursor <= p_limit)
1888                     {
1889                       if (BM_tab[*cursor] == 0)  /* stride 0: possible match ends here */
1890                         goto hit;
1891                       cursor += BM_tab[*cursor];
1892                     }
1893                 }
1894               else
1895                 {
1896                   while (cursor >= p_limit)
1897                     {
1898                       if (BM_tab[*cursor] == 0)
1899                         goto hit;
1900                       cursor += BM_tab[*cursor];
1901                     }
1902                 }
1903               /* If you are here, cursor is beyond the end of the
1904                  searched region.  You fail to match within the
1905                  permitted region and would otherwise try a character
1906                  beyond that region.  */
1907               break;
1908
1909             hit:
1910               i = dirlen - direction;
1911               if (! NILP (trt))
1912                 {
1913                   while ((i -= direction) + direction != 0)  /* compare the remaining pattern bytes, stepping against DIRECTION */
1914                     {
1915                       int ch;
1916                       cursor -= direction;
1917                       /* Translate only the last byte of a character.  */
1918                       if (! multibyte
1919                           || ((cursor == tail_end_ptr
1920                                || CHAR_HEAD_P (cursor[1]))
1921                               && (CHAR_HEAD_P (cursor[0])
1922                                   /* Check if this is the last byte of
1923                                      a translatable character.  */
1924                                   || (translate_prev_byte1 == cursor[-1]
1925                                       && (CHAR_HEAD_P (translate_prev_byte1)
1926                                           || (translate_prev_byte2 == cursor[-2]
1927                                               && (CHAR_HEAD_P (translate_prev_byte2)
1928                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1929                         ch = simple_translate[*cursor];
1930                       else
1931                         ch = *cursor;
1932                       if (pat[i] != ch)
1933                         break;
1934                     }
1935                 }
1936               else
1937                 {
1938                   while ((i -= direction) + direction != 0)
1939                     {
1940                       cursor -= direction;
1941                       if (pat[i] != *cursor)
1942                         break;
1943                     }
1944                 }
1945               cursor += dirlen - i - direction; /* fix cursor */
1946               if (i + direction == 0)  /* every pattern byte matched */
1947                 {
1948                   EMACS_INT position, start, end;
1949
1950                   cursor -= direction;
1951
1952                   position = pos_byte + cursor - p2 + ((direction > 0)
1953                                                        ? 1 - len_byte : 0);  /* byte position where the match begins */
1954                   set_search_regs (position, len_byte);
1955
1956                   if (NILP (Vinhibit_changing_match_data))
1957                     {
1958                       start = search_regs.start[0];
1959                       end = search_regs.end[0];
1960                     }
1961                   else
1962                     /* If Vinhibit_changing_match_data is non-nil,
1963                        search_regs will not be changed.  So let's
1964                        compute start and end here.  */
1965                     {
1966                       start = BYTE_TO_CHAR (position);
1967                       end = BYTE_TO_CHAR (position + len_byte);
1968                     }
1969
1970                   if ((n -= direction) != 0)  /* one fewer match to find */
1971                     cursor += dirlen; /* to resume search */
1972                   else
1973                     return direction > 0 ? end : start;  /* Nth match: its end if forward, its start if backward */
1974                 }
1975               else
1976                 cursor += stride_for_teases; /* <sigh> we lose -  */
1977             }
1978           pos_byte += cursor - p2;  /* carry the pointer's progress over to POS_BYTE */
1979         }
1980       else
1981         /* Now we'll pick up a clump that has to be done the hard
1982            way because it covers a discontinuity.  */
1983         {
1984           limit = ((direction > 0)
1985                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1986                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1987           limit = ((direction > 0)
1988                    ? min (limit + len_byte, lim_byte - 1)
1989                    : max (limit - len_byte, lim_byte));
1990           /* LIMIT is now the last value POS_BYTE can have
1991              and still be valid for a possible match.  */
1992           while (1)
1993             {
1994               /* This loop can be coded for space rather than
1995                  speed because it will usually run only once.
1996                  (the reach is at most len + 21, and typically
1997                  does not exceed len).  */
1998               while ((limit - pos_byte) * direction >= 0)
1999                 {
2000                   int ch = FETCH_BYTE (pos_byte);
2001                   if (BM_tab[ch] == 0)
2002                     goto hit2;
2003                   pos_byte += BM_tab[ch];
2004                 }
2005               break;    /* ran off the end */
2006
2007             hit2:
2008               /* Found what might be a match.  */
2009               i = dirlen - direction;
2010               while ((i -= direction) + direction != 0)  /* same comparison as after `hit', but by byte position */
2011                 {
2012                   int ch;
2013                   unsigned char *ptr;
2014                   pos_byte -= direction;
2015                   ptr = BYTE_POS_ADDR (pos_byte);
2016                   /* Translate only the last byte of a character.  */
2017                   if (! multibyte
2018                       || ((ptr == tail_end_ptr
2019                            || CHAR_HEAD_P (ptr[1]))
2020                           && (CHAR_HEAD_P (ptr[0])
2021                               /* Check if this is the last byte of a
2022                                  translatable character.  */
2023                               || (translate_prev_byte1 == ptr[-1]
2024                                   && (CHAR_HEAD_P (translate_prev_byte1)
2025                                       || (translate_prev_byte2 == ptr[-2]
2026                                           && (CHAR_HEAD_P (translate_prev_byte2)
2027                                               || translate_prev_byte3 == ptr[-3])))))))
2028                     ch = simple_translate[*ptr];
2029                   else
2030                     ch = *ptr;
2031                   if (pat[i] != ch)
2032                     break;
2033                 }
2034               /* Above loop has moved POS_BYTE part or all the way
2035                  back to the first pos (last pos if reverse).
2036                  Set it once again at the last (first if reverse) char.  */
2037               pos_byte += dirlen - i - direction;
2038               if (i + direction == 0)  /* every pattern byte matched */
2039                 {
2040                   EMACS_INT position, start, end;
2041                   pos_byte -= direction;
2042
2043                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);  /* byte position where the match begins */
2044                   set_search_regs (position, len_byte);
2045
2046                   if (NILP (Vinhibit_changing_match_data))
2047                     {
2048                       start = search_regs.start[0];
2049                       end = search_regs.end[0];
2050                     }
2051                   else
2052                     /* If Vinhibit_changing_match_data is non-nil,
2053                        search_regs will not be changed.  So let's
2054                        compute start and end here.  */
2055                     {
2056                       start = BYTE_TO_CHAR (position);
2057                       end = BYTE_TO_CHAR (position + len_byte);
2058                     }
2059
2060                   if ((n -= direction) != 0)
2061                     pos_byte += dirlen; /* to resume search */
2062                   else
2063                     return direction > 0 ? end : start;
2064                 }
2065               else
2066                 pos_byte += stride_for_teases;
2067             }
2068           }
2069       /* We have done one clump.  Can we continue? */
2070       if ((lim_byte - pos_byte) * direction < 0)
2071         return ((0 - n) * direction);  /* past LIM_BYTE: fail with -(matches still missing) */
2072     }
2073   return BYTE_TO_CHAR (pos_byte);  /* the loop above ends this way only if N was 0 on entry */
2074 }
2075
2076 /* Remember the overall match just found in the current buffer: it
2077    begins at byte position BEG_BYTE and is NBYTES bytes long.
2078    Registers 1 and up are marked as unset (-1).  */
2079
2080 static void
2081 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2082 {
2083   int reg = 1;
2084
2085   if (! NILP (Vinhibit_changing_match_data))
2086     return;                     /* the match data must stay untouched */
2087
2088   /* On first use, get room for register 0 and for one more.  */
2089   if (! search_regs.num_regs)
2090     {
2091       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2092       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2093       search_regs.num_regs = 2;
2094     }
2095
2096   /* Only register 0 describes this match; unset all the others.  */
2097   while (reg < search_regs.num_regs)
2098     {
2099       search_regs.start[reg] = -1;
2100       search_regs.end[reg] = -1;
2101       reg++;
2102     }
2103
2104   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2105   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2106   XSETBUFFER (last_thing_searched, current_buffer);
2107 }
2108 \f
2109 /* Given STRING, a string of words separated by word delimiters,
2110    compute a regexp that matches those exact words separated by
2111    arbitrary punctuation.  If LAX is nonzero, the regexp need not end
2112    at a word boundary unless STRING ends in a non-word character.  */
2113
2114 static Lisp_Object
2115 wordify (Lisp_Object string, int lax)
2116 {
2117   register unsigned char *o;
2118   register int i, i_byte, len, punct_count = 0, word_count = 0;
2119   Lisp_Object val;
2120   int prev_c = 0;
2121   int adjust, whitespace_at_end;
2122   int punct_bytes = 0;          /* bytes used by the non-word characters */
2123
2124   CHECK_STRING (string);
2125   len = SCHARS (string);
2126
2127   for (i = 0, i_byte = 0; i < len; )  /* pass 1: count words and separators */
2128     {
2129       int c;
2130       int i_byte_orig = i_byte;
2131
2132       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2133       if (SYNTAX (c) != Sword)
2134         {
2135           punct_count++;
2136           punct_bytes += i_byte - i_byte_orig;
2137           if (i > 0 && SYNTAX (prev_c) == Sword)
2138             word_count++;       /* a word has just ended */
2139         }
2140       prev_c = c;
2141     }
2142
2143   if (SYNTAX (prev_c) == Sword)
2144     {
2145       word_count++;             /* the last word runs to the end of STRING */
2146       whitespace_at_end = 0;
2147     }
2148   else
2149     whitespace_at_end = 1;      /* set for any trailing non-word character */
2150
2151   if (!word_count)
2152     return empty_unibyte_string;  /* STRING contains no word at all */
2153
2154   adjust = - punct_count + 5 * (word_count - 1)  /* drop separators, 5 chars per gap, */
2155     + ((lax && !whitespace_at_end) ? 2 : 4);     /* plus one or two 2-char word boundaries */
2156   if (STRING_MULTIBYTE (string))
2157     val = make_uninit_multibyte_string (len + adjust,
2158                                         SBYTES (string) + adjust
2159                                         + punct_count - punct_bytes);  /* a separator may take several bytes */
2160   else
2161     val = make_uninit_string (len + adjust);
2162
2163   o = SDATA (val);
2164   *o++ = '\\';
2165   *o++ = 'b';
2166   prev_c = 0;
2167
2168   for (i = 0, i_byte = 0; i < len; )  /* pass 2: emit the regexp */
2169     {
2170       int c;
2171       int i_byte_orig = i_byte;
2172
2173       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2174
2175       if (SYNTAX (c) == Sword)
2176         {
2177           memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);  /* copy the word character's bytes as they are */
2178           o += i_byte - i_byte_orig;
2179         }
2180       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)  /* end of a word that is not the last one */
2181         {
2182           *o++ = '\\';
2183           *o++ = 'W';
2184           *o++ = '\\';
2185           *o++ = 'W';
2186           *o++ = '*';
2187         }
2188
2189       prev_c = c;
2190     }
2191
2192   if (!lax || whitespace_at_end)
2193     {
2194       *o++ = '\\';
2195       *o++ = 'b';
2196     }
2197
2198   return val;
2199 }
2200 \f
2201 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2202        "MSearch backward: ",
2203        doc: /* Search backward from point for STRING.
2204 Set point to the beginning of the occurrence found, and return point.
2205 An optional second argument bounds the search; it is a buffer position.
2206 The match found must not extend before that position.
2207 Optional third argument, if t, means if fail just return nil (no error).
2208  If not nil and not t, position at limit of search and return nil.
2209 Optional fourth argument is repeat count--search for successive occurrences.
2210
2211 Search case-sensitivity is determined by the value of the variable
2212 `case-fold-search', which see.
2213
2214 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2215   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2216 {  /* Last three args: -1 = backward; 0, 0 = presumably "not a regexp", "not POSIX" -- confirm in search_command.  */
2217   return search_command (string, bound, noerror, count, -1, 0, 0);
2218 }
2219
2220 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2221        doc: /* Search forward from point for STRING.
2222 Set point to the end of the occurrence found, and return point.
2223 An optional second argument bounds the search; it is a buffer position.
2224 The match found must not extend after that position.  A value of nil is
2225   equivalent to (point-max).
2226 Optional third argument, if t, means if fail just return nil (no error).
2227   If not nil and not t, move to limit of search and return nil.
2228 Optional fourth argument is repeat count--search for successive occurrences.
2229
2230 Search case-sensitivity is determined by the value of the variable
2231 `case-fold-search', which see.
2232
2233 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2234   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2235 {  /* Last three args: 1 = forward; 0, 0 = presumably "not a regexp", "not POSIX" -- confirm in search_command.  */
2236   return search_command (string, bound, noerror, count, 1, 0, 0);
2237 }
2238
2239 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2240        "sWord search backward: ",
2241        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2242 Set point to the beginning of the occurrence found, and return point.
2243 An optional second argument bounds the search; it is a buffer position.
2244 The match found must not extend before that position.
2245 Optional third argument, if t, means if fail just return nil (no error).
2246   If not nil and not t, move to limit of search and return nil.
2247 Optional fourth argument is repeat count--search for successive occurrences.  */)
2248   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2249 {  /* wordify (STRING, 0) builds the strict regexp; last three args: -1 = backward, 1 = presumably "regexp", 0 = presumably "not POSIX".  */
2250   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2251 }
2252
2253 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2254        "sWord search: ",
2255        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2256 Set point to the end of the occurrence found, and return point.
2257 An optional second argument bounds the search; it is a buffer position.
2258 The match found must not extend after that position.
2259 Optional third argument, if t, means if fail just return nil (no error).
2260   If not nil and not t, move to limit of search and return nil.
2261 Optional fourth argument is repeat count--search for successive occurrences.  */)
2262   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2263 {  /* wordify (STRING, 0) builds the strict regexp; last three args: 1 = forward, 1 = presumably "regexp", 0 = presumably "not POSIX".  */
2264   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2265 }
2266
2267 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2268        "sWord search backward: ",
2269        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2270 Set point to the beginning of the occurrence found, and return point.
2271
2272 Unlike `word-search-backward', the end of STRING need not match a word
2273 boundary unless it ends in whitespace.
2274
2275 An optional second argument bounds the search; it is a buffer position.
2276 The match found must not extend before that position.
2277 Optional third argument, if t, means if fail just return nil (no error).
2278   If not nil and not t, move to limit of search and return nil.
2279 Optional fourth argument is repeat count--search for successive occurrences.  */)
2280   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2281 {  /* wordify (STRING, 1) builds the lax regexp; last three args: -1 = backward, 1 = presumably "regexp", 0 = presumably "not POSIX".  */
2282   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2283 }
2284
2285 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2286        "sWord search: ",
2287        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2288 Set point to the end of the occurrence found, and return point.
2289
2290 Unlike `word-search-forward', the end of STRING need not match a word
2291 boundary unless it ends in whitespace.
2292
2293 An optional second argument bounds the search; it is a buffer position.
2294 The match found must not extend after that position.
2295 Optional third argument, if t, means if fail just return nil (no error).
2296   If not nil and not t, move to limit of search and return nil.
2297 Optional fourth argument is repeat count--search for successive occurrences.  */)
2298   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2299 {  /* wordify (STRING, 1) builds the lax regexp; last three args: 1 = forward, 1 = presumably "regexp", 0 = presumably "not POSIX".  */
2300   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2301 }
2302
2303 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2304        "sRE search backward: ",
2305        doc: /* Search backward from point for match for regular expression REGEXP.
2306 Set point to the beginning of the match, and return point.
2307 The match found is the one starting last in the buffer
2308 and yet ending before the origin of the search.
2309 An optional second argument bounds the search; it is a buffer position.
2310 The match found must start at or after that position.
2311 Optional third argument, if t, means if fail just return nil (no error).
2312   If not nil and not t, move to limit of search and return nil.
2313 Optional fourth argument is repeat count--search for successive occurrences.
2314 See also the functions `match-beginning', `match-end', `match-string',
2315 and `replace-match'.  */)
2316   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2317 {  /* Last three args: -1 = backward, 1 = presumably "regexp", 0 = presumably "not POSIX" -- confirm in search_command.  */
2318   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2319 }
2320
2321 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2322        "sRE search: ",
2323        doc: /* Search forward from point for regular expression REGEXP.
2324 Set point to the end of the occurrence found, and return point.
2325 An optional second argument bounds the search; it is a buffer position.
2326 The match found must not extend after that position.
2327 Optional third argument, if t, means if fail just return nil (no error).
2328   If not nil and not t, move to limit of search and return nil.
2329 Optional fourth argument is repeat count--search for successive occurrences.
2330 See also the functions `match-beginning', `match-end', `match-string',
2331 and `replace-match'.  */)
2332   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2333 {  /* Last three args: 1 = forward, 1 = presumably "regexp", 0 = presumably "not POSIX" -- confirm in search_command.  */
2334   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2335 }
2336
2337 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2338        "sPosix search backward: ",
2339        doc: /* Search backward from point for match for regular expression REGEXP.
2340 Find the longest match in accord with Posix regular expression rules.
2341 Set point to the beginning of the match, and return point.
2342 The match found is the one starting last in the buffer
2343 and yet ending before the origin of the search.
2344 An optional second argument bounds the search; it is a buffer position.
2345 The match found must start at or after that position.
2346 Optional third argument, if t, means if fail just return nil (no error).
2347   If not nil and not t, move to limit of search and return nil.
2348 Optional fourth argument is repeat count--search for successive occurrences.
2349 See also the functions `match-beginning', `match-end', `match-string',
2350 and `replace-match'.  */)
2351   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2352 {  /* Last three args: -1 = backward, 1 = presumably "regexp", 1 = presumably "POSIX matching" -- confirm in search_command.  */
2353   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2354 }
2355
2356 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2357        "sPosix search: ",
2358        doc: /* Search forward from point for regular expression REGEXP.
2359 Find the longest match in accord with Posix regular expression rules.
2360 Set point to the end of the occurrence found, and return point.
2361 An optional second argument bounds the search; it is a buffer position.
2362 The match found must not extend after that position.
2363 Optional third argument, if t, means if fail just return nil (no error).
2364   If not nil and not t, move to limit of search and return nil.
2365 Optional fourth argument is repeat count--search for successive occurrences.
2366 See also the functions `match-beginning', `match-end', `match-string',
2367 and `replace-match'.  */)
2368   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2369 {  /* Last three args: 1 = forward, 1 = presumably "regexp", 1 = presumably "POSIX matching" -- confirm in search_command.  */
2370   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2371 }
2372 \f
2373 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2374        doc: /* Replace text matched by last search with NEWTEXT.
2375 Leave point at the end of the replacement text.
2376
2377 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2378 Otherwise maybe capitalize the whole text, or maybe just word initials,
2379 based on the replaced text.
2380 If the replaced text has only capital letters
2381 and has at least one multiletter word, convert NEWTEXT to all caps.
2382 Otherwise if all words are capitalized in the replaced text,
2383 capitalize each word in NEWTEXT.
2384
2385 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2386 Otherwise treat `\\' as special:
2387   `\\&' in NEWTEXT means substitute original matched text.
2388   `\\N' means substitute what matched the Nth `\\(...\\)'.
2389        If Nth parens didn't match, substitute nothing.
2390   `\\\\' means insert one `\\'.
2391 Case conversion does not apply to these substitutions.
2392
2393 FIXEDCASE and LITERAL are optional arguments.
2394
2395 The optional fourth argument STRING can be a string to modify.
2396 This is meaningful when the previous match was done against STRING,
2397 using `string-match'.  When used this way, `replace-match'
2398 creates and returns a new string made by copying STRING and replacing
2399 the part of STRING that was matched.
2400
2401 The optional fifth argument SUBEXP specifies a subexpression;
2402 it says to replace just that subexpression with NEWTEXT,
2403 rather than replacing the entire matched text.
2404 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2405 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2406 NEWTEXT in place of subexp N.
2407 This is useful only after a regular expression search or match,
2408 since only regular expressions have distinguished subexpressions.  */)
2409   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2410 {
2411   enum { nochange, all_caps, cap_initial } case_action;
2412   register int pos, pos_byte;
2413   int some_multiletter_word;
2414   int some_lowercase;
2415   int some_uppercase;
2416   int some_nonuppercase_initial;
2417   register int c, prevc;
2418   int sub;
2419   EMACS_INT opoint, newpoint;
2420
2421   CHECK_STRING (newtext);
2422
2423   if (! NILP (string))
2424     CHECK_STRING (string);
2425
2426   case_action = nochange;       /* We tried an initialization */
2427                                 /* but some C compilers blew it */
2428
2429   if (search_regs.num_regs <= 0)
2430     error ("`replace-match' called before any match found");
2431
2432   if (NILP (subexp))
2433     sub = 0;
2434   else
2435     {
2436       CHECK_NUMBER (subexp);
2437       sub = XINT (subexp);
2438       if (sub < 0 || sub >= search_regs.num_regs)
2439         args_out_of_range (subexp, make_number (search_regs.num_regs));
2440     }
2441
2442   if (NILP (string))
2443     {
2444       if (search_regs.start[sub] < BEGV
2445           || search_regs.start[sub] > search_regs.end[sub]
2446           || search_regs.end[sub] > ZV)
2447         args_out_of_range (make_number (search_regs.start[sub]),
2448                            make_number (search_regs.end[sub]));
2449     }
2450   else
2451     {
2452       if (search_regs.start[sub] < 0
2453           || search_regs.start[sub] > search_regs.end[sub]
2454           || search_regs.end[sub] > SCHARS (string))
2455         args_out_of_range (make_number (search_regs.start[sub]),
2456                            make_number (search_regs.end[sub]));
2457     }
2458
2459   if (NILP (fixedcase))
2460     {
2461       /* Decide how to casify by examining the matched text. */
2462       EMACS_INT last;
2463
2464       pos = search_regs.start[sub];
2465       last = search_regs.end[sub];
2466
2467       if (NILP (string))
2468         pos_byte = CHAR_TO_BYTE (pos);
2469       else
2470         pos_byte = string_char_to_byte (string, pos);
2471
2472       prevc = '\n';
2473       case_action = all_caps;
2474
2475       /* some_multiletter_word is set nonzero if any original word
2476          is more than one letter long. */
2477       some_multiletter_word = 0;
2478       some_lowercase = 0;
2479       some_nonuppercase_initial = 0;
2480       some_uppercase = 0;
2481
2482       while (pos < last)
2483         {
2484           if (NILP (string))
2485             {
2486               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2487               INC_BOTH (pos, pos_byte);
2488             }
2489           else
2490             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2491
2492           if (LOWERCASEP (c))
2493             {
2494               /* Cannot be all caps if any original char is lower case */
2495
2496               some_lowercase = 1;
2497               if (SYNTAX (prevc) != Sword)
2498                 some_nonuppercase_initial = 1;
2499               else
2500                 some_multiletter_word = 1;
2501             }
2502           else if (UPPERCASEP (c))
2503             {
2504               some_uppercase = 1;
2505               if (SYNTAX (prevc) != Sword)
2506                 ;
2507               else
2508                 some_multiletter_word = 1;
2509             }
2510           else
2511             {
2512               /* If the initial is a caseless word constituent,
2513                  treat that like a lowercase initial.  */
2514               if (SYNTAX (prevc) != Sword)
2515                 some_nonuppercase_initial = 1;
2516             }
2517
2518           prevc = c;
2519         }
2520
2521       /* Convert to all caps if the old text is all caps
2522          and has at least one multiletter word.  */
2523       if (! some_lowercase && some_multiletter_word)
2524         case_action = all_caps;
2525       /* Capitalize each word, if the old text has all capitalized words.  */
2526       else if (!some_nonuppercase_initial && some_multiletter_word)
2527         case_action = cap_initial;
2528       else if (!some_nonuppercase_initial && some_uppercase)
2529         /* Should x -> yz, operating on X, give Yz or YZ?
2530            We'll assume the latter.  */
2531         case_action = all_caps;
2532       else
2533         case_action = nochange;
2534     }
2535
2536   /* Do replacement in a string.  */
2537   if (!NILP (string))
2538     {
2539       Lisp_Object before, after;
2540
2541       before = Fsubstring (string, make_number (0),
2542                            make_number (search_regs.start[sub]));
2543       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2544
2545       /* Substitute parts of the match into NEWTEXT
2546          if desired.  */
2547       if (NILP (literal))
2548         {
2549           EMACS_INT lastpos = 0;
2550           EMACS_INT lastpos_byte = 0;
2551           /* We build up the substituted string in ACCUM.  */
2552           Lisp_Object accum;
2553           Lisp_Object middle;
2554           int length = SBYTES (newtext);
2555
2556           accum = Qnil;
2557
2558           for (pos_byte = 0, pos = 0; pos_byte < length;)
2559             {
2560               int substart = -1;
2561               int subend = 0;
2562               int delbackslash = 0;
2563
2564               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2565
2566               if (c == '\\')
2567                 {
2568                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2569
2570                   if (c == '&')
2571                     {
2572                       substart = search_regs.start[sub];
2573                       subend = search_regs.end[sub];
2574                     }
2575                   else if (c >= '1' && c <= '9')
2576                     {
2577                       if (search_regs.start[c - '0'] >= 0
2578                           && c <= search_regs.num_regs + '0')
2579                         {
2580                           substart = search_regs.start[c - '0'];
2581                           subend = search_regs.end[c - '0'];
2582                         }
2583                       else
2584                         {
2585                           /* If that subexp did not match,
2586                              replace \\N with nothing.  */
2587                           substart = 0;
2588                           subend = 0;
2589                         }
2590                     }
2591                   else if (c == '\\')
2592                     delbackslash = 1;
2593                   else
2594                     error ("Invalid use of `\\' in replacement text");
2595                 }
2596               if (substart >= 0)
2597                 {
2598                   if (pos - 2 != lastpos)
2599                     middle = substring_both (newtext, lastpos,
2600                                              lastpos_byte,
2601                                              pos - 2, pos_byte - 2);
2602                   else
2603                     middle = Qnil;
2604                   accum = concat3 (accum, middle,
2605                                    Fsubstring (string,
2606                                                make_number (substart),
2607                                                make_number (subend)));
2608                   lastpos = pos;
2609                   lastpos_byte = pos_byte;
2610                 }
2611               else if (delbackslash)
2612                 {
2613                   middle = substring_both (newtext, lastpos,
2614                                            lastpos_byte,
2615                                            pos - 1, pos_byte - 1);
2616
2617                   accum = concat2 (accum, middle);
2618                   lastpos = pos;
2619                   lastpos_byte = pos_byte;
2620                 }
2621             }
2622
2623           if (pos != lastpos)
2624             middle = substring_both (newtext, lastpos,
2625                                      lastpos_byte,
2626                                      pos, pos_byte);
2627           else
2628             middle = Qnil;
2629
2630           newtext = concat2 (accum, middle);
2631         }
2632
2633       /* Do case substitution in NEWTEXT if desired.  */
2634       if (case_action == all_caps)
2635         newtext = Fupcase (newtext);
2636       else if (case_action == cap_initial)
2637         newtext = Fupcase_initials (newtext);
2638
2639       return concat3 (before, newtext, after);
2640     }
2641
2642   /* Record point, then move (quietly) to the start of the match.  */
2643   if (PT >= search_regs.end[sub])
2644     opoint = PT - ZV;
2645   else if (PT > search_regs.start[sub])
2646     opoint = search_regs.end[sub] - ZV;
2647   else
2648     opoint = PT;
2649
2650   /* If we want non-literal replacement,
2651      perform substitution on the replacement string.  */
2652   if (NILP (literal))
2653     {
2654       int length = SBYTES (newtext);
2655       unsigned char *substed;
2656       int substed_alloc_size, substed_len;
2657       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2658       int str_multibyte = STRING_MULTIBYTE (newtext);
2659       Lisp_Object rev_tbl;
2660       int really_changed = 0;
2661
2662       rev_tbl = Qnil;
2663
2664       substed_alloc_size = length * 2 + 100;
2665       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2666       substed_len = 0;
2667
2668       /* Go thru NEWTEXT, producing the actual text to insert in
2669          SUBSTED while adjusting multibyteness to that of the current
2670          buffer.  */
2671
2672       for (pos_byte = 0, pos = 0; pos_byte < length;)
2673         {
2674           unsigned char str[MAX_MULTIBYTE_LENGTH];
2675           unsigned char *add_stuff = NULL;
2676           int add_len = 0;
2677           int idx = -1;
2678
2679           if (str_multibyte)
2680             {
2681               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2682               if (!buf_multibyte)
2683                 c = multibyte_char_to_unibyte (c, rev_tbl);
2684             }
2685           else
2686             {
2687               /* Note that we don't have to increment POS.  */
2688               c = SREF (newtext, pos_byte++);
2689               if (buf_multibyte)
2690                 MAKE_CHAR_MULTIBYTE (c);
2691             }
2692
2693           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2694              or set IDX to a match index, which means put that part
2695              of the buffer text into SUBSTED.  */
2696
2697           if (c == '\\')
2698             {
2699               really_changed = 1;
2700
2701               if (str_multibyte)
2702                 {
2703                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2704                                                       pos, pos_byte);
2705                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2706                     c = multibyte_char_to_unibyte (c, rev_tbl);
2707                 }
2708               else
2709                 {
2710                   c = SREF (newtext, pos_byte++);
2711                   if (buf_multibyte)
2712                     MAKE_CHAR_MULTIBYTE (c);
2713                 }
2714
2715               if (c == '&')
2716                 idx = sub;
2717               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2718                 {
2719                   if (search_regs.start[c - '0'] >= 1)
2720                     idx = c - '0';
2721                 }
2722               else if (c == '\\')
2723                 add_len = 1, add_stuff = "\\";
2724               else
2725                 {
2726                   xfree (substed);
2727                   error ("Invalid use of `\\' in replacement text");
2728                 }
2729             }
2730           else
2731             {
2732               add_len = CHAR_STRING (c, str);
2733               add_stuff = str;
2734             }
2735
2736           /* If we want to copy part of a previous match,
2737              set up ADD_STUFF and ADD_LEN to point to it.  */
2738           if (idx >= 0)
2739             {
2740               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2741               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2742               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2743                 move_gap (search_regs.start[idx]);
2744               add_stuff = BYTE_POS_ADDR (begbyte);
2745             }
2746
2747           /* Now the stuff we want to add to SUBSTED
2748              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2749
2750           /* Make sure SUBSTED is big enough.  */
2751           if (substed_len + add_len >= substed_alloc_size)
2752             {
2753               substed_alloc_size = substed_len + add_len + 500;
2754               substed = (unsigned char *) xrealloc (substed,
2755                                                     substed_alloc_size + 1);
2756             }
2757
2758           /* Now add to the end of SUBSTED.  */
2759           if (add_stuff)
2760             {
2761               memcpy (substed + substed_len, add_stuff, add_len);
2762               substed_len += add_len;
2763             }
2764         }
2765
2766       if (really_changed)
2767         {
2768           if (buf_multibyte)
2769             {
2770               int nchars = multibyte_chars_in_text (substed, substed_len);
2771
2772               newtext = make_multibyte_string (substed, nchars, substed_len);
2773             }
2774           else
2775             newtext = make_unibyte_string (substed, substed_len);
2776         }
2777       xfree (substed);
2778     }
2779
2780   /* Replace the old text with the new in the cleanest possible way.  */
2781   replace_range (search_regs.start[sub], search_regs.end[sub],
2782                  newtext, 1, 0, 1);
2783   newpoint = search_regs.start[sub] + SCHARS (newtext);
2784
2785   if (case_action == all_caps)
2786     Fupcase_region (make_number (search_regs.start[sub]),
2787                     make_number (newpoint));
2788   else if (case_action == cap_initial)
2789     Fupcase_initials_region (make_number (search_regs.start[sub]),
2790                              make_number (newpoint));
2791
2792   /* Adjust search data for this change.  */
2793   {
2794     EMACS_INT oldend = search_regs.end[sub];
2795     EMACS_INT oldstart = search_regs.start[sub];
2796     EMACS_INT change = newpoint - search_regs.end[sub];
2797     int i;
2798
2799     for (i = 0; i < search_regs.num_regs; i++)
2800       {
2801         if (search_regs.start[i] >= oldend)
2802           search_regs.start[i] += change;
2803         else if (search_regs.start[i] > oldstart)
2804           search_regs.start[i] = oldstart;
2805         if (search_regs.end[i] >= oldend)
2806           search_regs.end[i] += change;
2807         else if (search_regs.end[i] > oldstart)
2808           search_regs.end[i] = oldstart;
2809       }
2810   }
2811
2812   /* Put point back where it was in the text.  */
2813   if (opoint <= 0)
2814     TEMP_SET_PT (opoint + ZV);
2815   else
2816     TEMP_SET_PT (opoint);
2817
2818   /* Now move point "officially" to the start of the inserted replacement.  */
2819   move_if_not_intangible (newpoint);
2820
2821   return Qnil;
2822 }
2823 \f
2824 /* Return the start (BEGINNINGP nonzero) or the end (BEGINNINGP zero) of
2825    group NUM of the last match, or nil if that group is absent or unmatched.  */
2826 static Lisp_Object
2827 match_limit (Lisp_Object num, int beginningp)
2828 {
2829   int group;
2830   CHECK_NUMBER (num);
2831   group = XINT (num);
2832   if (group < 0)
2833     args_out_of_range (num, make_number (0));
2834   if (search_regs.num_regs <= 0)
2835     error ("No match data, because no search succeeded");
2836   if (group >= search_regs.num_regs || search_regs.start[group] < 0)
2837     return Qnil;
2838   return make_number (beginningp ? search_regs.start[group]
2839                       : search_regs.end[group]);
2840 }
2841
2842 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2843        doc: /* Return position of start of text matched by last search.
2844 SUBEXP, a number, specifies which parenthesized expression in the last
2845   regexp.
2846 Value is nil if SUBEXPth pair didn't match, or there were less than
2847   SUBEXP pairs.
2848 Zero means the entire text matched by the whole regexp or whole string.  */)
2849   (Lisp_Object subexp)
2850 {
2851   return match_limit (subexp, 1);  /* Nonzero flag selects the start.  */
2852 }
2853
2854 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2855        doc: /* Return position of end of text matched by last search.
2856 SUBEXP, a number, specifies which parenthesized expression in the last
2857   regexp.
2858 Value is nil if SUBEXPth pair didn't match, or there were less than
2859   SUBEXP pairs.
2860 Zero means the entire text matched by the whole regexp or whole string.  */)
2861   (Lisp_Object subexp)
2862 {
2863   return match_limit (subexp, 0);  /* Zero flag selects the end.  */
2864 }
2865
2866 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2867        doc: /* Return a list containing all info on what the last search matched.
2868 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2869 All the elements are markers or nil (nil if the Nth pair didn't match)
2870 if the last match was on a buffer; integers or nil if a string was matched.
2871 Use `set-match-data' to reinstate the data in this list.
2872
2873 If INTEGERS (the optional first argument) is non-nil, always use
2874 integers \(rather than markers) to represent buffer positions.  In
2875 this case, and if the last match was in a buffer, the buffer will get
2876 stored as one additional element at the end of the list.
2877
2878 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2879 enough to hold all the values, and if INTEGERS is non-nil, no consing
2880 is done.
2881
2882 If optional third arg RESEAT is non-nil, any previous markers on the
2883 REUSE list will be modified to point to nowhere.
2884
2885 Return value is undefined if the last search failed.  */)
2886   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2887 {
2888   Lisp_Object tail, prev;
2889   Lisp_Object *data;
2890   int i, len;
2891   /* RESEAT: detach each marker already in REUSE and nil out its slot.  */
2892   if (!NILP (reseat))
2893     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2894       if (MARKERP (XCAR (tail)))
2895         {
2896           unchain_marker (XMARKER (XCAR (tail)));
2897           XSETCAR (tail, Qnil);
2898         }
2899   /* last_thing_searched is nil when no match data is recorded.  */
2900   if (NILP (last_thing_searched))
2901     return Qnil;
2902
2903   prev = Qnil;
2904   /* Room for a start and an end per register, plus a trailing buffer.  */
2905   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2906                                  * sizeof (Lisp_Object));
2907   /* LEN ends up one past the slot of the last register that matched.  */
2908   len = 0;
2909   for (i = 0; i < search_regs.num_regs; i++)
2910     {
2911       int start = search_regs.start[i];
2912       if (start >= 0)
2913         {
2914           if (EQ (last_thing_searched, Qt)
2915               || ! NILP (integers))
2916             {
2917               XSETFASTINT (data[2 * i], start);
2918               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2919             }
2920           else if (BUFFERP (last_thing_searched))
2921             {
2922               data[2 * i] = Fmake_marker ();
2923               Fset_marker (data[2 * i],
2924                            make_number (start),
2925                            last_thing_searched);
2926               data[2 * i + 1] = Fmake_marker ();
2927               Fset_marker (data[2 * i + 1],
2928                            make_number (search_regs.end[i]),
2929                            last_thing_searched);
2930             }
2931           else
2932             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2933             abort ();
2934           /* Trailing unmatched registers are left out of the result.  */
2935           len = 2 * i + 2;
2936         }
2937       else
2938         data[2 * i] = data[2 * i + 1] = Qnil;
2939     }
2940   /* Integer positions do not name their buffer, so append it.  */
2941   if (BUFFERP (last_thing_searched) && !NILP (integers))
2942     {
2943       data[len] = last_thing_searched;
2944       len++;
2945     }
2946
2947   /* If REUSE is not usable, cons up the values and return them.  */
2948   if (! CONSP (reuse))
2949     return Flist (len, data);
2950
2951   /* If REUSE is a list, store as many value elements as will fit
2952      into the elements of REUSE.  */
2953   for (i = 0, tail = reuse; CONSP (tail);
2954        i++, tail = XCDR (tail))
2955     {
2956       if (i < len)
2957         XSETCAR (tail, data[i]);
2958       else
2959         XSETCAR (tail, Qnil);  /* Clear the cells REUSE has to spare.  */
2960       prev = tail;             /* PREV ends up as the last cons of REUSE.  */
2961     }
2962
2963   /* If we couldn't fit all value elements into REUSE,
2964      cons up the rest of them and add them to the end of REUSE.  */
2965   if (i < len)
2966     XSETCDR (prev, Flist (len - i, data + i));
2967
2968   return reuse;
2969 }
2970
2971 /* We used to have an internal use variant of `reseat' described as:
2972
2973       If RESEAT is `evaporate', put the markers back on the free list
2974       immediately.  No other references to the markers must exist in this
2975       case, so it is used only internally on the unwind stack and
2976       save-match-data from Lisp.
2977
2978    But it was ill-conceived: those supposedly-internal markers get exposed via
2979    the undo-list, so freeing them here is unsafe.  */
2980
2981 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2982        doc: /* Set internal data on last search match from elements of LIST.
2983 LIST should have been created by calling `match-data' previously.
2984
2985 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2986   (register Lisp_Object list, Lisp_Object reseat)
2987 {
2988   register int i;
2989   register Lisp_Object marker;
2990   /* In asynchronous code, stash the existing match data first.  */
2991   if (running_asynch_code)
2992     save_search_regs ();
2993
2994   CHECK_LIST (list);
2995
2996   /* Unless we find a marker with a buffer or an explicit buffer
2997      in LIST, assume that this match data came from a string.  */
2998   last_thing_searched = Qt;
2999
3000   /* Grow the register arrays if LIST holds more pairs than they can.  */
3001   {
3002     int length = XFASTINT (Flength (list)) / 2;
3003     /* LENGTH is the number of whole start/end pairs in LIST.  */
3004     if (length > search_regs.num_regs)
3005       {
3006         if (search_regs.num_regs == 0)
3007           {
3008             search_regs.start
3009               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3010             search_regs.end
3011               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3012           }
3013         else
3014           {
3015             search_regs.start
3016               = (regoff_t *) xrealloc (search_regs.start,
3017                                        length * sizeof (regoff_t));
3018             search_regs.end
3019               = (regoff_t *) xrealloc (search_regs.end,
3020                                        length * sizeof (regoff_t));
3021           }
3022         /* New registers start out unmatched (negative START).  */
3023         for (i = search_regs.num_regs; i < length; i++)
3024           search_regs.start[i] = -1;
3025
3026         search_regs.num_regs = length;
3027       }
3028     /* Consume LIST one start/end pair at a time; a buffer element ends it.  */
3029     for (i = 0; CONSP (list); i++)
3030       {
3031         marker = XCAR (list);
3032         if (BUFFERP (marker))
3033           {
3034             last_thing_searched = marker;
3035             break;
3036           }
3037         if (i >= length)
3038           break;
3039         if (NILP (marker))
3040           {
3041             search_regs.start[i] = -1;
3042             list = XCDR (list);  /* Step to the end; it is skipped below.  */
3043           }
3044         else
3045           {
3046             EMACS_INT from;
3047             Lisp_Object m;
3048             /* M keeps the original object for the RESEAT test below.  */
3049             m = marker;
3050             if (MARKERP (marker))
3051               {
3052                 if (XMARKER (marker)->buffer == 0)
3053                   XSETFASTINT (marker, 0);  /* Marker points nowhere.  */
3054                 else
3055                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3056               }
3057
3058             CHECK_NUMBER_COERCE_MARKER (marker);
3059             from = XINT (marker);
3060
3061             if (!NILP (reseat) && MARKERP (m))
3062               {
3063                 unchain_marker (XMARKER (m));
3064                 XSETCAR (list, Qnil);
3065               }
3066             /* Advance to the end position of this pair.  */
3067             if ((list = XCDR (list), !CONSP (list)))
3068               break;
3069
3070             m = marker = XCAR (list);
3071
3072             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3073               XSETFASTINT (marker, 0);
3074
3075             CHECK_NUMBER_COERCE_MARKER (marker);
3076             search_regs.start[i] = from;
3077             search_regs.end[i] = XINT (marker);
3078
3079             if (!NILP (reseat) && MARKERP (m))
3080               {
3081                 unchain_marker (XMARKER (m));
3082                 XSETCAR (list, Qnil);
3083               }
3084           }
3085         list = XCDR (list);
3086       }
3087     /* Registers that LIST did not cover are marked unmatched.  */
3088     for (; i < search_regs.num_regs; i++)
3089       search_regs.start[i] = -1;
3090   }
3091
3092   return Qnil;
3093 }
3094
3095 /* Nonzero while the match data are stashed in the two variables below
3096    for the duration of a sentinel or filter (see save_search_regs).  */
3097 static int search_regs_saved;
3098 static struct re_registers saved_search_regs;
3099 static Lisp_Object saved_last_thing_searched;
3100
3101 /* Stash the live match data in the saved_* variables and empty the live
3102    registers, unless a stash is already pending.  Flooking_at,
3103    Fstring_match, search_buffer and Fset_match_data call this when
3104    asynchronous code (a filter or sentinel) is running.  */
3105 static void
3106 save_search_regs (void)
3107 {
3108   if (search_regs_saved)
3109     return;
3110   saved_search_regs.num_regs = search_regs.num_regs;
3111   saved_search_regs.start = search_regs.start;
3112   saved_search_regs.end = search_regs.end;
3113   search_regs.num_regs = 0;
3114   search_regs.start = 0;
3115   search_regs.end = 0;
3116   saved_last_thing_searched = last_thing_searched;
3117   last_thing_searched = Qnil;
3118   search_regs_saved = 1;
3119 }
3120
3121 /* Called upon exit from filters and sentinels: free any registers made
3122    since save_search_regs ran and reinstate the stashed match data.  */
3123 void
3124 restore_search_regs (void)
3125 {
3126   if (!search_regs_saved)
3127     return;
3128   if (search_regs.num_regs > 0)
3129     {
3130       xfree (search_regs.start);
3131       xfree (search_regs.end);
3132     }
3133   search_regs_saved = 0;
3134   last_thing_searched = saved_last_thing_searched;
3135   saved_last_thing_searched = Qnil;
3136   search_regs.num_regs = saved_search_regs.num_regs;
3137   search_regs.start = saved_search_regs.start;
3138   search_regs.end = saved_search_regs.end;
3139 }
3140
3141 static Lisp_Object
3142 unwind_set_match_data (Lisp_Object list)
3143 {
3144   /* Reseat (Qt) to detach LIST's markers; freeing them here is not safe.  */
3145   return Fset_match_data (list, Qt);
3146 }
3147
3148 /* Register an unwind handler that reinstates the current match data.  */
3149 void
3150 record_unwind_save_match_data (void)
3151 {
3152   Lisp_Object snapshot = Fmatch_data (Qnil, Qnil, Qnil);
3153   record_unwind_protect (unwind_set_match_data, snapshot);
3154 }
3155
3156 /* Escape every regexp-special byte of STRING with a backslash.
3157    NOTE(review): scratch space is alloca'd at twice STRING's byte size.  */
3158 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3159        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3160   (Lisp_Object string)
3161 {
3162   unsigned char *src, *src_end, *dst, *scratch;
3163   int escapes = 0;
3164
3165   CHECK_STRING (string);
3166
3167   /* Worst case every byte needs a backslash, doubling the length.  */
3168   scratch = (unsigned char *) alloca (SBYTES (string) * 2);
3169   dst = scratch;
3170   src = SDATA (string);
3171   src_end = src + SBYTES (string);
3172
3173   while (src != src_end)
3174     {
3175       switch (*src)
3176         {
3177         case '[': case '*': case '.': case '\\':
3178         case '?': case '+': case '^': case '$':
3179           *dst++ = '\\';
3180           escapes++;
3181           break;
3182         default:
3183           break;
3184         }
3185       *dst++ = *src++;
3186     }
3187
3188   return make_specified_string (scratch, SCHARS (string) + escapes,
3189                                 dst - scratch, STRING_MULTIBYTE (string));
3190 }
3191 \f
3192 void
3193 syms_of_search (void)
3194 {
3195   register int i;
3196   /* Set up every regexp cache entry and link them into one list.  */
3197   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3198     {
3199       searchbufs[i].buf.allocated = 100;  /* Size of the xmalloc below.  */
3200       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3201       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3202       searchbufs[i].regexp = Qnil;
3203       searchbufs[i].whitespace_regexp = Qnil;
3204       searchbufs[i].syntax_table = Qnil;
3205       staticpro (&searchbufs[i].regexp);
3206       staticpro (&searchbufs[i].whitespace_regexp);
3207       staticpro (&searchbufs[i].syntax_table);
3208       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3209     }
3210   searchbuf_head = &searchbufs[0];
3211   /* The two error symbols, with their condition lists and messages.  */
3212   Qsearch_failed = intern_c_string ("search-failed");
3213   staticpro (&Qsearch_failed);
3214   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3215   staticpro (&Qinvalid_regexp);
3216
3217   Fput (Qsearch_failed, Qerror_conditions,
3218         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3219   Fput (Qsearch_failed, Qerror_message,
3220         make_pure_c_string ("Search failed"));
3221
3222   Fput (Qinvalid_regexp, Qerror_conditions,
3223         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3224   Fput (Qinvalid_regexp, Qerror_message,
3225         make_pure_c_string ("Invalid regexp"));
3226   /* Start out with no match recorded, live or stashed.  */
3227   last_thing_searched = Qnil;
3228   staticpro (&last_thing_searched);
3229
3230   saved_last_thing_searched = Qnil;
3231   staticpro (&saved_last_thing_searched);
3232
3233   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3234       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3235 Some commands use this for user-specified regexps.
3236 Spaces that occur inside character classes or repetition operators
3237 or other such regexp constructs are not replaced with this.
3238 A value of nil (which is the normal value) means treat spaces literally.  */);
3239   Vsearch_spaces_regexp = Qnil;
3240
3241   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3242       doc: /* Internal use only.
3243 If non-nil, the primitive searching and matching functions
3244 such as `looking-at', `string-match', `re-search-forward', etc.,
3245 do not set the match data.  The proper way to use this variable
3246 is to bind it with `let' around a small expression.  */);
3247   Vinhibit_changing_match_data = Qnil;
3248   /* Register the primitives defined in this file.  */
3249   defsubr (&Slooking_at);
3250   defsubr (&Sposix_looking_at);
3251   defsubr (&Sstring_match);
3252   defsubr (&Sposix_string_match);
3253   defsubr (&Ssearch_forward);
3254   defsubr (&Ssearch_backward);
3255   defsubr (&Sword_search_forward);
3256   defsubr (&Sword_search_backward);
3257   defsubr (&Sword_search_forward_lax);
3258   defsubr (&Sword_search_backward_lax);
3259   defsubr (&Sre_search_forward);
3260   defsubr (&Sre_search_backward);
3261   defsubr (&Sposix_search_forward);
3262   defsubr (&Sposix_search_backward);
3263   defsubr (&Sreplace_match);
3264   defsubr (&Smatch_beginning);
3265   defsubr (&Smatch_end);
3266   defsubr (&Smatch_data);
3267   defsubr (&Sset_match_data);
3268   defsubr (&Sregexp_quote);
3269 }
3270
3271 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3272    (do not change this comment) */