src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include <setjmp.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct: a fixed pool of REGEXP_CACHE_SIZE
     cache slots.  clear_regexp_cache walks this array directly.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.
     compile_pattern moves the slot it returns to the front of this
     list, and shrink_regexp_cache walks the list from here.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* Error condition signaled when compiling a regexp fails
     (see compile_pattern_1).  */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches.  */
  92 Lisp_Object Qsearch_failed;
  93
     /* When this is a string, compile_pattern_1 hands it to
        re_set_whitespace_regexp for the duration of a pattern
        compilation and records it in the cache entry.  Any
        non-string value means no whitespace regexp is installed.  */
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 /* If non-nil, the match data will not be changed during call to
  97    searching or matching functions.  This variable is for internal use
  98    only.  */
  99 Lisp_Object Vinhibit_changing_match_data;
 100
     /* Forward declarations of file-local helpers.  matcher_overflow is
        marked NO_RETURN because it only signals an error.  */
 101 static void set_search_regs (EMACS_INT, EMACS_INT);
 102 static void save_search_regs (void);
 103 static EMACS_INT simple_search (int, unsigned char *, int, int,
 104                                 Lisp_Object, EMACS_INT, EMACS_INT,
 105                                 EMACS_INT, EMACS_INT);
 106 static EMACS_INT boyer_moore (int, unsigned char *, int, int,
 107                               Lisp_Object, Lisp_Object,
 108                               EMACS_INT, EMACS_INT,
 109                               EMACS_INT, EMACS_INT, int);
 110 static EMACS_INT search_buffer (Lisp_Object, EMACS_INT, EMACS_INT,
 111                                 EMACS_INT, EMACS_INT, int, int,
 112                                 Lisp_Object, Lisp_Object, int);
 113 static void matcher_overflow (void) NO_RETURN;
 114
 115 static void
 116 matcher_overflow (void)
 117 {
 118   error ("Stack overflow in regexp matcher");
 119 }
 120
 121 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 122    PATTERN is the pattern to compile.
 123    CP is the place to put the result.
 124    TRANSLATE is a translation table for ignoring case, or nil for none.
 125    REGP is the structure that says where to store the "register"
 126    values that will result from matching this pattern.
 127    If it is 0, we should compile the pattern not to record any
 128    subexpression bounds.
 129    POSIX is nonzero if we want full backtracking (POSIX style)
 130    for this pattern.  0 means backtrack only enough to get a valid match.
 131
 132    The behavior also depends on Vsearch_spaces_regexp.  */
 133
 134 static void
 135 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, struct re_registers *regp, int posix)
 136 {
 137   char *val;
 138   reg_syntax_t old;
 139
 140   cp->regexp = Qnil;
 141   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 142   cp->posix = posix;
 143   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 144   cp->buf.charset_unibyte = charset_unibyte;
 145   if (STRINGP (Vsearch_spaces_regexp))
 146     cp->whitespace_regexp = Vsearch_spaces_regexp;
 147   else
 148     cp->whitespace_regexp = Qnil;
 149
 150   /* rms: I think BLOCK_INPUT is not needed here any more,
 151      because regex.c defines malloc to call xmalloc.
 152      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 153      So let's turn it off.  */
 154   /*  BLOCK_INPUT;  */
 155   old = re_set_syntax (RE_SYNTAX_EMACS
 156                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 157
 158   if (STRINGP (Vsearch_spaces_regexp))
 159     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 160   else
 161     re_set_whitespace_regexp (NULL);
 162
 163   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 164                                      SBYTES (pattern), &cp->buf);
 165
 166   /* If the compiled pattern hard codes some of the contents of the
 167      syntax-table, it can only be reused with *this* syntax table.  */
 168   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 169
 170   re_set_whitespace_regexp (NULL);
 171
 172   re_set_syntax (old);
 173   /* UNBLOCK_INPUT;  */
 174   if (val)
 175     xsignal1 (Qinvalid_regexp, build_string (val));
 176
 177   cp->regexp = Fcopy_sequence (pattern);
 178 }
 179
 180 /* Shrink each compiled regexp buffer in the cache
 181    to the size actually used right now.
 182    This is called from garbage collection.  */
 183
 184 void
 185 shrink_regexp_cache (void)
 186 {
 187   struct regexp_cache *cp;
 188
 189   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 190     {
 191       cp->buf.allocated = cp->buf.used;
 192       cp->buf.buffer
 193         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 194     }
 195 }
 196
 197 /* Clear the regexp cache w.r.t. a particular syntax table,
 198    because it was changed.
 199    There is no danger of memory leak here because re_compile_pattern
 200    automagically manages the memory in each re_pattern_buffer struct,
 201    based on its `allocated' and `buffer' values.  */
 202 void
 203 clear_regexp_cache (void)
 204 {
 205   int i;
 206
 207   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 208     /* It's tempting to compare with the syntax-table we've actually changed,
 209        but it's not sufficient because char-table inheritance means that
 210        modifying one syntax-table can change others at the same time.  */
 211     if (!EQ (searchbufs[i].syntax_table, Qt))
 212       searchbufs[i].regexp = Qnil;
 213 }
 214
 215 /* Compile a regexp if necessary, but first check to see if there's one in
 216    the cache.
 217    PATTERN is the pattern to compile.
 218    TRANSLATE is a translation table for ignoring case, or nil for none.
 219    REGP is the structure that says where to store the "register"
 220    values that will result from matching this pattern.
 221    If it is 0, we should compile the pattern not to record any
 222    subexpression bounds.
 223    POSIX is nonzero if we want full backtracking (POSIX style)
 224    for this pattern.  0 means backtrack only enough to get a valid match.  */
 225
 226 struct re_pattern_buffer *
 227 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 228 {
 229   struct regexp_cache *cp, **cpp;
 230
 231   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 232     {
 233       cp = *cpp;
 234       /* Entries are initialized to nil, and may be set to nil by
 235          compile_pattern_1 if the pattern isn't valid.  Don't apply
 236          string accessors in those cases.  However, compile_pattern_1
 237          is only applied to the cache entry we pick here to reuse.  So
 238          nil should never appear before a non-nil entry.  */
 239       if (NILP (cp->regexp))
 240         goto compile_it;
 241       if (SCHARS (cp->regexp) == SCHARS (pattern)
 242           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 243           && !NILP (Fstring_equal (cp->regexp, pattern))
 244           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 245           && cp->posix == posix
 246           && (EQ (cp->syntax_table, Qt)
 247               || EQ (cp->syntax_table, current_buffer->syntax_table))
 248           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 249           && cp->buf.charset_unibyte == charset_unibyte)
 250         break;
 251
 252       /* If we're at the end of the cache, compile into the nil cell
 253          we found, or the last (least recently used) cell with a
 254          string value.  */
 255       if (cp->next == 0)
 256         {
 257         compile_it:
 258           compile_pattern_1 (cp, pattern, translate, regp, posix);
 259           break;
 260         }
 261     }
 262
 263   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 264      either because we found it in the cache or because we just compiled it.
 265      Move it to the front of the queue to mark it as most recently used.  */
 266   *cpp = cp->next;
 267   cp->next = searchbuf_head;
 268   searchbuf_head = cp;
 269
 270   /* Advise the searching functions about the space we have allocated
 271      for register data.  */
 272   if (regp)
 273     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 274
 275   /* The compiled pattern can be used both for multibyte and unibyte
 276      target.  But, we have to tell which the pattern is used for. */
 277   cp->buf.target_multibyte = multibyte;
 278
 279   return &cp->buf;
 280 }
 281
 282 \f
 283 static Lisp_Object
 284 looking_at_1 (Lisp_Object string, int posix)
 285 {
 286   Lisp_Object val;
 287   unsigned char *p1, *p2;
 288   EMACS_INT s1, s2;
 289   register int i;
 290   struct re_pattern_buffer *bufp;
 291
 292   if (running_asynch_code)
 293     save_search_regs ();
 294
 295   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 296   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 297     = current_buffer->case_eqv_table;
 298
 299   CHECK_STRING (string);
 300   bufp = compile_pattern (string,
 301                           (NILP (Vinhibit_changing_match_data)
 302                            ? &search_regs : NULL),
 303                           (!NILP (current_buffer->case_fold_search)
 304                            ? current_buffer->case_canon_table : Qnil),
 305                           posix,
 306                           !NILP (current_buffer->enable_multibyte_characters));
 307
 308   immediate_quit = 1;
 309   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 310
 311   /* Get pointers and sizes of the two strings
 312      that make up the visible portion of the buffer. */
 313
 314   p1 = BEGV_ADDR;
 315   s1 = GPT_BYTE - BEGV_BYTE;
 316   p2 = GAP_END_ADDR;
 317   s2 = ZV_BYTE - GPT_BYTE;
 318   if (s1 < 0)
 319     {
 320       p2 = p1;
 321       s2 = ZV_BYTE - BEGV_BYTE;
 322       s1 = 0;
 323     }
 324   if (s2 < 0)
 325     {
 326       s1 = ZV_BYTE - BEGV_BYTE;
 327       s2 = 0;
 328     }
 329
 330   re_match_object = Qnil;
 331
 332   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 333                   PT_BYTE - BEGV_BYTE,
 334                   (NILP (Vinhibit_changing_match_data)
 335                    ? &search_regs : NULL),
 336                   ZV_BYTE - BEGV_BYTE);
 337   immediate_quit = 0;
 338
 339   if (i == -2)
 340     matcher_overflow ();
 341
 342   val = (0 <= i ? Qt : Qnil);
 343   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 344     for (i = 0; i < search_regs.num_regs; i++)
 345       if (search_regs.start[i] >= 0)
 346         {
 347           search_regs.start[i]
 348             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 349           search_regs.end[i]
 350             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 351         }
 352
 353   /* Set last_thing_searched only when match data is changed.  */
 354   if (NILP (Vinhibit_changing_match_data))
 355     XSETBUFFER (last_thing_searched, current_buffer);
 356
 357   return val;
 358 }
 359
 360 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 361        doc: /* Return t if text after point matches regular expression REGEXP.
 362 This function modifies the match data that `match-beginning',
 363 `match-end' and `match-data' access; save and restore the match
 364 data if you want to preserve them.  */)
 365   (Lisp_Object regexp)
 366 {
       /* Delegate to the shared worker; the 0 is its POSIX argument,
          i.e. no POSIX-style backtracking.  */
 367   return looking_at_1 (regexp, 0);
 368 }
 369
 370 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 371        doc: /* Return t if text after point matches regular expression REGEXP.
 372 Find the longest match, in accord with Posix regular expression rules.
 373 This function modifies the match data that `match-beginning',
 374 `match-end' and `match-data' access; save and restore the match
 375 data if you want to preserve them.  */)
 376   (Lisp_Object regexp)
 377 {
       /* Delegate to the shared worker; the 1 is its POSIX argument,
          requesting full POSIX backtracking.  */
 378   return looking_at_1 (regexp, 1);
 379 }
 380 \f
 381 static Lisp_Object
 382 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 383 {
 384   int val;
 385   struct re_pattern_buffer *bufp;
 386   EMACS_INT pos, pos_byte;
 387   int i;
 388
 389   if (running_asynch_code)
 390     save_search_regs ();
 391
 392   CHECK_STRING (regexp);
 393   CHECK_STRING (string);
 394
 395   if (NILP (start))
 396     pos = 0, pos_byte = 0;
 397   else
 398     {
 399       int len = SCHARS (string);
 400
 401       CHECK_NUMBER (start);
 402       pos = XINT (start);
 403       if (pos < 0 && -pos <= len)
 404         pos = len + pos;
 405       else if (0 > pos || pos > len)
 406         args_out_of_range (string, start);
 407       pos_byte = string_char_to_byte (string, pos);
 408     }
 409
 410   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 411   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 412     = current_buffer->case_eqv_table;
 413
 414   bufp = compile_pattern (regexp,
 415                           (NILP (Vinhibit_changing_match_data)
 416                            ? &search_regs : NULL),
 417                           (!NILP (current_buffer->case_fold_search)
 418                            ? current_buffer->case_canon_table : Qnil),
 419                           posix,
 420                           STRING_MULTIBYTE (string));
 421   immediate_quit = 1;
 422   re_match_object = string;
 423
 424   val = re_search (bufp, (char *) SDATA (string),
 425                    SBYTES (string), pos_byte,
 426                    SBYTES (string) - pos_byte,
 427                    (NILP (Vinhibit_changing_match_data)
 428                     ? &search_regs : NULL));
 429   immediate_quit = 0;
 430
 431   /* Set last_thing_searched only when match data is changed.  */
 432   if (NILP (Vinhibit_changing_match_data))
 433     last_thing_searched = Qt;
 434
 435   if (val == -2)
 436     matcher_overflow ();
 437   if (val < 0) return Qnil;
 438
 439   if (NILP (Vinhibit_changing_match_data))
 440     for (i = 0; i < search_regs.num_regs; i++)
 441       if (search_regs.start[i] >= 0)
 442         {
 443           search_regs.start[i]
 444             = string_byte_to_char (string, search_regs.start[i]);
 445           search_regs.end[i]
 446             = string_byte_to_char (string, search_regs.end[i]);
 447         }
 448
 449   return make_number (string_byte_to_char (string, val));
 450 }
 451
 452 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 453        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 454 Matching ignores case if `case-fold-search' is non-nil.
 455 If third arg START is non-nil, start search at that index in STRING.
 456 For index of first char beyond the match, do (match-end 0).
 457 `match-end' and `match-beginning' also give indices of substrings
 458 matched by parenthesis constructs in the pattern.
 459
 460 You can use the function `match-string' to extract the substrings
 461 matched by the parenthesis constructions in REGEXP. */)
 462   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 463 {
       /* Delegate to the shared worker; the 0 is its POSIX argument,
          i.e. no POSIX-style backtracking.  */
 464   return string_match_1 (regexp, string, start, 0);
 465 }
 466
 467 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 468        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 469 Find the longest match, in accord with Posix regular expression rules.
 470 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 471 If third arg START is non-nil, start search at that index in STRING.
 472 For index of first char beyond the match, do (match-end 0).
 473 `match-end' and `match-beginning' also give indices of substrings
 474 matched by parenthesis constructs in the pattern.  */)
 475   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 476 {
       /* Delegate to the shared worker; the 1 is its POSIX argument,
          requesting full POSIX backtracking.  */
 477   return string_match_1 (regexp, string, start, 1);
 478 }
 479
 480 /* Match REGEXP against STRING, searching all of STRING,
 481    and return the index of the match, or negative on failure.
 482    This does not clobber the match data.  */
 483
 484 int
 485 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 486 {
 487   int val;
 488   struct re_pattern_buffer *bufp;
 489
 490   bufp = compile_pattern (regexp, 0, Qnil,
 491                           0, STRING_MULTIBYTE (string));
 492   immediate_quit = 1;
 493   re_match_object = string;
 494
 495   val = re_search (bufp, (char *) SDATA (string),
 496                    SBYTES (string), 0,
 497                    SBYTES (string), 0);
 498   immediate_quit = 0;
 499   return val;
 500 }
 501
 502 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 503    and return the index of the match, or negative on failure.
 504    This does not clobber the match data.
 505    We assume that STRING contains single-byte characters.  */
 506
 507 extern Lisp_Object Vascii_downcase_table;
 508
 509 int
 510 fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
 511 {
 512   int val;
 513   struct re_pattern_buffer *bufp;
 514   int len = strlen (string);
 515
 516   regexp = string_make_unibyte (regexp);
 517   re_match_object = Qt;
 518   bufp = compile_pattern (regexp, 0,
 519                           Vascii_canon_table, 0,
 520                           0);
 521   immediate_quit = 1;
 522   val = re_search (bufp, string, len, 0, len, 0);
 523   immediate_quit = 0;
 524   return val;
 525 }
 526
 527 /* Like fast_string_match but ignore case.  */
 528
 529 int
 530 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 531 {
 532   int val;
 533   struct re_pattern_buffer *bufp;
 534
 535   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 536                           0, STRING_MULTIBYTE (string));
 537   immediate_quit = 1;
 538   re_match_object = string;
 539
 540   val = re_search (bufp, (char *) SDATA (string),
 541                    SBYTES (string), 0,
 542                    SBYTES (string), 0);
 543   immediate_quit = 0;
 544   return val;
 545 }
 546 \f
 547 /* Match REGEXP against the characters after POS to LIMIT, and return
 548    the number of matched characters.  If STRING is non-nil, match
 549    against the characters in it.  In that case, POS and LIMIT are
 550    indices into the string.  This function doesn't modify the match
 551    data.  */
 552
 553 EMACS_INT
 554 fast_looking_at (Lisp_Object regexp, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT limit, EMACS_INT limit_byte, Lisp_Object string)
 555 {
 556   int multibyte;
 557   struct re_pattern_buffer *buf;
 558   unsigned char *p1, *p2;
 559   EMACS_INT s1, s2;
 560   EMACS_INT len;
 561
 562   if (STRINGP (string))
 563     {
 564       if (pos_byte < 0)
 565         pos_byte = string_char_to_byte (string, pos);
 566       if (limit_byte < 0)
 567         limit_byte = string_char_to_byte (string, limit);
 568       p1 = NULL;
 569       s1 = 0;
 570       p2 = SDATA (string);
 571       s2 = SBYTES (string);
 572       re_match_object = string;
 573       multibyte = STRING_MULTIBYTE (string);
 574     }
 575   else
 576     {
 577       if (pos_byte < 0)
 578         pos_byte = CHAR_TO_BYTE (pos);
 579       if (limit_byte < 0)
 580         limit_byte = CHAR_TO_BYTE (limit);
 581       pos_byte -= BEGV_BYTE;
 582       limit_byte -= BEGV_BYTE;
 583       p1 = BEGV_ADDR;
 584       s1 = GPT_BYTE - BEGV_BYTE;
 585       p2 = GAP_END_ADDR;
 586       s2 = ZV_BYTE - GPT_BYTE;
 587       if (s1 < 0)
 588         {
 589           p2 = p1;
 590           s2 = ZV_BYTE - BEGV_BYTE;
 591           s1 = 0;
 592         }
 593       if (s2 < 0)
 594         {
 595           s1 = ZV_BYTE - BEGV_BYTE;
 596           s2 = 0;
 597         }
 598       re_match_object = Qnil;
 599       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 600     }
 601
 602   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 603   immediate_quit = 1;
 604   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 605                     pos_byte, NULL, limit_byte);
 606   immediate_quit = 0;
 607
 608   return len;
 609 }
 610
 611 \f
 612 /* The newline cache: remembering which sections of text have no newlines.  */
 613
 614 /* If the user has requested newline caching, make sure it's on.
 615    Otherwise, make sure it's off.
 616    This is our cheezy way of associating an action with the change of
 617    state of a buffer-local variable.  */
 618 static void
 619 newline_cache_on_off (struct buffer *buf)
 620 {
 621   if (NILP (buf->cache_long_line_scans))
 622     {
 623       /* It should be off.  */
 624       if (buf->newline_cache)
 625         {
 626           free_region_cache (buf->newline_cache);
 627           buf->newline_cache = 0;
 628         }
 629     }
 630   else
 631     {
 632       /* It should be on.  */
 633       if (buf->newline_cache == 0)
 634         buf->newline_cache = new_region_cache ();
 635     }
 636 }
 637
 638 \f
 639 /* Search for COUNT instances of the character TARGET between START and END.
 640
 641    If COUNT is positive, search forwards; END must be >= START.
 642    If COUNT is negative, search backwards for the -COUNTth instance;
 643       END must be <= START.
 644    If COUNT is zero, do anything you please; run rogue, for all I care.
 645
 646    If END is zero, use BEGV or ZV instead, as appropriate for the
 647    direction indicated by COUNT.
 648
 649    If we find COUNT instances, set *SHORTAGE to zero, and return the
 650    position past the COUNTth match.  Note that for reverse motion
 651    this is not the same as the usual convention for Emacs motion commands.
 652
 653    If we don't find COUNT instances before reaching END, set *SHORTAGE
 654    to the number of TARGETs left unfound, and return END.
 655
 656    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 657    except when inside redisplay.  */
 658
 659 int
 660 scan_buffer (register int target, EMACS_INT start, EMACS_INT end, int count, int *shortage, int allow_quit)
 661 {
       /* The current buffer's newline cache, or null when caching is
          off; it is only consulted when TARGET is a newline.  */
 662   struct region_cache *newline_cache;
       /* +1 when scanning forward, -1 when scanning backward; used at
          the end to turn the remaining COUNT into a non-negative
          shortage.  */
 663   int direction;
 664
 665   if (count > 0)
 666     {
 667       direction = 1;
 668       if (! end) end = ZV;
 669     }
 670   else
 671     {
 672       direction = -1;
 673       if (! end) end = BEGV;
 674     }
 675
       /* Create or discard the cache according to the buffer's current
          cache_long_line_scans setting before using it.  */
 676   newline_cache_on_off (current_buffer);
 677   newline_cache = current_buffer->newline_cache;
 678
 679   if (shortage != 0)
 680     *shortage = 0;
 681
 682   immediate_quit = allow_quit;
 683
 684   if (count > 0)
 685     while (start != end)
 686       {
 687         /* Our innermost scanning loop is very simple; it doesn't know
 688            about gaps, buffer ends, or the newline cache.  ceiling is
 689            the position of the last character before the next such
 690            obstacle --- the last character the dumb search loop should
 691            examine.  */
 692         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
 693         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 694         EMACS_INT tem;
 695
 696         /* If we're looking for a newline, consult the newline cache
 697            to see where we can avoid some scanning.  */
 698         if (target == '\n' && newline_cache)
 699           {
 700             int next_change;
                 /* immediate_quit is cleared while the cache is being
                    consulted and restored right afterwards.  */
 701             immediate_quit = 0;
 702             while (region_cache_forward
 703                    (current_buffer, newline_cache, start_byte, &next_change))
 704               start_byte = next_change;
 705             immediate_quit = allow_quit;
 706
 707             /* START should never be after END.  */
 708             if (start_byte > ceiling_byte)
 709               start_byte = ceiling_byte;
 710
 711             /* Now the text after start is an unknown region, and
 712                next_change is the position of the next known region. */
 713             ceiling_byte = min (next_change - 1, ceiling_byte);
 714           }
 715
 716         /* The dumb loop can only scan text stored in contiguous
 717            bytes. BUFFER_CEILING_OF returns the last character
 718            position that is contiguous, so the ceiling is the
 719            position after that.  */
 720         tem = BUFFER_CEILING_OF (start_byte);
 721         ceiling_byte = min (tem, ceiling_byte);
 722
 723         {
 724           /* The termination address of the dumb loop.  */
 725           register unsigned char *ceiling_addr
 726             = BYTE_POS_ADDR (ceiling_byte) + 1;
 727           register unsigned char *cursor
 728             = BYTE_POS_ADDR (start_byte);
               /* BASE is the address of START_BYTE, so the byte position
                  of CURSOR is start_byte + cursor - base.  */
 729           unsigned char *base = cursor;
 730
 731           while (cursor < ceiling_addr)
 732             {
 733               unsigned char *scan_start = cursor;
 734
 735               /* The dumb loop.  */
 736               while (*cursor != target && ++cursor < ceiling_addr)
 737                 ;
 738
 739               /* If we're looking for newlines, cache the fact that
 740                  the region from start to cursor is free of them. */
 741               if (target == '\n' && newline_cache)
 742                 know_region_cache (current_buffer, newline_cache,
 743                                    start_byte + scan_start - base,
 744                                    start_byte + cursor - base);
 745
 746               /* Did we find the target character?  */
 747               if (cursor < ceiling_addr)
 748                 {
 749                   if (--count == 0)
 750                     {
 751                       immediate_quit = 0;
                           /* The + 1 gives the position just past the
                              matched byte.  */
 752                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 753                     }
 754                   cursor++;
 755                 }
 756             }
 757
               /* Resume the outer loop from where this stretch ended.  */
 758           start = BYTE_TO_CHAR (start_byte + cursor - base);
 759         }
 760       }
 761   else
 762     while (start > end)
 763       {
 764         /* The last character to check before the next obstacle.  */
 765         EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
 766         EMACS_INT start_byte = CHAR_TO_BYTE (start);
 767         EMACS_INT tem;
 768
 769         /* Consult the newline cache, if appropriate.  */
 770         if (target == '\n' && newline_cache)
 771           {
 772             int next_change;
                 /* immediate_quit is cleared while the cache is being
                    consulted and restored right afterwards.  */
 773             immediate_quit = 0;
 774             while (region_cache_backward
 775                    (current_buffer, newline_cache, start_byte, &next_change))
 776               start_byte = next_change;
 777             immediate_quit = allow_quit;
 778
 779             /* Start should never be at or before end.  */
 780             if (start_byte <= ceiling_byte)
 781               start_byte = ceiling_byte + 1;
 782
 783             /* Now the text before start is an unknown region, and
 784                next_change is the position of the next known region. */
 785             ceiling_byte = max (next_change, ceiling_byte);
 786           }
 787
 788         /* Stop scanning before the gap.  */
 789         tem = BUFFER_FLOOR_OF (start_byte - 1);
 790         ceiling_byte = max (tem, ceiling_byte);
 791
 792         {
 793           /* The termination address of the dumb loop.  */
 794           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 795           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
               /* BASE is the address of START_BYTE - 1, so
                  start_byte + cursor - base is the byte position just
                  past the byte CURSOR points at.  */
 796           unsigned char *base = cursor;
 797
 798           while (cursor >= ceiling_addr)
 799             {
 800               unsigned char *scan_start = cursor;
 801
 802               while (*cursor != target && --cursor >= ceiling_addr)
 803                 ;
 804
 805               /* If we're looking for newlines, cache the fact that
 806                  the region from after the cursor to start is free of them.  */
 807               if (target == '\n' && newline_cache)
 808                 know_region_cache (current_buffer, newline_cache,
 809                                    start_byte + cursor - base,
 810                                    start_byte + scan_start - base);
 811
 812               /* Did we find the target character?  */
 813               if (cursor >= ceiling_addr)
 814                 {
 815                   if (++count >= 0)
 816                     {
 817                       immediate_quit = 0;
                           /* Because of how BASE is defined, this is the
                              position just past the matched byte, even
                              though we are scanning backward.  */
 818                       return BYTE_TO_CHAR (start_byte + cursor - base);
 819                     }
 820                   cursor--;
 821                 }
 822             }
 823
               /* Resume the outer loop from where this stretch ended.  */
 824           start = BYTE_TO_CHAR (start_byte + cursor - base);
 825         }
 826       }
 827
       /* Fewer than COUNT instances were found.  COUNT now holds what is
          left, with the sign of the scan direction; multiplying by
          DIRECTION makes the reported shortage non-negative.  */
 828   immediate_quit = 0;
 829   if (shortage != 0)
 830     *shortage = count * direction;
 831   return start;
 832 }
 833 \f
 834 /* Search for COUNT instances of a line boundary, which means either a
 835    newline or (if selective display enabled) a carriage return.
 836    Start at START.  If COUNT is negative, search backwards.
 837
 838    We report the resulting position by calling TEMP_SET_PT_BOTH.
 839
 840    If we find COUNT instances. we position after (always after,
 841    even if scanning backwards) the COUNTth match, and return 0.
 842
 843    If we don't find COUNT instances before reaching the end of the
 844    buffer (or the beginning, if scanning backwards), we return
 845    the number of line boundaries left unfound, and position at
 846    the limit we bumped up against.
 847
 848    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 849    except in special cases.  */
 850
 851 int
 852 scan_newline (EMACS_INT start, EMACS_INT start_byte, EMACS_INT limit, EMACS_INT limit_byte, register int count, int allow_quit)
 853 {
 854   int direction = ((count > 0) ? 1 : -1);
 855
 856   register unsigned char *cursor;
 857   unsigned char *base;
 858
 859   EMACS_INT ceiling;
 860   register unsigned char *ceiling_addr;
 861
 862   int old_immediate_quit = immediate_quit;
 863
 864   /* The code that follows is like scan_buffer
 865      but checks for either newline or carriage return.  */
 866
 867   if (allow_quit)
 868     immediate_quit++;
 869
 870   start_byte = CHAR_TO_BYTE (start);
 871
 872   if (count > 0)
 873     {
 874       while (start_byte < limit_byte)
 875         {
 876           ceiling =  BUFFER_CEILING_OF (start_byte);
 877           ceiling = min (limit_byte - 1, ceiling);
 878           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 879           base = (cursor = BYTE_POS_ADDR (start_byte));
 880           while (1)
 881             {
 882               while (*cursor != '\n' && ++cursor != ceiling_addr)
 883                 ;
 884
 885               if (cursor != ceiling_addr)
 886                 {
 887                   if (--count == 0)
 888                     {
 889                       immediate_quit = old_immediate_quit;
 890                       start_byte = start_byte + cursor - base + 1;
 891                       start = BYTE_TO_CHAR (start_byte);
 892                       TEMP_SET_PT_BOTH (start, start_byte);
 893                       return 0;
 894                     }
 895                   else
 896                     if (++cursor == ceiling_addr)
 897                       break;
 898                 }
 899               else
 900                 break;
 901             }
 902           start_byte += cursor - base;
 903         }
 904     }
 905   else
 906     {
 907       while (start_byte > limit_byte)
 908         {
 909           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 910           ceiling = max (limit_byte, ceiling);
 911           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 912           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 913           while (1)
 914             {
 915               while (--cursor != ceiling_addr && *cursor != '\n')
 916                 ;
 917
 918               if (cursor != ceiling_addr)
 919                 {
 920                   if (++count == 0)
 921                     {
 922                       immediate_quit = old_immediate_quit;
 923                       /* Return the position AFTER the match we found.  */
 924                       start_byte = start_byte + cursor - base + 1;
 925                       start = BYTE_TO_CHAR (start_byte);
 926                       TEMP_SET_PT_BOTH (start, start_byte);
 927                       return 0;
 928                     }
 929                 }
 930               else
 931                 break;
 932             }
 933           /* Here we add 1 to compensate for the last decrement
 934              of CURSOR, which took it past the valid range.  */
 935           start_byte += cursor - base + 1;
 936         }
 937     }
 938
 939   TEMP_SET_PT_BOTH (limit, limit_byte);
 940   immediate_quit = old_immediate_quit;
 941
 942   return count * direction;
 943 }
 944
 945 int
 946 find_next_newline_no_quit (EMACS_INT from, int cnt)
 947 {
 948   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 949 }
 950
 951 /* Like find_next_newline, but returns position before the newline,
 952    not after, and only search up to TO.  This isn't just
 953    find_next_newline (...)-1, because you might hit TO.  */
 954
 955 int
 956 find_before_next_newline (EMACS_INT from, EMACS_INT to, int cnt)
 957 {
 958   int shortage;
 959   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 960
 961   if (shortage == 0)
 962     pos--;
 963
 964   return pos;
 965 }
 966 \f
 967 /* Subroutines of Lisp buffer search functions. */
 968
 969 static Lisp_Object
 970 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count, int direction, int RE, int posix)
 971 {
 972   register int np;
 973   int lim, lim_byte;
 974   int n = direction;
 975
 976   if (!NILP (count))
 977     {
 978       CHECK_NUMBER (count);
 979       n *= XINT (count);
 980     }
 981
 982   CHECK_STRING (string);
 983   if (NILP (bound))
 984     {
 985       if (n > 0)
 986         lim = ZV, lim_byte = ZV_BYTE;
 987       else
 988         lim = BEGV, lim_byte = BEGV_BYTE;
 989     }
 990   else
 991     {
 992       CHECK_NUMBER_COERCE_MARKER (bound);
 993       lim = XINT (bound);
 994       if (n > 0 ? lim < PT : lim > PT)
 995         error ("Invalid search bound (wrong side of point)");
 996       if (lim > ZV)
 997         lim = ZV, lim_byte = ZV_BYTE;
 998       else if (lim < BEGV)
 999         lim = BEGV, lim_byte = BEGV_BYTE;
1000       else
1001         lim_byte = CHAR_TO_BYTE (lim);
1002     }
1003
1004   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1005   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1006     = current_buffer->case_eqv_table;
1007
1008   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1009                       (!NILP (current_buffer->case_fold_search)
1010                        ? current_buffer->case_canon_table
1011                        : Qnil),
1012                       (!NILP (current_buffer->case_fold_search)
1013                        ? current_buffer->case_eqv_table
1014                        : Qnil),
1015                       posix);
1016   if (np <= 0)
1017     {
1018       if (NILP (noerror))
1019         xsignal1 (Qsearch_failed, string);
1020
1021       if (!EQ (noerror, Qt))
1022         {
1023           if (lim < BEGV || lim > ZV)
1024             abort ();
1025           SET_PT_BOTH (lim, lim_byte);
1026           return Qnil;
1027 #if 0 /* This would be clean, but maybe programs depend on
1028          a value of nil here.  */
1029           np = lim;
1030 #endif
1031         }
1032       else
1033         return Qnil;
1034     }
1035
1036   if (np < BEGV || np > ZV)
1037     abort ();
1038
1039   SET_PT (np);
1040
1041   return make_number (np);
1042 }
1043 \f
1044 /* Return 1 if REGEXP it matches just one constant string.  */
1045
1046 static int
1047 trivial_regexp_p (Lisp_Object regexp)
1048 {
1049   int len = SBYTES (regexp);
1050   unsigned char *s = SDATA (regexp);
1051   while (--len >= 0)
1052     {
1053       switch (*s++)
1054         {
1055         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1056           return 0;
1057         case '\\':
1058           if (--len < 0)
1059             return 0;
1060           switch (*s++)
1061             {
1062             case '|': case '(': case ')': case '`': case '\'': case 'b':
1063             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1064             case 'S': case '=': case '{': case '}': case '_':
1065             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1066             case '1': case '2': case '3': case '4': case '5':
1067             case '6': case '7': case '8': case '9':
1068               return 0;
1069             }
1070         }
1071     }
1072   return 1;
1073 }
1074
1075 /* Search for the n'th occurrence of STRING in the current buffer,
1076    starting at position POS and stopping at position LIM,
1077    treating STRING as a literal string if RE is false or as
1078    a regular expression if RE is true.
1079
1080    If N is positive, searching is forward and LIM must be greater than POS.
1081    If N is negative, searching is backward and LIM must be less than POS.
1082
1083    Returns -x if x occurrences remain to be found (x > 0),
1084    or else the position at the beginning of the Nth occurrence
1085    (if searching backward) or the end (if searching forward).
1086
1087    POSIX is nonzero if we want full backtracking (POSIX style)
1088    for this pattern.  0 means backtrack only enough to get a valid match.  */
1089
/* Set OUT to the translation of character code D under the table TRT.
   If TRT is nil, or if Faref yields something other than an integer
   for D, OUT is set to D unchanged.

   OUT must be an lvalue.  D may be evaluated more than once, so it
   must not have side effects.  OUT and D may name the same variable.

   The arguments are parenthesized in the expansion, and the temporary
   has a name unlikely to clash with a caller's variable.  */
#define TRANSLATE(out, trt, d)                                  \
do                                                              \
  {                                                             \
    if (! NILP (trt))                                           \
      {                                                         \
        Lisp_Object translate_elt_;                             \
        translate_elt_ = Faref ((trt), make_number (d));        \
        if (INTEGERP (translate_elt_))                          \
          (out) = XINT (translate_elt_);                        \
        else                                                    \
          (out) = (d);                                          \
      }                                                         \
    else                                                        \
      (out) = (d);                                              \
  }                                                             \
while (0)
1106
1107 /* Only used in search_buffer, to record the end position of the match
1108    when searching regexps and SEARCH_REGS should not be changed
1109    (i.e. Vinhibit_changing_match_data is non-nil).  */
1110 static struct re_registers search_regs_1;
1111
1112 static EMACS_INT
1113 search_buffer (Lisp_Object string, EMACS_INT pos, EMACS_INT pos_byte,
1114                EMACS_INT lim, EMACS_INT lim_byte, int n,
1115                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1116 {
1117   int len = SCHARS (string);
1118   int len_byte = SBYTES (string);
1119   register int i;
1120
1121   if (running_asynch_code)
1122     save_search_regs ();
1123
1124   /* Searching 0 times means don't move.  */
1125   /* Null string is found at starting position.  */
1126   if (len == 0 || n == 0)
1127     {
1128       set_search_regs (pos_byte, 0);
1129       return pos;
1130     }
1131
1132   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1133     {
1134       unsigned char *p1, *p2;
1135       int s1, s2;
1136       struct re_pattern_buffer *bufp;
1137
1138       bufp = compile_pattern (string,
1139                               (NILP (Vinhibit_changing_match_data)
1140                                ? &search_regs : &search_regs_1),
1141                               trt, posix,
1142                               !NILP (current_buffer->enable_multibyte_characters));
1143
1144       immediate_quit = 1;       /* Quit immediately if user types ^G,
1145                                    because letting this function finish
1146                                    can take too long. */
1147       QUIT;                     /* Do a pending quit right away,
1148                                    to avoid paradoxical behavior */
1149       /* Get pointers and sizes of the two strings
1150          that make up the visible portion of the buffer. */
1151
1152       p1 = BEGV_ADDR;
1153       s1 = GPT_BYTE - BEGV_BYTE;
1154       p2 = GAP_END_ADDR;
1155       s2 = ZV_BYTE - GPT_BYTE;
1156       if (s1 < 0)
1157         {
1158           p2 = p1;
1159           s2 = ZV_BYTE - BEGV_BYTE;
1160           s1 = 0;
1161         }
1162       if (s2 < 0)
1163         {
1164           s1 = ZV_BYTE - BEGV_BYTE;
1165           s2 = 0;
1166         }
1167       re_match_object = Qnil;
1168
1169       while (n < 0)
1170         {
1171           int val;
1172           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1173                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1174                              (NILP (Vinhibit_changing_match_data)
1175                               ? &search_regs : &search_regs_1),
1176                              /* Don't allow match past current point */
1177                              pos_byte - BEGV_BYTE);
1178           if (val == -2)
1179             {
1180               matcher_overflow ();
1181             }
1182           if (val >= 0)
1183             {
1184               if (NILP (Vinhibit_changing_match_data))
1185                 {
1186                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1187                   for (i = 0; i < search_regs.num_regs; i++)
1188                     if (search_regs.start[i] >= 0)
1189                       {
1190                         search_regs.start[i]
1191                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1192                         search_regs.end[i]
1193                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1194                       }
1195                   XSETBUFFER (last_thing_searched, current_buffer);
1196                   /* Set pos to the new position. */
1197                   pos = search_regs.start[0];
1198                 }
1199               else
1200                 {
1201                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1202                   /* Set pos to the new position.  */
1203                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1204                 }
1205             }
1206           else
1207             {
1208               immediate_quit = 0;
1209               return (n);
1210             }
1211           n++;
1212         }
1213       while (n > 0)
1214         {
1215           int val;
1216           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1217                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1218                              (NILP (Vinhibit_changing_match_data)
1219                               ? &search_regs : &search_regs_1),
1220                              lim_byte - BEGV_BYTE);
1221           if (val == -2)
1222             {
1223               matcher_overflow ();
1224             }
1225           if (val >= 0)
1226             {
1227               if (NILP (Vinhibit_changing_match_data))
1228                 {
1229                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1230                   for (i = 0; i < search_regs.num_regs; i++)
1231                     if (search_regs.start[i] >= 0)
1232                       {
1233                         search_regs.start[i]
1234                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1235                         search_regs.end[i]
1236                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1237                       }
1238                   XSETBUFFER (last_thing_searched, current_buffer);
1239                   pos = search_regs.end[0];
1240                 }
1241               else
1242                 {
1243                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1244                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1245                 }
1246             }
1247           else
1248             {
1249               immediate_quit = 0;
1250               return (0 - n);
1251             }
1252           n--;
1253         }
1254       immediate_quit = 0;
1255       return (pos);
1256     }
1257   else                          /* non-RE case */
1258     {
1259       unsigned char *raw_pattern, *pat;
1260       int raw_pattern_size;
1261       int raw_pattern_size_byte;
1262       unsigned char *patbuf;
1263       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1264       unsigned char *base_pat;
1265       /* Set to positive if we find a non-ASCII char that need
1266          translation.  Otherwise set to zero later.  */
1267       int char_base = -1;
1268       int boyer_moore_ok = 1;
1269
1270       /* MULTIBYTE says whether the text to be searched is multibyte.
1271          We must convert PATTERN to match that, or we will not really
1272          find things right.  */
1273
1274       if (multibyte == STRING_MULTIBYTE (string))
1275         {
1276           raw_pattern = (unsigned char *) SDATA (string);
1277           raw_pattern_size = SCHARS (string);
1278           raw_pattern_size_byte = SBYTES (string);
1279         }
1280       else if (multibyte)
1281         {
1282           raw_pattern_size = SCHARS (string);
1283           raw_pattern_size_byte
1284             = count_size_as_multibyte (SDATA (string),
1285                                        raw_pattern_size);
1286           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1287           copy_text (SDATA (string), raw_pattern,
1288                      SCHARS (string), 0, 1);
1289         }
1290       else
1291         {
1292           /* Converting multibyte to single-byte.
1293
1294              ??? Perhaps this conversion should be done in a special way
1295              by subtracting nonascii-insert-offset from each non-ASCII char,
1296              so that only the multibyte chars which really correspond to
1297              the chosen single-byte character set can possibly match.  */
1298           raw_pattern_size = SCHARS (string);
1299           raw_pattern_size_byte = SCHARS (string);
1300           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1301           copy_text (SDATA (string), raw_pattern,
1302                      SBYTES (string), 1, 0);
1303         }
1304
1305       /* Copy and optionally translate the pattern.  */
1306       len = raw_pattern_size;
1307       len_byte = raw_pattern_size_byte;
1308       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1309       pat = patbuf;
1310       base_pat = raw_pattern;
1311       if (multibyte)
1312         {
1313           /* Fill patbuf by translated characters in STRING while
1314              checking if we can use boyer-moore search.  If TRT is
1315              non-nil, we can use boyer-moore search only if TRT can be
1316              represented by the byte array of 256 elements.  For that,
1317              all non-ASCII case-equivalents of all case-senstive
1318              characters in STRING must belong to the same charset and
1319              row.  */
1320
1321           while (--len >= 0)
1322             {
1323               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1324               int c, translated, inverse;
1325               int in_charlen, charlen;
1326
1327               /* If we got here and the RE flag is set, it's because we're
1328                  dealing with a regexp known to be trivial, so the backslash
1329                  just quotes the next character.  */
1330               if (RE && *base_pat == '\\')
1331                 {
1332                   len--;
1333                   raw_pattern_size--;
1334                   len_byte--;
1335                   base_pat++;
1336                 }
1337
1338               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1339
1340               if (NILP (trt))
1341                 {
1342                   str = base_pat;
1343                   charlen = in_charlen;
1344                 }
1345               else
1346                 {
1347                   /* Translate the character.  */
1348                   TRANSLATE (translated, trt, c);
1349                   charlen = CHAR_STRING (translated, str_base);
1350                   str = str_base;
1351
1352                   /* Check if C has any other case-equivalents.  */
1353                   TRANSLATE (inverse, inverse_trt, c);
1354                   /* If so, check if we can use boyer-moore.  */
1355                   if (c != inverse && boyer_moore_ok)
1356                     {
1357                       /* Check if all equivalents belong to the same
1358                          group of characters.  Note that the check of C
1359                          itself is done by the last iteration.  */
1360                       int this_char_base = -1;
1361
1362                       while (boyer_moore_ok)
1363                         {
1364                           if (ASCII_BYTE_P (inverse))
1365                             {
1366                               if (this_char_base > 0)
1367                                 boyer_moore_ok = 0;
1368                               else
1369                                 this_char_base = 0;
1370                             }
1371                           else if (CHAR_BYTE8_P (inverse))
1372                             /* Boyer-moore search can't handle a
1373                                translation of an eight-bit
1374                                character.  */
1375                             boyer_moore_ok = 0;
1376                           else if (this_char_base < 0)
1377                             {
1378                               this_char_base = inverse & ~0x3F;
1379                               if (char_base < 0)
1380                                 char_base = this_char_base;
1381                               else if (this_char_base != char_base)
1382                                 boyer_moore_ok = 0;
1383                             }
1384                           else if ((inverse & ~0x3F) != this_char_base)
1385                             boyer_moore_ok = 0;
1386                           if (c == inverse)
1387                             break;
1388                           TRANSLATE (inverse, inverse_trt, inverse);
1389                         }
1390                     }
1391                 }
1392
1393               /* Store this character into the translated pattern.  */
1394               memcpy (pat, str, charlen);
1395               pat += charlen;
1396               base_pat += in_charlen;
1397               len_byte -= in_charlen;
1398             }
1399
1400           /* If char_base is still negative we didn't find any translated
1401              non-ASCII characters.  */
1402           if (char_base < 0)
1403             char_base = 0;
1404         }
1405       else
1406         {
1407           /* Unibyte buffer.  */
1408           char_base = 0;
1409           while (--len >= 0)
1410             {
1411               int c, translated;
1412
1413               /* If we got here and the RE flag is set, it's because we're
1414                  dealing with a regexp known to be trivial, so the backslash
1415                  just quotes the next character.  */
1416               if (RE && *base_pat == '\\')
1417                 {
1418                   len--;
1419                   raw_pattern_size--;
1420                   base_pat++;
1421                 }
1422               c = *base_pat++;
1423               TRANSLATE (translated, trt, c);
1424               *pat++ = translated;
1425             }
1426         }
1427
1428       len_byte = pat - patbuf;
1429       len = raw_pattern_size;
1430       pat = base_pat = patbuf;
1431
1432       if (boyer_moore_ok)
1433         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1434                             pos, pos_byte, lim, lim_byte,
1435                             char_base);
1436       else
1437         return simple_search (n, pat, len, len_byte, trt,
1438                               pos, pos_byte, lim, lim_byte);
1439     }
1440 }
1441 \f
1442 /* Do a simple string search N times for the string PAT,
1443    whose length is LEN/LEN_BYTE,
1444    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1445    TRT is the translation table.
1446
1447    Return the character position where the match is found.
1448    Otherwise, if M matches remained to be found, return -M.
1449
1450    This kind of search works regardless of what is in PAT and
1451    regardless of what is in TRT.  It is used in cases where
1452    boyer_moore cannot work.  */
1453
1454 static EMACS_INT
1455 simple_search (int n, unsigned char *pat, int len, int len_byte, Lisp_Object trt, EMACS_INT pos, EMACS_INT pos_byte, EMACS_INT lim, EMACS_INT lim_byte)
1456 {
1457   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1458   int forward = n > 0;
1459   /* Number of buffer bytes matched.  Note that this may be different
1460      from len_byte in a multibyte buffer.  */
1461   int match_byte;
1462
1463   if (lim > pos && multibyte)
1464     while (n > 0)
1465       {
1466         while (1)
1467           {
1468             /* Try matching at position POS.  */
1469             EMACS_INT this_pos = pos;
1470             EMACS_INT this_pos_byte = pos_byte;
1471             int this_len = len;
1472             unsigned char *p = pat;
1473             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1474               goto stop;
1475
1476             while (this_len > 0)
1477               {
1478                 int charlen, buf_charlen;
1479                 int pat_ch, buf_ch;
1480
1481                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1482                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1483                                                  buf_charlen);
1484                 TRANSLATE (buf_ch, trt, buf_ch);
1485
1486                 if (buf_ch != pat_ch)
1487                   break;
1488
1489                 this_len--;
1490                 p += charlen;
1491
1492                 this_pos_byte += buf_charlen;
1493                 this_pos++;
1494               }
1495
1496             if (this_len == 0)
1497               {
1498                 match_byte = this_pos_byte - pos_byte;
1499                 pos += len;
1500                 pos_byte += match_byte;
1501                 break;
1502               }
1503
1504             INC_BOTH (pos, pos_byte);
1505           }
1506
1507         n--;
1508       }
1509   else if (lim > pos)
1510     while (n > 0)
1511       {
1512         while (1)
1513           {
1514             /* Try matching at position POS.  */
1515             EMACS_INT this_pos = pos;
1516             int this_len = len;
1517             unsigned char *p = pat;
1518
1519             if (pos + len > lim)
1520               goto stop;
1521
1522             while (this_len > 0)
1523               {
1524                 int pat_ch = *p++;
1525                 int buf_ch = FETCH_BYTE (this_pos);
1526                 TRANSLATE (buf_ch, trt, buf_ch);
1527
1528                 if (buf_ch != pat_ch)
1529                   break;
1530
1531                 this_len--;
1532                 this_pos++;
1533               }
1534
1535             if (this_len == 0)
1536               {
1537                 match_byte = len;
1538                 pos += len;
1539                 break;
1540               }
1541
1542             pos++;
1543           }
1544
1545         n--;
1546       }
1547   /* Backwards search.  */
1548   else if (lim < pos && multibyte)
1549     while (n < 0)
1550       {
1551         while (1)
1552           {
1553             /* Try matching at position POS.  */
1554             EMACS_INT this_pos = pos;
1555             EMACS_INT this_pos_byte = pos_byte;
1556             int this_len = len;
1557             const unsigned char *p = pat + len_byte;
1558
1559             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1560               goto stop;
1561
1562             while (this_len > 0)
1563               {
1564                 int charlen;
1565                 int pat_ch, buf_ch;
1566
1567                 DEC_BOTH (this_pos, this_pos_byte);
1568                 PREV_CHAR_BOUNDARY (p, pat);
1569                 pat_ch = STRING_CHAR (p);
1570                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1571                 TRANSLATE (buf_ch, trt, buf_ch);
1572
1573                 if (buf_ch != pat_ch)
1574                   break;
1575
1576                 this_len--;
1577               }
1578
1579             if (this_len == 0)
1580               {
1581                 match_byte = pos_byte - this_pos_byte;
1582                 pos = this_pos;
1583                 pos_byte = this_pos_byte;
1584                 break;
1585               }
1586
1587             DEC_BOTH (pos, pos_byte);
1588           }
1589
1590         n++;
1591       }
1592   else if (lim < pos)
1593     while (n < 0)
1594       {
1595         while (1)
1596           {
1597             /* Try matching at position POS.  */
1598             EMACS_INT this_pos = pos - len;
1599             int this_len = len;
1600             unsigned char *p = pat;
1601
1602             if (this_pos < lim)
1603               goto stop;
1604
1605             while (this_len > 0)
1606               {
1607                 int pat_ch = *p++;
1608                 int buf_ch = FETCH_BYTE (this_pos);
1609                 TRANSLATE (buf_ch, trt, buf_ch);
1610
1611                 if (buf_ch != pat_ch)
1612                   break;
1613                 this_len--;
1614                 this_pos++;
1615               }
1616
1617             if (this_len == 0)
1618               {
1619                 match_byte = len;
1620                 pos -= len;
1621                 break;
1622               }
1623
1624             pos--;
1625           }
1626
1627         n++;
1628       }
1629
1630  stop:
1631   if (n == 0)
1632     {
1633       if (forward)
1634         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1635       else
1636         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1637
1638       return pos;
1639     }
1640   else if (n > 0)
1641     return -n;
1642   else
1643     return n;
1644 }
1645 \f
/* Do Boyer-Moore search N times for the string BASE_PAT,
   whose length is LEN/LEN_BYTE,
   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
   The sign of N says which direction we search in.
   TRT and INVERSE_TRT are translation tables.
   Characters in BASE_PAT are already translated by TRT.

   This kind of search works if all the characters in BASE_PAT that
   have nontrivial translation are the same aside from the last byte.
   This makes it possible to translate just the last byte of a
   character, and do so after just a simple test of the context.
   CHAR_BASE is nonzero if there is such a non-ASCII character.

   If that criterion is not satisfied, do not call this function.  */
1660
1661 static EMACS_INT
1662 boyer_moore (int n, unsigned char *base_pat, int len, int len_byte,
1663              Lisp_Object trt, Lisp_Object inverse_trt,
1664              EMACS_INT pos, EMACS_INT pos_byte,
1665              EMACS_INT lim, EMACS_INT lim_byte, int char_base)
1666 {
1667   int direction = ((n > 0) ? 1 : -1);
1668   register int dirlen;
1669   EMACS_INT limit;
1670   int stride_for_teases = 0;
1671   int BM_tab[0400];
1672   register unsigned char *cursor, *p_limit;
1673   register int i, j;
1674   unsigned char *pat, *pat_end;
1675   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1676
1677   unsigned char simple_translate[0400];
1678   /* These are set to the preceding bytes of a byte to be translated
1679      if char_base is nonzero.  As the maximum byte length of a
1680      multibyte character is 5, we have to check at most four previous
1681      bytes.  */
1682   int translate_prev_byte1 = 0;
1683   int translate_prev_byte2 = 0;
1684   int translate_prev_byte3 = 0;
1685   int translate_prev_byte4 = 0;
1686
1687   /* The general approach is that we are going to maintain that we know
1688      the first (closest to the present position, in whatever direction
1689      we're searching) character that could possibly be the last
1690      (furthest from present position) character of a valid match.  We
1691      advance the state of our knowledge by looking at that character
1692      and seeing whether it indeed matches the last character of the
1693      pattern.  If it does, we take a closer look.  If it does not, we
1694      move our pointer (to putative last characters) as far as is
1695      logically possible.  This amount of movement, which I call a
1696      stride, will be the length of the pattern if the actual character
1697      appears nowhere in the pattern, otherwise it will be the distance
1698      from the last occurrence of that character to the end of the
1699      pattern.  If the amount is zero we have a possible match.  */
1700
1701   /* Here we make a "mickey mouse" BM table.  The stride of the search
1702      is determined only by the last character of the putative match.
1703      If that character does not match, we will stride the proper
1704      distance to propose a match that superimposes it on the last
1705      instance of a character that matches it (per trt), or misses
1706      it entirely if there is none. */
1707
1708   dirlen = len_byte * direction;
1709
1710   /* Record position after the end of the pattern.  */
1711   pat_end = base_pat + len_byte;
1712   /* BASE_PAT points to a character that we start scanning from.
1713      It is the first character in a forward search,
1714      the last character in a backward search.  */
1715   if (direction < 0)
1716     base_pat = pat_end - 1;
1717
1718   /* A character that does not appear in the pattern induces a
1719      stride equal to the pattern length.  */
1720   for (i = 0; i < 0400; i++)
1721     BM_tab[i] = dirlen;
1722
1723   /* We use this for translation, instead of TRT itself.
1724      We fill this in to handle the characters that actually
1725      occur in the pattern.  Others don't matter anyway!  */
1726   for (i = 0; i < 0400; i++)
1727     simple_translate[i] = i;
1728
1729   if (char_base)
1730     {
1731       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1732          byte following them are the target of translation.  */
1733       unsigned char str[MAX_MULTIBYTE_LENGTH];
1734       int len = CHAR_STRING (char_base, str);
1735
1736       translate_prev_byte1 = str[len - 2];
1737       if (len > 2)
1738         {
1739           translate_prev_byte2 = str[len - 3];
1740           if (len > 3)
1741             {
1742               translate_prev_byte3 = str[len - 4];
1743               if (len > 4)
1744                 translate_prev_byte4 = str[len - 5];
1745             }
1746         }
1747     }
1748
1749   i = 0;
1750   while (i != dirlen)
1751     {
1752       unsigned char *ptr = base_pat + i;
1753       i += direction;
1754       if (! NILP (trt))
1755         {
1756           /* If the byte currently looking at is the last of a
1757              character to check case-equivalents, set CH to that
1758              character.  An ASCII character and a non-ASCII character
1759              matching with CHAR_BASE are to be checked.  */
1760           int ch = -1;
1761
1762           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1763             ch = *ptr;
1764           else if (char_base
1765                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1766             {
1767               unsigned char *charstart = ptr - 1;
1768
1769               while (! (CHAR_HEAD_P (*charstart)))
1770                 charstart--;
1771               ch = STRING_CHAR (charstart);
1772               if (char_base != (ch & ~0x3F))
1773                 ch = -1;
1774             }
1775
1776           if (ch >= 0200)
1777             j = (ch & 0x3F) | 0200;
1778           else
1779             j = *ptr;
1780
1781           if (i == dirlen)
1782             stride_for_teases = BM_tab[j];
1783
1784           BM_tab[j] = dirlen - i;
1785           /* A translation table is accompanied by its inverse -- see */
1786           /* comment following downcase_table for details */
1787           if (ch >= 0)
1788             {
1789               int starting_ch = ch;
1790               int starting_j = j;
1791
1792               while (1)
1793                 {
1794                   TRANSLATE (ch, inverse_trt, ch);
1795                   if (ch >= 0200)
1796                     j = (ch & 0x3F) | 0200;
1797                   else
1798                     j = ch;
1799
1800                   /* For all the characters that map into CH,
1801                      set up simple_translate to map the last byte
1802                      into STARTING_J.  */
1803                   simple_translate[j] = starting_j;
1804                   if (ch == starting_ch)
1805                     break;
1806                   BM_tab[j] = dirlen - i;
1807                 }
1808             }
1809         }
1810       else
1811         {
1812           j = *ptr;
1813
1814           if (i == dirlen)
1815             stride_for_teases = BM_tab[j];
1816           BM_tab[j] = dirlen - i;
1817         }
1818       /* stride_for_teases tells how much to stride if we get a
1819          match on the far character but are subsequently
1820          disappointed, by recording what the stride would have been
1821          for that character if the last character had been
1822          different.  */
1823     }
1824   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1825   /* loop invariant - POS_BYTE points at where last char (first
1826      char if reverse) of pattern would align in a possible match.  */
1827   while (n != 0)
1828     {
1829       EMACS_INT tail_end;
1830       unsigned char *tail_end_ptr;
1831
1832       /* It's been reported that some (broken) compiler thinks that
1833          Boolean expressions in an arithmetic context are unsigned.
1834          Using an explicit ?1:0 prevents this.  */
1835       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1836           < 0)
1837         return (n * (0 - direction));
1838       /* First we do the part we can by pointers (maybe nothing) */
1839       QUIT;
1840       pat = base_pat;
1841       limit = pos_byte - dirlen + direction;
1842       if (direction > 0)
1843         {
1844           limit = BUFFER_CEILING_OF (limit);
1845           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1846              can take on without hitting edge of buffer or the gap.  */
1847           limit = min (limit, pos_byte + 20000);
1848           limit = min (limit, lim_byte - 1);
1849         }
1850       else
1851         {
1852           limit = BUFFER_FLOOR_OF (limit);
1853           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1854              can take on without hitting edge of buffer or the gap.  */
1855           limit = max (limit, pos_byte - 20000);
1856           limit = max (limit, lim_byte);
1857         }
1858       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1859       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1860
1861       if ((limit - pos_byte) * direction > 20)
1862         {
1863           unsigned char *p2;
1864
1865           p_limit = BYTE_POS_ADDR (limit);
1866           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1867           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1868           while (1)             /* use one cursor setting as long as i can */
1869             {
1870               if (direction > 0) /* worth duplicating */
1871                 {
1872                   while (cursor <= p_limit)
1873                     {
1874                       if (BM_tab[*cursor] == 0)
1875                         goto hit;
1876                       cursor += BM_tab[*cursor];
1877                     }
1878                 }
1879               else
1880                 {
1881                   while (cursor >= p_limit)
1882                     {
1883                       if (BM_tab[*cursor] == 0)
1884                         goto hit;
1885                       cursor += BM_tab[*cursor];
1886                     }
1887                 }
1888               /* If you are here, cursor is beyond the end of the
1889                  searched region.  You fail to match within the
1890                  permitted region and would otherwise try a character
1891                  beyond that region.  */
1892               break;
1893
1894             hit:
1895               i = dirlen - direction;
1896               if (! NILP (trt))
1897                 {
1898                   while ((i -= direction) + direction != 0)
1899                     {
1900                       int ch;
1901                       cursor -= direction;
1902                       /* Translate only the last byte of a character.  */
1903                       if (! multibyte
1904                           || ((cursor == tail_end_ptr
1905                                || CHAR_HEAD_P (cursor[1]))
1906                               && (CHAR_HEAD_P (cursor[0])
1907                                   /* Check if this is the last byte of
1908                                      a translable character.  */
1909                                   || (translate_prev_byte1 == cursor[-1]
1910                                       && (CHAR_HEAD_P (translate_prev_byte1)
1911                                           || (translate_prev_byte2 == cursor[-2]
1912                                               && (CHAR_HEAD_P (translate_prev_byte2)
1913                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1914                         ch = simple_translate[*cursor];
1915                       else
1916                         ch = *cursor;
1917                       if (pat[i] != ch)
1918                         break;
1919                     }
1920                 }
1921               else
1922                 {
1923                   while ((i -= direction) + direction != 0)
1924                     {
1925                       cursor -= direction;
1926                       if (pat[i] != *cursor)
1927                         break;
1928                     }
1929                 }
1930               cursor += dirlen - i - direction; /* fix cursor */
1931               if (i + direction == 0)
1932                 {
1933                   EMACS_INT position, start, end;
1934
1935                   cursor -= direction;
1936
1937                   position = pos_byte + cursor - p2 + ((direction > 0)
1938                                                        ? 1 - len_byte : 0);
1939                   set_search_regs (position, len_byte);
1940
1941                   if (NILP (Vinhibit_changing_match_data))
1942                     {
1943                       start = search_regs.start[0];
1944                       end = search_regs.end[0];
1945                     }
1946                   else
1947                     /* If Vinhibit_changing_match_data is non-nil,
1948                        search_regs will not be changed.  So let's
1949                        compute start and end here.  */
1950                     {
1951                       start = BYTE_TO_CHAR (position);
1952                       end = BYTE_TO_CHAR (position + len_byte);
1953                     }
1954
1955                   if ((n -= direction) != 0)
1956                     cursor += dirlen; /* to resume search */
1957                   else
1958                     return direction > 0 ? end : start;
1959                 }
1960               else
1961                 cursor += stride_for_teases; /* <sigh> we lose -  */
1962             }
1963           pos_byte += cursor - p2;
1964         }
1965       else
1966         /* Now we'll pick up a clump that has to be done the hard
1967            way because it covers a discontinuity.  */
1968         {
1969           limit = ((direction > 0)
1970                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1971                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1972           limit = ((direction > 0)
1973                    ? min (limit + len_byte, lim_byte - 1)
1974                    : max (limit - len_byte, lim_byte));
1975           /* LIMIT is now the last value POS_BYTE can have
1976              and still be valid for a possible match.  */
1977           while (1)
1978             {
1979               /* This loop can be coded for space rather than
1980                  speed because it will usually run only once.
1981                  (the reach is at most len + 21, and typically
1982                  does not exceed len).  */
1983               while ((limit - pos_byte) * direction >= 0)
1984                 {
1985                   int ch = FETCH_BYTE (pos_byte);
1986                   if (BM_tab[ch] == 0)
1987                     goto hit2;
1988                   pos_byte += BM_tab[ch];
1989                 }
1990               break;    /* ran off the end */
1991
1992             hit2:
1993               /* Found what might be a match.  */
1994               i = dirlen - direction;
1995               while ((i -= direction) + direction != 0)
1996                 {
1997                   int ch;
1998                   unsigned char *ptr;
1999                   pos_byte -= direction;
2000                   ptr = BYTE_POS_ADDR (pos_byte);
2001                   /* Translate only the last byte of a character.  */
2002                   if (! multibyte
2003                       || ((ptr == tail_end_ptr
2004                            || CHAR_HEAD_P (ptr[1]))
2005                           && (CHAR_HEAD_P (ptr[0])
2006                               /* Check if this is the last byte of a
2007                                  translable character.  */
2008                               || (translate_prev_byte1 == ptr[-1]
2009                                   && (CHAR_HEAD_P (translate_prev_byte1)
2010                                       || (translate_prev_byte2 == ptr[-2]
2011                                           && (CHAR_HEAD_P (translate_prev_byte2)
2012                                               || translate_prev_byte3 == ptr[-3])))))))
2013                     ch = simple_translate[*ptr];
2014                   else
2015                     ch = *ptr;
2016                   if (pat[i] != ch)
2017                     break;
2018                 }
2019               /* Above loop has moved POS_BYTE part or all the way
2020                  back to the first pos (last pos if reverse).
2021                  Set it once again at the last (first if reverse) char.  */
2022               pos_byte += dirlen - i - direction;
2023               if (i + direction == 0)
2024                 {
2025                   EMACS_INT position, start, end;
2026                   pos_byte -= direction;
2027
2028                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2029                   set_search_regs (position, len_byte);
2030
2031                   if (NILP (Vinhibit_changing_match_data))
2032                     {
2033                       start = search_regs.start[0];
2034                       end = search_regs.end[0];
2035                     }
2036                   else
2037                     /* If Vinhibit_changing_match_data is non-nil,
2038                        search_regs will not be changed.  So let's
2039                        compute start and end here.  */
2040                     {
2041                       start = BYTE_TO_CHAR (position);
2042                       end = BYTE_TO_CHAR (position + len_byte);
2043                     }
2044
2045                   if ((n -= direction) != 0)
2046                     pos_byte += dirlen; /* to resume search */
2047                   else
2048                     return direction > 0 ? end : start;
2049                 }
2050               else
2051                 pos_byte += stride_for_teases;
2052             }
2053           }
2054       /* We have done one clump.  Can we continue? */
2055       if ((lim_byte - pos_byte) * direction < 0)
2056         return ((0 - n) * direction);
2057     }
2058   return BYTE_TO_CHAR (pos_byte);
2059 }
2060
2061 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2062    for the overall match just found in the current buffer.
2063    Also clear out the match data for registers 1 and up.  */
2064
2065 static void
2066 set_search_regs (EMACS_INT beg_byte, EMACS_INT nbytes)
2067 {
2068   int i;
2069
2070   if (!NILP (Vinhibit_changing_match_data))
2071     return;
2072
2073   /* Make sure we have registers in which to store
2074      the match position.  */
2075   if (search_regs.num_regs == 0)
2076     {
2077       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2078       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2079       search_regs.num_regs = 2;
2080     }
2081
2082   /* Clear out the other registers.  */
2083   for (i = 1; i < search_regs.num_regs; i++)
2084     {
2085       search_regs.start[i] = -1;
2086       search_regs.end[i] = -1;
2087     }
2088
2089   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2090   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2091   XSETBUFFER (last_thing_searched, current_buffer);
2092 }
2093 \f
2094 /* Given STRING, a string of words separated by word delimiters,
2095    compute a regexp that matches those exact words separated by
2096    arbitrary punctuation.  If LAX is nonzero, the end of the string
2097    need not match a word boundary unless it ends in whitespace.  */
2098
2099 static Lisp_Object
2100 wordify (Lisp_Object string, int lax)
2101 {
2102   register unsigned char *p, *o;
2103   register int i, i_byte, len, punct_count = 0, word_count = 0;
2104   Lisp_Object val;
2105   int prev_c = 0;
2106   int adjust, whitespace_at_end;
2107
2108   CHECK_STRING (string);
2109   p = SDATA (string);
2110   len = SCHARS (string);
2111
2112   for (i = 0, i_byte = 0; i < len; )
2113     {
2114       int c;
2115
2116       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2117
2118       if (SYNTAX (c) != Sword)
2119         {
2120           punct_count++;
2121           if (i > 0 && SYNTAX (prev_c) == Sword)
2122             word_count++;
2123         }
2124
2125       prev_c = c;
2126     }
2127
2128   if (SYNTAX (prev_c) == Sword)
2129     {
2130       word_count++;
2131       whitespace_at_end = 0;
2132     }
2133   else
2134     whitespace_at_end = 1;
2135
2136   if (!word_count)
2137     return empty_unibyte_string;
2138
2139   adjust = - punct_count + 5 * (word_count - 1)
2140     + ((lax && !whitespace_at_end) ? 2 : 4);
2141   if (STRING_MULTIBYTE (string))
2142     val = make_uninit_multibyte_string (len + adjust,
2143                                         SBYTES (string)
2144                                         + adjust);
2145   else
2146     val = make_uninit_string (len + adjust);
2147
2148   o = SDATA (val);
2149   *o++ = '\\';
2150   *o++ = 'b';
2151   prev_c = 0;
2152
2153   for (i = 0, i_byte = 0; i < len; )
2154     {
2155       int c;
2156       int i_byte_orig = i_byte;
2157
2158       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2159
2160       if (SYNTAX (c) == Sword)
2161         {
2162           memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
2163           o += i_byte - i_byte_orig;
2164         }
2165       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2166         {
2167           *o++ = '\\';
2168           *o++ = 'W';
2169           *o++ = '\\';
2170           *o++ = 'W';
2171           *o++ = '*';
2172         }
2173
2174       prev_c = c;
2175     }
2176
2177   if (!lax || whitespace_at_end)
2178     {
2179       *o++ = '\\';
2180       *o++ = 'b';
2181     }
2182
2183   return val;
2184 }
2185 \f
2186 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2187        "MSearch backward: ",
2188        doc: /* Search backward from point for STRING.
2189 Set point to the beginning of the occurrence found, and return point.
2190 An optional second argument bounds the search; it is a buffer position.
2191 The match found must not extend before that position.
2192 Optional third argument, if t, means if fail just return nil (no error).
2193  If not nil and not t, position at limit of search and return nil.
2194 Optional fourth argument is repeat count--search for successive occurrences.
2195
2196 Search case-sensitivity is determined by the value of the variable
2197 `case-fold-search', which see.
2198
2199 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2200   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2201 {
2202   return search_command (string, bound, noerror, count, -1, 0, 0);
2203 }
2204
2205 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2206        doc: /* Search forward from point for STRING.
2207 Set point to the end of the occurrence found, and return point.
2208 An optional second argument bounds the search; it is a buffer position.
2209 The match found must not extend after that position.  A value of nil is
2210   equivalent to (point-max).
2211 Optional third argument, if t, means if fail just return nil (no error).
2212   If not nil and not t, move to limit of search and return nil.
2213 Optional fourth argument is repeat count--search for successive occurrences.
2214
2215 Search case-sensitivity is determined by the value of the variable
2216 `case-fold-search', which see.
2217
2218 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2219   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2220 {
2221   return search_command (string, bound, noerror, count, 1, 0, 0);
2222 }
2223
2224 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2225        "sWord search backward: ",
2226        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2227 Set point to the beginning of the occurrence found, and return point.
2228 An optional second argument bounds the search; it is a buffer position.
2229 The match found must not extend before that position.
2230 Optional third argument, if t, means if fail just return nil (no error).
2231   If not nil and not t, move to limit of search and return nil.
2232 Optional fourth argument is repeat count--search for successive occurrences.  */)
2233   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2234 {
2235   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2236 }
2237
2238 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2239        "sWord search: ",
2240        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2241 Set point to the end of the occurrence found, and return point.
2242 An optional second argument bounds the search; it is a buffer position.
2243 The match found must not extend after that position.
2244 Optional third argument, if t, means if fail just return nil (no error).
2245   If not nil and not t, move to limit of search and return nil.
2246 Optional fourth argument is repeat count--search for successive occurrences.  */)
2247   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2248 {
2249   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2250 }
2251
2252 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2253        "sWord search backward: ",
2254        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2255 Set point to the beginning of the occurrence found, and return point.
2256
2257 Unlike `word-search-backward', the end of STRING need not match a word
2258 boundary unless it ends in whitespace.
2259
2260 An optional second argument bounds the search; it is a buffer position.
2261 The match found must not extend before that position.
2262 Optional third argument, if t, means if fail just return nil (no error).
2263   If not nil and not t, move to limit of search and return nil.
2264 Optional fourth argument is repeat count--search for successive occurrences.  */)
2265   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2266 {
2267   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2268 }
2269
2270 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2271        "sWord search: ",
2272        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2273 Set point to the end of the occurrence found, and return point.
2274
2275 Unlike `word-search-forward', the end of STRING need not match a word
2276 boundary unless it ends in whitespace.
2277
2278 An optional second argument bounds the search; it is a buffer position.
2279 The match found must not extend after that position.
2280 Optional third argument, if t, means if fail just return nil (no error).
2281   If not nil and not t, move to limit of search and return nil.
2282 Optional fourth argument is repeat count--search for successive occurrences.  */)
2283   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2284 {
2285   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2286 }
2287
2288 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2289        "sRE search backward: ",
2290        doc: /* Search backward from point for match for regular expression REGEXP.
2291 Set point to the beginning of the match, and return point.
2292 The match found is the one starting last in the buffer
2293 and yet ending before the origin of the search.
2294 An optional second argument bounds the search; it is a buffer position.
2295 The match found must start at or after that position.
2296 Optional third argument, if t, means if fail just return nil (no error).
2297   If not nil and not t, move to limit of search and return nil.
2298 Optional fourth argument is repeat count--search for successive occurrences.
2299 See also the functions `match-beginning', `match-end', `match-string',
2300 and `replace-match'.  */)
2301   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2302 {
2303   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2304 }
2305
2306 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2307        "sRE search: ",
2308        doc: /* Search forward from point for regular expression REGEXP.
2309 Set point to the end of the occurrence found, and return point.
2310 An optional second argument bounds the search; it is a buffer position.
2311 The match found must not extend after that position.
2312 Optional third argument, if t, means if fail just return nil (no error).
2313   If not nil and not t, move to limit of search and return nil.
2314 Optional fourth argument is repeat count--search for successive occurrences.
2315 See also the functions `match-beginning', `match-end', `match-string',
2316 and `replace-match'.  */)
2317   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2318 {
2319   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2320 }
2321
2322 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2323        "sPosix search backward: ",
2324        doc: /* Search backward from point for match for regular expression REGEXP.
2325 Find the longest match in accord with Posix regular expression rules.
2326 Set point to the beginning of the match, and return point.
2327 The match found is the one starting last in the buffer
2328 and yet ending before the origin of the search.
2329 An optional second argument bounds the search; it is a buffer position.
2330 The match found must start at or after that position.
2331 Optional third argument, if t, means if fail just return nil (no error).
2332   If not nil and not t, move to limit of search and return nil.
2333 Optional fourth argument is repeat count--search for successive occurrences.
2334 See also the functions `match-beginning', `match-end', `match-string',
2335 and `replace-match'.  */)
2336   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2337 {
2338   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2339 }
2340
2341 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2342        "sPosix search: ",
2343        doc: /* Search forward from point for regular expression REGEXP.
2344 Find the longest match in accord with Posix regular expression rules.
2345 Set point to the end of the occurrence found, and return point.
2346 An optional second argument bounds the search; it is a buffer position.
2347 The match found must not extend after that position.
2348 Optional third argument, if t, means if fail just return nil (no error).
2349   If not nil and not t, move to limit of search and return nil.
2350 Optional fourth argument is repeat count--search for successive occurrences.
2351 See also the functions `match-beginning', `match-end', `match-string',
2352 and `replace-match'.  */)
2353   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2354 {
2355   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2356 }
2357 \f
2358 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2359        doc: /* Replace text matched by last search with NEWTEXT.
2360 Leave point at the end of the replacement text.
2361
2362 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2363 Otherwise maybe capitalize the whole text, or maybe just word initials,
2364 based on the replaced text.
2365 If the replaced text has only capital letters
2366 and has at least one multiletter word, convert NEWTEXT to all caps.
2367 Otherwise if all words are capitalized in the replaced text,
2368 capitalize each word in NEWTEXT.
2369
2370 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2371 Otherwise treat `\\' as special:
2372   `\\&' in NEWTEXT means substitute original matched text.
2373   `\\N' means substitute what matched the Nth `\\(...\\)'.
2374        If Nth parens didn't match, substitute nothing.
2375   `\\\\' means insert one `\\'.
2376 Case conversion does not apply to these substitutions.
2377
2378 FIXEDCASE and LITERAL are optional arguments.
2379
2380 The optional fourth argument STRING can be a string to modify.
2381 This is meaningful when the previous match was done against STRING,
2382 using `string-match'.  When used this way, `replace-match'
2383 creates and returns a new string made by copying STRING and replacing
2384 the part of STRING that was matched.
2385
2386 The optional fifth argument SUBEXP specifies a subexpression;
2387 it says to replace just that subexpression with NEWTEXT,
2388 rather than replacing the entire matched text.
2389 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2390 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2391 NEWTEXT in place of subexp N.
2392 This is useful only after a regular expression search or match,
2393 since only regular expressions have distinguished subexpressions.  */)
2394   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2395 {
2396   enum { nochange, all_caps, cap_initial } case_action;
2397   register int pos, pos_byte;
2398   int some_multiletter_word;
2399   int some_lowercase;
2400   int some_uppercase;
2401   int some_nonuppercase_initial;
2402   register int c, prevc;
2403   int sub;
2404   EMACS_INT opoint, newpoint;
2405
2406   CHECK_STRING (newtext);
2407
2408   if (! NILP (string))
2409     CHECK_STRING (string);
2410
2411   case_action = nochange;       /* We tried an initialization */
2412                                 /* but some C compilers blew it */
2413
2414   if (search_regs.num_regs <= 0)
2415     error ("`replace-match' called before any match found");
2416
2417   if (NILP (subexp))
2418     sub = 0;
2419   else
2420     {
2421       CHECK_NUMBER (subexp);
2422       sub = XINT (subexp);
2423       if (sub < 0 || sub >= search_regs.num_regs)
2424         args_out_of_range (subexp, make_number (search_regs.num_regs));
2425     }
2426
2427   if (NILP (string))
2428     {
2429       if (search_regs.start[sub] < BEGV
2430           || search_regs.start[sub] > search_regs.end[sub]
2431           || search_regs.end[sub] > ZV)
2432         args_out_of_range (make_number (search_regs.start[sub]),
2433                            make_number (search_regs.end[sub]));
2434     }
2435   else
2436     {
2437       if (search_regs.start[sub] < 0
2438           || search_regs.start[sub] > search_regs.end[sub]
2439           || search_regs.end[sub] > SCHARS (string))
2440         args_out_of_range (make_number (search_regs.start[sub]),
2441                            make_number (search_regs.end[sub]));
2442     }
2443
2444   if (NILP (fixedcase))
2445     {
2446       /* Decide how to casify by examining the matched text. */
2447       EMACS_INT last;
2448
2449       pos = search_regs.start[sub];
2450       last = search_regs.end[sub];
2451
2452       if (NILP (string))
2453         pos_byte = CHAR_TO_BYTE (pos);
2454       else
2455         pos_byte = string_char_to_byte (string, pos);
2456
2457       prevc = '\n';
2458       case_action = all_caps;
2459
2460       /* some_multiletter_word is set nonzero if any original word
2461          is more than one letter long. */
2462       some_multiletter_word = 0;
2463       some_lowercase = 0;
2464       some_nonuppercase_initial = 0;
2465       some_uppercase = 0;
2466
2467       while (pos < last)
2468         {
2469           if (NILP (string))
2470             {
2471               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2472               INC_BOTH (pos, pos_byte);
2473             }
2474           else
2475             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2476
2477           if (LOWERCASEP (c))
2478             {
2479               /* Cannot be all caps if any original char is lower case */
2480
2481               some_lowercase = 1;
2482               if (SYNTAX (prevc) != Sword)
2483                 some_nonuppercase_initial = 1;
2484               else
2485                 some_multiletter_word = 1;
2486             }
2487           else if (UPPERCASEP (c))
2488             {
2489               some_uppercase = 1;
2490               if (SYNTAX (prevc) != Sword)
2491                 ;
2492               else
2493                 some_multiletter_word = 1;
2494             }
2495           else
2496             {
2497               /* If the initial is a caseless word constituent,
2498                  treat that like a lowercase initial.  */
2499               if (SYNTAX (prevc) != Sword)
2500                 some_nonuppercase_initial = 1;
2501             }
2502
2503           prevc = c;
2504         }
2505
2506       /* Convert to all caps if the old text is all caps
2507          and has at least one multiletter word.  */
2508       if (! some_lowercase && some_multiletter_word)
2509         case_action = all_caps;
2510       /* Capitalize each word, if the old text has all capitalized words.  */
2511       else if (!some_nonuppercase_initial && some_multiletter_word)
2512         case_action = cap_initial;
2513       else if (!some_nonuppercase_initial && some_uppercase)
2514         /* Should x -> yz, operating on X, give Yz or YZ?
2515            We'll assume the latter.  */
2516         case_action = all_caps;
2517       else
2518         case_action = nochange;
2519     }
2520
2521   /* Do replacement in a string.  */
2522   if (!NILP (string))
2523     {
2524       Lisp_Object before, after;
2525
2526       before = Fsubstring (string, make_number (0),
2527                            make_number (search_regs.start[sub]));
2528       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2529
2530       /* Substitute parts of the match into NEWTEXT
2531          if desired.  */
2532       if (NILP (literal))
2533         {
2534           EMACS_INT lastpos = 0;
2535           EMACS_INT lastpos_byte = 0;
2536           /* We build up the substituted string in ACCUM.  */
2537           Lisp_Object accum;
2538           Lisp_Object middle;
2539           int length = SBYTES (newtext);
2540
2541           accum = Qnil;
2542
2543           for (pos_byte = 0, pos = 0; pos_byte < length;)
2544             {
2545               int substart = -1;
2546               int subend = 0;
2547               int delbackslash = 0;
2548
2549               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2550
2551               if (c == '\\')
2552                 {
2553                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2554
2555                   if (c == '&')
2556                     {
2557                       substart = search_regs.start[sub];
2558                       subend = search_regs.end[sub];
2559                     }
2560                   else if (c >= '1' && c <= '9')
2561                     {
2562                       if (search_regs.start[c - '0'] >= 0
2563                           && c <= search_regs.num_regs + '0')
2564                         {
2565                           substart = search_regs.start[c - '0'];
2566                           subend = search_regs.end[c - '0'];
2567                         }
2568                       else
2569                         {
2570                           /* If that subexp did not match,
2571                              replace \\N with nothing.  */
2572                           substart = 0;
2573                           subend = 0;
2574                         }
2575                     }
2576                   else if (c == '\\')
2577                     delbackslash = 1;
2578                   else
2579                     error ("Invalid use of `\\' in replacement text");
2580                 }
2581               if (substart >= 0)
2582                 {
2583                   if (pos - 2 != lastpos)
2584                     middle = substring_both (newtext, lastpos,
2585                                              lastpos_byte,
2586                                              pos - 2, pos_byte - 2);
2587                   else
2588                     middle = Qnil;
2589                   accum = concat3 (accum, middle,
2590                                    Fsubstring (string,
2591                                                make_number (substart),
2592                                                make_number (subend)));
2593                   lastpos = pos;
2594                   lastpos_byte = pos_byte;
2595                 }
2596               else if (delbackslash)
2597                 {
2598                   middle = substring_both (newtext, lastpos,
2599                                            lastpos_byte,
2600                                            pos - 1, pos_byte - 1);
2601
2602                   accum = concat2 (accum, middle);
2603                   lastpos = pos;
2604                   lastpos_byte = pos_byte;
2605                 }
2606             }
2607
2608           if (pos != lastpos)
2609             middle = substring_both (newtext, lastpos,
2610                                      lastpos_byte,
2611                                      pos, pos_byte);
2612           else
2613             middle = Qnil;
2614
2615           newtext = concat2 (accum, middle);
2616         }
2617
2618       /* Do case substitution in NEWTEXT if desired.  */
2619       if (case_action == all_caps)
2620         newtext = Fupcase (newtext);
2621       else if (case_action == cap_initial)
2622         newtext = Fupcase_initials (newtext);
2623
2624       return concat3 (before, newtext, after);
2625     }
2626
2627   /* Record point, then move (quietly) to the start of the match.  */
2628   if (PT >= search_regs.end[sub])
2629     opoint = PT - ZV;
2630   else if (PT > search_regs.start[sub])
2631     opoint = search_regs.end[sub] - ZV;
2632   else
2633     opoint = PT;
2634
2635   /* If we want non-literal replacement,
2636      perform substitution on the replacement string.  */
2637   if (NILP (literal))
2638     {
2639       int length = SBYTES (newtext);
2640       unsigned char *substed;
2641       int substed_alloc_size, substed_len;
2642       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2643       int str_multibyte = STRING_MULTIBYTE (newtext);
2644       Lisp_Object rev_tbl;
2645       int really_changed = 0;
2646
2647       rev_tbl = Qnil;
2648
2649       substed_alloc_size = length * 2 + 100;
2650       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2651       substed_len = 0;
2652
2653       /* Go thru NEWTEXT, producing the actual text to insert in
2654          SUBSTED while adjusting multibyteness to that of the current
2655          buffer.  */
2656
2657       for (pos_byte = 0, pos = 0; pos_byte < length;)
2658         {
2659           unsigned char str[MAX_MULTIBYTE_LENGTH];
2660           unsigned char *add_stuff = NULL;
2661           int add_len = 0;
2662           int idx = -1;
2663
2664           if (str_multibyte)
2665             {
2666               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2667               if (!buf_multibyte)
2668                 c = multibyte_char_to_unibyte (c, rev_tbl);
2669             }
2670           else
2671             {
2672               /* Note that we don't have to increment POS.  */
2673               c = SREF (newtext, pos_byte++);
2674               if (buf_multibyte)
2675                 MAKE_CHAR_MULTIBYTE (c);
2676             }
2677
2678           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2679              or set IDX to a match index, which means put that part
2680              of the buffer text into SUBSTED.  */
2681
2682           if (c == '\\')
2683             {
2684               really_changed = 1;
2685
2686               if (str_multibyte)
2687                 {
2688                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2689                                                       pos, pos_byte);
2690                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2691                     c = multibyte_char_to_unibyte (c, rev_tbl);
2692                 }
2693               else
2694                 {
2695                   c = SREF (newtext, pos_byte++);
2696                   if (buf_multibyte)
2697                     MAKE_CHAR_MULTIBYTE (c);
2698                 }
2699
2700               if (c == '&')
2701                 idx = sub;
2702               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2703                 {
2704                   if (search_regs.start[c - '0'] >= 1)
2705                     idx = c - '0';
2706                 }
2707               else if (c == '\\')
2708                 add_len = 1, add_stuff = "\\";
2709               else
2710                 {
2711                   xfree (substed);
2712                   error ("Invalid use of `\\' in replacement text");
2713                 }
2714             }
2715           else
2716             {
2717               add_len = CHAR_STRING (c, str);
2718               add_stuff = str;
2719             }
2720
2721           /* If we want to copy part of a previous match,
2722              set up ADD_STUFF and ADD_LEN to point to it.  */
2723           if (idx >= 0)
2724             {
2725               EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2726               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2727               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2728                 move_gap (search_regs.start[idx]);
2729               add_stuff = BYTE_POS_ADDR (begbyte);
2730             }
2731
2732           /* Now the stuff we want to add to SUBSTED
2733              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2734
2735           /* Make sure SUBSTED is big enough.  */
2736           if (substed_len + add_len >= substed_alloc_size)
2737             {
2738               substed_alloc_size = substed_len + add_len + 500;
2739               substed = (unsigned char *) xrealloc (substed,
2740                                                     substed_alloc_size + 1);
2741             }
2742
2743           /* Now add to the end of SUBSTED.  */
2744           if (add_stuff)
2745             {
2746               memcpy (substed + substed_len, add_stuff, add_len);
2747               substed_len += add_len;
2748             }
2749         }
2750
2751       if (really_changed)
2752         {
2753           if (buf_multibyte)
2754             {
2755               int nchars = multibyte_chars_in_text (substed, substed_len);
2756
2757               newtext = make_multibyte_string (substed, nchars, substed_len);
2758             }
2759           else
2760             newtext = make_unibyte_string (substed, substed_len);
2761         }
2762       xfree (substed);
2763     }
2764
2765   /* Replace the old text with the new in the cleanest possible way.  */
2766   replace_range (search_regs.start[sub], search_regs.end[sub],
2767                  newtext, 1, 0, 1);
2768   newpoint = search_regs.start[sub] + SCHARS (newtext);
2769
2770   if (case_action == all_caps)
2771     Fupcase_region (make_number (search_regs.start[sub]),
2772                     make_number (newpoint));
2773   else if (case_action == cap_initial)
2774     Fupcase_initials_region (make_number (search_regs.start[sub]),
2775                              make_number (newpoint));
2776
2777   /* Adjust search data for this change.  */
2778   {
2779     EMACS_INT oldend = search_regs.end[sub];
2780     EMACS_INT oldstart = search_regs.start[sub];
2781     EMACS_INT change = newpoint - search_regs.end[sub];
2782     int i;
2783
2784     for (i = 0; i < search_regs.num_regs; i++)
2785       {
2786         if (search_regs.start[i] >= oldend)
2787           search_regs.start[i] += change;
2788         else if (search_regs.start[i] > oldstart)
2789           search_regs.start[i] = oldstart;
2790         if (search_regs.end[i] >= oldend)
2791           search_regs.end[i] += change;
2792         else if (search_regs.end[i] > oldstart)
2793           search_regs.end[i] = oldstart;
2794       }
2795   }
2796
2797   /* Put point back where it was in the text.  */
2798   if (opoint <= 0)
2799     TEMP_SET_PT (opoint + ZV);
2800   else
2801     TEMP_SET_PT (opoint);
2802
2803   /* Now move point "officially" to the start of the inserted replacement.  */
2804   move_if_not_intangible (newpoint);
2805
2806   return Qnil;
2807 }
2808 \f
2809 static Lisp_Object
2810 match_limit (Lisp_Object num, int beginningp)
2811 {
2812   register int n;
2813
2814   CHECK_NUMBER (num);
2815   n = XINT (num);
2816   if (n < 0)
2817     args_out_of_range (num, make_number (0));
2818   if (search_regs.num_regs <= 0)
2819     error ("No match data, because no search succeeded");
2820   if (n >= search_regs.num_regs
2821       || search_regs.start[n] < 0)
2822     return Qnil;
2823   return (make_number ((beginningp) ? search_regs.start[n]
2824                                     : search_regs.end[n]));
2825 }
2826
2827 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2828        doc: /* Return position of start of text matched by last search.
2829 SUBEXP, a number, specifies which parenthesized expression in the last
2830   regexp.
2831 Value is nil if SUBEXPth pair didn't match, or there were less than
2832   SUBEXP pairs.
2833 Zero means the entire text matched by the whole regexp or whole string.  */)
2834   (Lisp_Object subexp)
2835 {
2836   return match_limit (subexp, 1);
2837 }
2838
2839 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2840        doc: /* Return position of end of text matched by last search.
2841 SUBEXP, a number, specifies which parenthesized expression in the last
2842   regexp.
2843 Value is nil if SUBEXPth pair didn't match, or there were less than
2844   SUBEXP pairs.
2845 Zero means the entire text matched by the whole regexp or whole string.  */)
2846   (Lisp_Object subexp)
2847 {
2848   return match_limit (subexp, 0);
2849 }
2850
2851 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2852        doc: /* Return a list containing all info on what the last search matched.
2853 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2854 All the elements are markers or nil (nil if the Nth pair didn't match)
2855 if the last match was on a buffer; integers or nil if a string was matched.
2856 Use `set-match-data' to reinstate the data in this list.
2857
2858 If INTEGERS (the optional first argument) is non-nil, always use
2859 integers \(rather than markers) to represent buffer positions.  In
2860 this case, and if the last match was in a buffer, the buffer will get
2861 stored as one additional element at the end of the list.
2862
2863 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2864 enough to hold all the values, and if INTEGERS is non-nil, no consing
2865 is done.
2866
2867 If optional third arg RESEAT is non-nil, any previous markers on the
2868 REUSE list will be modified to point to nowhere.
2869
2870 Return value is undefined if the last search failed.  */)
2871   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2872 {
2873   Lisp_Object tail, prev;
2874   Lisp_Object *data;
2875   int i, len;
2876
2877   if (!NILP (reseat))
2878     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2879       if (MARKERP (XCAR (tail)))
2880         {
2881           unchain_marker (XMARKER (XCAR (tail)));
2882           XSETCAR (tail, Qnil);
2883         }
2884
2885   if (NILP (last_thing_searched))
2886     return Qnil;
2887
2888   prev = Qnil;
2889
2890   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2891                                  * sizeof (Lisp_Object));
2892
2893   len = 0;
2894   for (i = 0; i < search_regs.num_regs; i++)
2895     {
2896       int start = search_regs.start[i];
2897       if (start >= 0)
2898         {
2899           if (EQ (last_thing_searched, Qt)
2900               || ! NILP (integers))
2901             {
2902               XSETFASTINT (data[2 * i], start);
2903               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2904             }
2905           else if (BUFFERP (last_thing_searched))
2906             {
2907               data[2 * i] = Fmake_marker ();
2908               Fset_marker (data[2 * i],
2909                            make_number (start),
2910                            last_thing_searched);
2911               data[2 * i + 1] = Fmake_marker ();
2912               Fset_marker (data[2 * i + 1],
2913                            make_number (search_regs.end[i]),
2914                            last_thing_searched);
2915             }
2916           else
2917             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2918             abort ();
2919
2920           len = 2 * i + 2;
2921         }
2922       else
2923         data[2 * i] = data[2 * i + 1] = Qnil;
2924     }
2925
2926   if (BUFFERP (last_thing_searched) && !NILP (integers))
2927     {
2928       data[len] = last_thing_searched;
2929       len++;
2930     }
2931
2932   /* If REUSE is not usable, cons up the values and return them.  */
2933   if (! CONSP (reuse))
2934     return Flist (len, data);
2935
2936   /* If REUSE is a list, store as many value elements as will fit
2937      into the elements of REUSE.  */
2938   for (i = 0, tail = reuse; CONSP (tail);
2939        i++, tail = XCDR (tail))
2940     {
2941       if (i < len)
2942         XSETCAR (tail, data[i]);
2943       else
2944         XSETCAR (tail, Qnil);
2945       prev = tail;
2946     }
2947
2948   /* If we couldn't fit all value elements into REUSE,
2949      cons up the rest of them and add them to the end of REUSE.  */
2950   if (i < len)
2951     XSETCDR (prev, Flist (len - i, data + i));
2952
2953   return reuse;
2954 }
2955
2956 /* We used to have an internal use variant of `reseat' described as:
2957
2958       If RESEAT is `evaporate', put the markers back on the free list
2959       immediately.  No other references to the markers must exist in this
2960       case, so it is used only internally on the unwind stack and
2961       save-match-data from Lisp.
2962
2963    But it was ill-conceived: those supposedly-internal markers get exposed via
2964    the undo-list, so freeing them here is unsafe.  */
2965
2966 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2967        doc: /* Set internal data on last search match from elements of LIST.
2968 LIST should have been created by calling `match-data' previously.
2969
2970 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2971   (register Lisp_Object list, Lisp_Object reseat)
2972 {
2973   register int i;
2974   register Lisp_Object marker;
2975
2976   if (running_asynch_code)
2977     save_search_regs ();
2978
2979   CHECK_LIST (list);
2980
2981   /* Unless we find a marker with a buffer or an explicit buffer
2982      in LIST, assume that this match data came from a string.  */
2983   last_thing_searched = Qt;
2984
2985   /* Allocate registers if they don't already exist.  */
2986   {
2987     int length = XFASTINT (Flength (list)) / 2;
2988
2989     if (length > search_regs.num_regs)
2990       {
2991         if (search_regs.num_regs == 0)
2992           {
2993             search_regs.start
2994               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2995             search_regs.end
2996               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2997           }
2998         else
2999           {
3000             search_regs.start
3001               = (regoff_t *) xrealloc (search_regs.start,
3002                                        length * sizeof (regoff_t));
3003             search_regs.end
3004               = (regoff_t *) xrealloc (search_regs.end,
3005                                        length * sizeof (regoff_t));
3006           }
3007
3008         for (i = search_regs.num_regs; i < length; i++)
3009           search_regs.start[i] = -1;
3010
3011         search_regs.num_regs = length;
3012       }
3013
3014     for (i = 0; CONSP (list); i++)
3015       {
3016         marker = XCAR (list);
3017         if (BUFFERP (marker))
3018           {
3019             last_thing_searched = marker;
3020             break;
3021           }
3022         if (i >= length)
3023           break;
3024         if (NILP (marker))
3025           {
3026             search_regs.start[i] = -1;
3027             list = XCDR (list);
3028           }
3029         else
3030           {
3031             EMACS_INT from;
3032             Lisp_Object m;
3033
3034             m = marker;
3035             if (MARKERP (marker))
3036               {
3037                 if (XMARKER (marker)->buffer == 0)
3038                   XSETFASTINT (marker, 0);
3039                 else
3040                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3041               }
3042
3043             CHECK_NUMBER_COERCE_MARKER (marker);
3044             from = XINT (marker);
3045
3046             if (!NILP (reseat) && MARKERP (m))
3047               {
3048                 unchain_marker (XMARKER (m));
3049                 XSETCAR (list, Qnil);
3050               }
3051
3052             if ((list = XCDR (list), !CONSP (list)))
3053               break;
3054
3055             m = marker = XCAR (list);
3056
3057             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3058               XSETFASTINT (marker, 0);
3059
3060             CHECK_NUMBER_COERCE_MARKER (marker);
3061             search_regs.start[i] = from;
3062             search_regs.end[i] = XINT (marker);
3063
3064             if (!NILP (reseat) && MARKERP (m))
3065               {
3066                 unchain_marker (XMARKER (m));
3067                 XSETCAR (list, Qnil);
3068               }
3069           }
3070         list = XCDR (list);
3071       }
3072
3073     for (; i < search_regs.num_regs; i++)
3074       search_regs.start[i] = -1;
3075   }
3076
3077   return Qnil;
3078 }
3079
3080 /* If non-zero the match data have been saved in saved_search_regs
3081    during the execution of a sentinel or filter. */
3082 static int search_regs_saved;
3083 static struct re_registers saved_search_regs;
3084 static Lisp_Object saved_last_thing_searched;
3085
3086 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3087    if asynchronous code (filter or sentinel) is running. */
3088 static void
3089 save_search_regs (void)
3090 {
3091   if (!search_regs_saved)
3092     {
3093       saved_search_regs.num_regs = search_regs.num_regs;
3094       saved_search_regs.start = search_regs.start;
3095       saved_search_regs.end = search_regs.end;
3096       saved_last_thing_searched = last_thing_searched;
3097       last_thing_searched = Qnil;
3098       search_regs.num_regs = 0;
3099       search_regs.start = 0;
3100       search_regs.end = 0;
3101
3102       search_regs_saved = 1;
3103     }
3104 }
3105
3106 /* Called upon exit from filters and sentinels. */
3107 void
3108 restore_search_regs (void)
3109 {
3110   if (search_regs_saved)
3111     {
3112       if (search_regs.num_regs > 0)
3113         {
3114           xfree (search_regs.start);
3115           xfree (search_regs.end);
3116         }
3117       search_regs.num_regs = saved_search_regs.num_regs;
3118       search_regs.start = saved_search_regs.start;
3119       search_regs.end = saved_search_regs.end;
3120       last_thing_searched = saved_last_thing_searched;
3121       saved_last_thing_searched = Qnil;
3122       search_regs_saved = 0;
3123     }
3124 }
3125
3126 static Lisp_Object
3127 unwind_set_match_data (Lisp_Object list)
3128 {
3129   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3130   return Fset_match_data (list, Qt);
3131 }
3132
3133 /* Called to unwind protect the match data.  */
3134 void
3135 record_unwind_save_match_data (void)
3136 {
3137   record_unwind_protect (unwind_set_match_data,
3138                          Fmatch_data (Qnil, Qnil, Qnil));
3139 }
3140
3141 /* Quote a string to inactivate reg-expr chars */
3142
3143 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3144        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3145   (Lisp_Object string)
3146 {
3147   register unsigned char *in, *out, *end;
3148   register unsigned char *temp;
3149   int backslashes_added = 0;
3150
3151   CHECK_STRING (string);
3152
3153   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3154
3155   /* Now copy the data into the new string, inserting escapes. */
3156
3157   in = SDATA (string);
3158   end = in + SBYTES (string);
3159   out = temp;
3160
3161   for (; in != end; in++)
3162     {
3163       if (*in == '['
3164           || *in == '*' || *in == '.' || *in == '\\'
3165           || *in == '?' || *in == '+'
3166           || *in == '^' || *in == '$')
3167         *out++ = '\\', backslashes_added++;
3168       *out++ = *in;
3169     }
3170
3171   return make_specified_string (temp,
3172                                 SCHARS (string) + backslashes_added,
3173                                 out - temp,
3174                                 STRING_MULTIBYTE (string));
3175 }
3176 \f
3177 void
3178 syms_of_search (void)
3179 {
3180   register int i;
3181
3182   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3183     {
3184       searchbufs[i].buf.allocated = 100;
3185       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3186       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3187       searchbufs[i].regexp = Qnil;
3188       searchbufs[i].whitespace_regexp = Qnil;
3189       searchbufs[i].syntax_table = Qnil;
3190       staticpro (&searchbufs[i].regexp);
3191       staticpro (&searchbufs[i].whitespace_regexp);
3192       staticpro (&searchbufs[i].syntax_table);
3193       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3194     }
3195   searchbuf_head = &searchbufs[0];
3196
3197   Qsearch_failed = intern_c_string ("search-failed");
3198   staticpro (&Qsearch_failed);
3199   Qinvalid_regexp = intern_c_string ("invalid-regexp");
3200   staticpro (&Qinvalid_regexp);
3201
3202   Fput (Qsearch_failed, Qerror_conditions,
3203         pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
3204   Fput (Qsearch_failed, Qerror_message,
3205         make_pure_c_string ("Search failed"));
3206
3207   Fput (Qinvalid_regexp, Qerror_conditions,
3208         pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
3209   Fput (Qinvalid_regexp, Qerror_message,
3210         make_pure_c_string ("Invalid regexp"));
3211
3212   last_thing_searched = Qnil;
3213   staticpro (&last_thing_searched);
3214
3215   saved_last_thing_searched = Qnil;
3216   staticpro (&saved_last_thing_searched);
3217
3218   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3219       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3220 Some commands use this for user-specified regexps.
3221 Spaces that occur inside character classes or repetition operators
3222 or other such regexp constructs are not replaced with this.
3223 A value of nil (which is the normal value) means treat spaces literally.  */);
3224   Vsearch_spaces_regexp = Qnil;
3225
3226   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3227       doc: /* Internal use only.
3228 If non-nil, the primitive searching and matching functions
3229 such as `looking-at', `string-match', `re-search-forward', etc.,
3230 do not set the match data.  The proper way to use this variable
3231 is to bind it with `let' around a small expression.  */);
3232   Vinhibit_changing_match_data = Qnil;
3233
3234   defsubr (&Slooking_at);
3235   defsubr (&Sposix_looking_at);
3236   defsubr (&Sstring_match);
3237   defsubr (&Sposix_string_match);
3238   defsubr (&Ssearch_forward);
3239   defsubr (&Ssearch_backward);
3240   defsubr (&Sword_search_forward);
3241   defsubr (&Sword_search_backward);
3242   defsubr (&Sword_search_forward_lax);
3243   defsubr (&Sword_search_backward_lax);
3244   defsubr (&Sre_search_forward);
3245   defsubr (&Sre_search_backward);
3246   defsubr (&Sposix_search_forward);
3247   defsubr (&Sposix_search_backward);
3248   defsubr (&Sreplace_match);
3249   defsubr (&Smatch_beginning);
3250   defsubr (&Smatch_end);
3251   defsubr (&Smatch_data);
3252   defsubr (&Sset_match_data);
3253   defsubr (&Sregexp_quote);
3254 }
3255
3256 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3257    (do not change this comment) */