src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
  49   struct re_pattern_buffer buf;
  50   char fastmap[0400];
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
  55 /* The instances of that struct.  */
  56 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  57
  58 /* The head of the linked list; points to the most recently used buffer.  */
  59 struct regexp_cache *searchbuf_head;
  60
  61
  62 /* Every call to re_match, etc., must pass &search_regs as the regs
  63    argument unless you can show it is unnecessary (i.e., if re_match
  64    is certainly going to be called again before region-around-match
  65    can be called).
  66
  67    Since the registers are now dynamically allocated, we need to make
  68    sure not to refer to the Nth register before checking that it has
  69    been allocated by checking search_regs.num_regs.
  70
  71    The regex code keeps track of whether it has allocated the search
  72    buffer using bits in the re_pattern_buffer.  This means that whenever
  73    you compile a new pattern, it completely forgets whether it has
  74    allocated any registers, and will allocate new registers the next
  75    time you call a searching or matching function.  Therefore, we need
  76    to call re_set_registers after compiling a new pattern or after
  77    setting the match registers, so that the regex functions will be
  78    able to free or re-allocate it properly.  */
  79 static struct re_registers search_regs;
  80
  81 /* The buffer in which the last search was performed, or
  82    Qt if the last search was done in a string;
  83    Qnil if no searching has been done yet.  */
  84 static Lisp_Object last_thing_searched;
  85
  86 /* error condition signaled when regexp compile_pattern fails */
  87
  88 Lisp_Object Qinvalid_regexp;
  89
  90 /* Error condition used for failing searches */
  91 Lisp_Object Qsearch_failed;
  92
  93 Lisp_Object Vsearch_spaces_regexp;
  94
  95 /* If non-nil, the match data will not be changed during call to
  96    searching or matching functions.  This variable is for internal use
  97    only.  */
  98 Lisp_Object Vinhibit_changing_match_data;
  99
 100 static void set_search_regs ();
 101 static void save_search_regs ();
 102 static int simple_search ();
 103 static int boyer_moore ();
 104 static int search_buffer ();
 105 static void matcher_overflow () NO_RETURN;
 106
 107 static void
 108 matcher_overflow ()
 109 {
 110   error ("Stack overflow in regexp matcher");
 111 }
 112
 113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 114    PATTERN is the pattern to compile.
 115    CP is the place to put the result.
 116    TRANSLATE is a translation table for ignoring case, or nil for none.
 117    REGP is the structure that says where to store the "register"
 118    values that will result from matching this pattern.
 119    If it is 0, we should compile the pattern not to record any
 120    subexpression bounds.
 121    POSIX is nonzero if we want full backtracking (POSIX style)
 122    for this pattern.  0 means backtrack only enough to get a valid match.
 123
 124    The behavior also depends on Vsearch_spaces_regexp.  */
 125
 126 static void
 127 compile_pattern_1 (cp, pattern, translate, regp, posix)
 128      struct regexp_cache *cp;
 129      Lisp_Object pattern;
 130      Lisp_Object translate;
 131      struct re_registers *regp;
 132      int posix;
 133 {
 134   char *val;
 135   reg_syntax_t old;
 136
 137   cp->regexp = Qnil;
 138   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 139   cp->posix = posix;
 140   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 141   cp->buf.charset_unibyte = charset_unibyte;
 142   if (STRINGP (Vsearch_spaces_regexp))
 143     cp->whitespace_regexp = Vsearch_spaces_regexp;
 144   else
 145     cp->whitespace_regexp = Qnil;
 146
 147   /* rms: I think BLOCK_INPUT is not needed here any more,
 148      because regex.c defines malloc to call xmalloc.
 149      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 150      So let's turn it off.  */
 151   /*  BLOCK_INPUT;  */
 152   old = re_set_syntax (RE_SYNTAX_EMACS
 153                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 154
 155   if (STRINGP (Vsearch_spaces_regexp))
 156     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 157   else
 158     re_set_whitespace_regexp (NULL);
 159
 160   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 161                                      SBYTES (pattern), &cp->buf);
 162
 163   /* If the compiled pattern hard codes some of the contents of the
 164      syntax-table, it can only be reused with *this* syntax table.  */
 165   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 166
 167   re_set_whitespace_regexp (NULL);
 168
 169   re_set_syntax (old);
 170   /* UNBLOCK_INPUT;  */
 171   if (val)
 172     xsignal1 (Qinvalid_regexp, build_string (val));
 173
 174   cp->regexp = Fcopy_sequence (pattern);
 175 }
 176
 177 /* Shrink each compiled regexp buffer in the cache
 178    to the size actually used right now.
 179    This is called from garbage collection.  */
 180
 181 void
 182 shrink_regexp_cache ()
 183 {
 184   struct regexp_cache *cp;
 185
 186   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 187     {
 188       cp->buf.allocated = cp->buf.used;
 189       cp->buf.buffer
 190         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 191     }
 192 }
 193
 194 /* Clear the regexp cache w.r.t. a particular syntax table,
 195    because it was changed.
 196    There is no danger of memory leak here because re_compile_pattern
 197    automagically manages the memory in each re_pattern_buffer struct,
 198    based on its `allocated' and `buffer' values.  */
 199 void
 200 clear_regexp_cache ()
 201 {
 202   int i;
 203
 204   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 205     /* It's tempting to compare with the syntax-table we've actually changd,
 206        but it's not sufficient because char-table inheritance mewans that
 207        modifying one syntax-table can change others at the same time.  */
 208     if (!EQ (searchbufs[i].syntax_table, Qt))
 209       searchbufs[i].regexp = Qnil;
 210 }
 211
 212 /* Compile a regexp if necessary, but first check to see if there's one in
 213    the cache.
 214    PATTERN is the pattern to compile.
 215    TRANSLATE is a translation table for ignoring case, or nil for none.
 216    REGP is the structure that says where to store the "register"
 217    values that will result from matching this pattern.
 218    If it is 0, we should compile the pattern not to record any
 219    subexpression bounds.
 220    POSIX is nonzero if we want full backtracking (POSIX style)
 221    for this pattern.  0 means backtrack only enough to get a valid match.  */
 222
 223 struct re_pattern_buffer *
 224 compile_pattern (pattern, regp, translate, posix, multibyte)
 225      Lisp_Object pattern;
 226      struct re_registers *regp;
 227      Lisp_Object translate;
 228      int posix, multibyte;
 229 {
 230   struct regexp_cache *cp, **cpp;
 231
 232   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 233     {
 234       cp = *cpp;
 235       /* Entries are initialized to nil, and may be set to nil by
 236          compile_pattern_1 if the pattern isn't valid.  Don't apply
 237          string accessors in those cases.  However, compile_pattern_1
 238          is only applied to the cache entry we pick here to reuse.  So
 239          nil should never appear before a non-nil entry.  */
 240       if (NILP (cp->regexp))
 241         goto compile_it;
 242       if (SCHARS (cp->regexp) == SCHARS (pattern)
 243           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 244           && !NILP (Fstring_equal (cp->regexp, pattern))
 245           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 246           && cp->posix == posix
 247           && (EQ (cp->syntax_table, Qt)
 248               || EQ (cp->syntax_table, current_buffer->syntax_table))
 249           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 250           && cp->buf.charset_unibyte == charset_unibyte)
 251         break;
 252
 253       /* If we're at the end of the cache, compile into the nil cell
 254          we found, or the last (least recently used) cell with a
 255          string value.  */
 256       if (cp->next == 0)
 257         {
 258         compile_it:
 259           compile_pattern_1 (cp, pattern, translate, regp, posix);
 260           break;
 261         }
 262     }
 263
 264   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 265      either because we found it in the cache or because we just compiled it.
 266      Move it to the front of the queue to mark it as most recently used.  */
 267   *cpp = cp->next;
 268   cp->next = searchbuf_head;
 269   searchbuf_head = cp;
 270
 271   /* Advise the searching functions about the space we have allocated
 272      for register data.  */
 273   if (regp)
 274     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 275
 276   /* The compiled pattern can be used both for mulitbyte and unibyte
 277      target.  But, we have to tell which the pattern is used for. */
 278   cp->buf.target_multibyte = multibyte;
 279
 280   return &cp->buf;
 281 }
 282
 283 \f
 284 static Lisp_Object
 285 looking_at_1 (string, posix)
 286      Lisp_Object string;
 287      int posix;
 288 {
 289   Lisp_Object val;
 290   unsigned char *p1, *p2;
 291   int s1, s2;
 292   register int i;
 293   struct re_pattern_buffer *bufp;
 294
 295   if (running_asynch_code)
 296     save_search_regs ();
 297
 298   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 299   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 300     = current_buffer->case_eqv_table;
 301
 302   CHECK_STRING (string);
 303   bufp = compile_pattern (string,
 304                           (NILP (Vinhibit_changing_match_data)
 305                            ? &search_regs : NULL),
 306                           (!NILP (current_buffer->case_fold_search)
 307                            ? current_buffer->case_canon_table : Qnil),
 308                           posix,
 309                           !NILP (current_buffer->enable_multibyte_characters));
 310
 311   immediate_quit = 1;
 312   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 313
 314   /* Get pointers and sizes of the two strings
 315      that make up the visible portion of the buffer. */
 316
 317   p1 = BEGV_ADDR;
 318   s1 = GPT_BYTE - BEGV_BYTE;
 319   p2 = GAP_END_ADDR;
 320   s2 = ZV_BYTE - GPT_BYTE;
 321   if (s1 < 0)
 322     {
 323       p2 = p1;
 324       s2 = ZV_BYTE - BEGV_BYTE;
 325       s1 = 0;
 326     }
 327   if (s2 < 0)
 328     {
 329       s1 = ZV_BYTE - BEGV_BYTE;
 330       s2 = 0;
 331     }
 332
 333   re_match_object = Qnil;
 334
 335   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 336                   PT_BYTE - BEGV_BYTE,
 337                   (NILP (Vinhibit_changing_match_data)
 338                    ? &search_regs : NULL),
 339                   ZV_BYTE - BEGV_BYTE);
 340   immediate_quit = 0;
 341
 342   if (i == -2)
 343     matcher_overflow ();
 344
 345   val = (0 <= i ? Qt : Qnil);
 346   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 347     for (i = 0; i < search_regs.num_regs; i++)
 348       if (search_regs.start[i] >= 0)
 349         {
 350           search_regs.start[i]
 351             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 352           search_regs.end[i]
 353             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 354         }
 355
 356   /* Set last_thing_searched only when match data is changed.  */
 357   if (NILP (Vinhibit_changing_match_data))
 358     XSETBUFFER (last_thing_searched, current_buffer);
 359
 360   return val;
 361 }
 362
 363 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 364        doc: /* Return t if text after point matches regular expression REGEXP.
 365 This function modifies the match data that `match-beginning',
 366 `match-end' and `match-data' access; save and restore the match
 367 data if you want to preserve them.  */)
 368      (regexp)
 369      Lisp_Object regexp;
 370 {
 371   return looking_at_1 (regexp, 0);
 372 }
 373
 374 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 375        doc: /* Return t if text after point matches regular expression REGEXP.
 376 Find the longest match, in accord with Posix regular expression rules.
 377 This function modifies the match data that `match-beginning',
 378 `match-end' and `match-data' access; save and restore the match
 379 data if you want to preserve them.  */)
 380      (regexp)
 381      Lisp_Object regexp;
 382 {
 383   return looking_at_1 (regexp, 1);
 384 }
 385 \f
 386 static Lisp_Object
 387 string_match_1 (regexp, string, start, posix)
 388      Lisp_Object regexp, string, start;
 389      int posix;
 390 {
 391   int val;
 392   struct re_pattern_buffer *bufp;
 393   int pos, pos_byte;
 394   int i;
 395
 396   if (running_asynch_code)
 397     save_search_regs ();
 398
 399   CHECK_STRING (regexp);
 400   CHECK_STRING (string);
 401
 402   if (NILP (start))
 403     pos = 0, pos_byte = 0;
 404   else
 405     {
 406       int len = SCHARS (string);
 407
 408       CHECK_NUMBER (start);
 409       pos = XINT (start);
 410       if (pos < 0 && -pos <= len)
 411         pos = len + pos;
 412       else if (0 > pos || pos > len)
 413         args_out_of_range (string, start);
 414       pos_byte = string_char_to_byte (string, pos);
 415     }
 416
 417   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 418   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 419     = current_buffer->case_eqv_table;
 420
 421   bufp = compile_pattern (regexp,
 422                           (NILP (Vinhibit_changing_match_data)
 423                            ? &search_regs : NULL),
 424                           (!NILP (current_buffer->case_fold_search)
 425                            ? current_buffer->case_canon_table : Qnil),
 426                           posix,
 427                           STRING_MULTIBYTE (string));
 428   immediate_quit = 1;
 429   re_match_object = string;
 430
 431   val = re_search (bufp, (char *) SDATA (string),
 432                    SBYTES (string), pos_byte,
 433                    SBYTES (string) - pos_byte,
 434                    (NILP (Vinhibit_changing_match_data)
 435                     ? &search_regs : NULL));
 436   immediate_quit = 0;
 437
 438   /* Set last_thing_searched only when match data is changed.  */
 439   if (NILP (Vinhibit_changing_match_data))
 440     last_thing_searched = Qt;
 441
 442   if (val == -2)
 443     matcher_overflow ();
 444   if (val < 0) return Qnil;
 445
 446   if (NILP (Vinhibit_changing_match_data))
 447     for (i = 0; i < search_regs.num_regs; i++)
 448       if (search_regs.start[i] >= 0)
 449         {
 450           search_regs.start[i]
 451             = string_byte_to_char (string, search_regs.start[i]);
 452           search_regs.end[i]
 453             = string_byte_to_char (string, search_regs.end[i]);
 454         }
 455
 456   return make_number (string_byte_to_char (string, val));
 457 }
 458
 459 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 460        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 461 Matching ignores case if `case-fold-search' is non-nil.
 462 If third arg START is non-nil, start search at that index in STRING.
 463 For index of first char beyond the match, do (match-end 0).
 464 `match-end' and `match-beginning' also give indices of substrings
 465 matched by parenthesis constructs in the pattern.
 466
 467 You can use the function `match-string' to extract the substrings
 468 matched by the parenthesis constructions in REGEXP. */)
 469      (regexp, string, start)
 470      Lisp_Object regexp, string, start;
 471 {
 472   return string_match_1 (regexp, string, start, 0);
 473 }
 474
 475 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 476        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 477 Find the longest match, in accord with Posix regular expression rules.
 478 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 479 If third arg START is non-nil, start search at that index in STRING.
 480 For index of first char beyond the match, do (match-end 0).
 481 `match-end' and `match-beginning' also give indices of substrings
 482 matched by parenthesis constructs in the pattern.  */)
 483      (regexp, string, start)
 484      Lisp_Object regexp, string, start;
 485 {
 486   return string_match_1 (regexp, string, start, 1);
 487 }
 488
 489 /* Match REGEXP against STRING, searching all of STRING,
 490    and return the index of the match, or negative on failure.
 491    This does not clobber the match data.  */
 492
 493 int
 494 fast_string_match (regexp, string)
 495      Lisp_Object regexp, string;
 496 {
 497   int val;
 498   struct re_pattern_buffer *bufp;
 499
 500   bufp = compile_pattern (regexp, 0, Qnil,
 501                           0, STRING_MULTIBYTE (string));
 502   immediate_quit = 1;
 503   re_match_object = string;
 504
 505   val = re_search (bufp, (char *) SDATA (string),
 506                    SBYTES (string), 0,
 507                    SBYTES (string), 0);
 508   immediate_quit = 0;
 509   return val;
 510 }
 511
 512 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 513    and return the index of the match, or negative on failure.
 514    This does not clobber the match data.
 515    We assume that STRING contains single-byte characters.  */
 516
 517 extern Lisp_Object Vascii_downcase_table;
 518
 519 int
 520 fast_c_string_match_ignore_case (regexp, string)
 521      Lisp_Object regexp;
 522      const char *string;
 523 {
 524   int val;
 525   struct re_pattern_buffer *bufp;
 526   int len = strlen (string);
 527
 528   regexp = string_make_unibyte (regexp);
 529   re_match_object = Qt;
 530   bufp = compile_pattern (regexp, 0,
 531                           Vascii_canon_table, 0,
 532                           0);
 533   immediate_quit = 1;
 534   val = re_search (bufp, string, len, 0, len, 0);
 535   immediate_quit = 0;
 536   return val;
 537 }
 538
 539 /* Like fast_string_match but ignore case.  */
 540
 541 int
 542 fast_string_match_ignore_case (regexp, string)
 543      Lisp_Object regexp, string;
 544 {
 545   int val;
 546   struct re_pattern_buffer *bufp;
 547
 548   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 549                           0, STRING_MULTIBYTE (string));
 550   immediate_quit = 1;
 551   re_match_object = string;
 552
 553   val = re_search (bufp, (char *) SDATA (string),
 554                    SBYTES (string), 0,
 555                    SBYTES (string), 0);
 556   immediate_quit = 0;
 557   return val;
 558 }
 559 \f
 560 /* The newline cache: remembering which sections of text have no newlines.  */
 561
 562 /* If the user has requested newline caching, make sure it's on.
 563    Otherwise, make sure it's off.
 564    This is our cheezy way of associating an action with the change of
 565    state of a buffer-local variable.  */
 566 static void
 567 newline_cache_on_off (buf)
 568      struct buffer *buf;
 569 {
 570   if (NILP (buf->cache_long_line_scans))
 571     {
 572       /* It should be off.  */
 573       if (buf->newline_cache)
 574         {
 575           free_region_cache (buf->newline_cache);
 576           buf->newline_cache = 0;
 577         }
 578     }
 579   else
 580     {
 581       /* It should be on.  */
 582       if (buf->newline_cache == 0)
 583         buf->newline_cache = new_region_cache ();
 584     }
 585 }
 586
 587 \f
 588 /* Search for COUNT instances of the character TARGET between START and END.
 589
 590    If COUNT is positive, search forwards; END must be >= START.
 591    If COUNT is negative, search backwards for the -COUNTth instance;
 592       END must be <= START.
 593    If COUNT is zero, do anything you please; run rogue, for all I care.
 594
 595    If END is zero, use BEGV or ZV instead, as appropriate for the
 596    direction indicated by COUNT.
 597
 598    If we find COUNT instances, set *SHORTAGE to zero, and return the
 599    position past the COUNTth match.  Note that for reverse motion
 600    this is not the same as the usual convention for Emacs motion commands.
 601
 602    If we don't find COUNT instances before reaching END, set *SHORTAGE
 603    to the number of TARGETs left unfound, and return END.
 604
 605    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 606    except when inside redisplay.  */
 607
 608 int
 609 scan_buffer (target, start, end, count, shortage, allow_quit)
 610      register int target;
 611      int start, end;
 612      int count;
 613      int *shortage;
 614      int allow_quit;
 615 {
 616   struct region_cache *newline_cache;
 617   int direction;
 618
 619   if (count > 0)
 620     {
 621       direction = 1;
 622       if (! end) end = ZV;
 623     }
 624   else
 625     {
 626       direction = -1;
 627       if (! end) end = BEGV;
 628     }
 629
 630   newline_cache_on_off (current_buffer);
 631   newline_cache = current_buffer->newline_cache;
 632
 633   if (shortage != 0)
 634     *shortage = 0;
 635
 636   immediate_quit = allow_quit;
 637
 638   if (count > 0)
 639     while (start != end)
 640       {
 641         /* Our innermost scanning loop is very simple; it doesn't know
 642            about gaps, buffer ends, or the newline cache.  ceiling is
 643            the position of the last character before the next such
 644            obstacle --- the last character the dumb search loop should
 645            examine.  */
 646         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 647         int start_byte = CHAR_TO_BYTE (start);
 648         int tem;
 649
 650         /* If we're looking for a newline, consult the newline cache
 651            to see where we can avoid some scanning.  */
 652         if (target == '\n' && newline_cache)
 653           {
 654             int next_change;
 655             immediate_quit = 0;
 656             while (region_cache_forward
 657                    (current_buffer, newline_cache, start_byte, &next_change))
 658               start_byte = next_change;
 659             immediate_quit = allow_quit;
 660
 661             /* START should never be after END.  */
 662             if (start_byte > ceiling_byte)
 663               start_byte = ceiling_byte;
 664
 665             /* Now the text after start is an unknown region, and
 666                next_change is the position of the next known region. */
 667             ceiling_byte = min (next_change - 1, ceiling_byte);
 668           }
 669
 670         /* The dumb loop can only scan text stored in contiguous
 671            bytes. BUFFER_CEILING_OF returns the last character
 672            position that is contiguous, so the ceiling is the
 673            position after that.  */
 674         tem = BUFFER_CEILING_OF (start_byte);
 675         ceiling_byte = min (tem, ceiling_byte);
 676
 677         {
 678           /* The termination address of the dumb loop.  */
 679           register unsigned char *ceiling_addr
 680             = BYTE_POS_ADDR (ceiling_byte) + 1;
 681           register unsigned char *cursor
 682             = BYTE_POS_ADDR (start_byte);
 683           unsigned char *base = cursor;
 684
 685           while (cursor < ceiling_addr)
 686             {
 687               unsigned char *scan_start = cursor;
 688
 689               /* The dumb loop.  */
 690               while (*cursor != target && ++cursor < ceiling_addr)
 691                 ;
 692
 693               /* If we're looking for newlines, cache the fact that
 694                  the region from start to cursor is free of them. */
 695               if (target == '\n' && newline_cache)
 696                 know_region_cache (current_buffer, newline_cache,
 697                                    start_byte + scan_start - base,
 698                                    start_byte + cursor - base);
 699
 700               /* Did we find the target character?  */
 701               if (cursor < ceiling_addr)
 702                 {
 703                   if (--count == 0)
 704                     {
 705                       immediate_quit = 0;
 706                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 707                     }
 708                   cursor++;
 709                 }
 710             }
 711
 712           start = BYTE_TO_CHAR (start_byte + cursor - base);
 713         }
 714       }
 715   else
 716     while (start > end)
 717       {
 718         /* The last character to check before the next obstacle.  */
 719         int ceiling_byte = CHAR_TO_BYTE (end);
 720         int start_byte = CHAR_TO_BYTE (start);
 721         int tem;
 722
 723         /* Consult the newline cache, if appropriate.  */
 724         if (target == '\n' && newline_cache)
 725           {
 726             int next_change;
 727             immediate_quit = 0;
 728             while (region_cache_backward
 729                    (current_buffer, newline_cache, start_byte, &next_change))
 730               start_byte = next_change;
 731             immediate_quit = allow_quit;
 732
 733             /* Start should never be at or before end.  */
 734             if (start_byte <= ceiling_byte)
 735               start_byte = ceiling_byte + 1;
 736
 737             /* Now the text before start is an unknown region, and
 738                next_change is the position of the next known region. */
 739             ceiling_byte = max (next_change, ceiling_byte);
 740           }
 741
 742         /* Stop scanning before the gap.  */
 743         tem = BUFFER_FLOOR_OF (start_byte - 1);
 744         ceiling_byte = max (tem, ceiling_byte);
 745
 746         {
 747           /* The termination address of the dumb loop.  */
 748           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 749           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 750           unsigned char *base = cursor;
 751
 752           while (cursor >= ceiling_addr)
 753             {
 754               unsigned char *scan_start = cursor;
 755
 756               while (*cursor != target && --cursor >= ceiling_addr)
 757                 ;
 758
 759               /* If we're looking for newlines, cache the fact that
 760                  the region from after the cursor to start is free of them.  */
 761               if (target == '\n' && newline_cache)
 762                 know_region_cache (current_buffer, newline_cache,
 763                                    start_byte + cursor - base,
 764                                    start_byte + scan_start - base);
 765
 766               /* Did we find the target character?  */
 767               if (cursor >= ceiling_addr)
 768                 {
 769                   if (++count >= 0)
 770                     {
 771                       immediate_quit = 0;
 772                       return BYTE_TO_CHAR (start_byte + cursor - base);
 773                     }
 774                   cursor--;
 775                 }
 776             }
 777
 778           start = BYTE_TO_CHAR (start_byte + cursor - base);
 779         }
 780       }
 781
 782   immediate_quit = 0;
 783   if (shortage != 0)
 784     *shortage = count * direction;
 785   return start;
 786 }
 787 \f
 788 /* Search for COUNT instances of a line boundary, which means either a
 789    newline or (if selective display enabled) a carriage return.
 790    Start at START.  If COUNT is negative, search backwards.
 791
 792    We report the resulting position by calling TEMP_SET_PT_BOTH.
 793
 794    If we find COUNT instances. we position after (always after,
 795    even if scanning backwards) the COUNTth match, and return 0.
 796
 797    If we don't find COUNT instances before reaching the end of the
 798    buffer (or the beginning, if scanning backwards), we return
 799    the number of line boundaries left unfound, and position at
 800    the limit we bumped up against.
 801
 802    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 803    except in special cases.  */
 804
 805 int
 806 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 807      int start, start_byte;
 808      int limit, limit_byte;
 809      register int count;
 810      int allow_quit;
 811 {
       /* +1 when COUNT is positive, -1 otherwise.  Used only to flip the
          sign of the leftover count in the final return.  */
 812   int direction = ((count > 0) ? 1 : -1);
 813
       /* CURSOR walks the buffer text; BASE remembers where it started in
          the current stretch, so CURSOR - BASE is a byte offset.  */
 814   register unsigned char *cursor;
 815   unsigned char *base;
 816
 817   register int ceiling;
 818   register unsigned char *ceiling_addr;
 819
       /* Saved so that every exit path can put immediate_quit back.  */
 820   int old_immediate_quit = immediate_quit;
 821
 822   /* The code that follows is like scan_buffer.
 823      NOTE(review): this comment used to say it checks for either
          newline or carriage return, but the loops below test only
          for '\n'.  */
 824
 825   if (allow_quit)
 826     immediate_quit++;
 827
       /* The START_BYTE argument is discarded: the byte position is
          recomputed from START.  */
 828   start_byte = CHAR_TO_BYTE (start);
 829
 830   if (count > 0)
 831     {
           /* Forward scan, one stretch of text per outer iteration.  */
 832       while (start_byte < limit_byte)
 833         {
               /* CEILING is the last byte position examined in this
                  stretch, never beyond LIMIT_BYTE - 1; CEILING_ADDR is
                  one past its address.  Presumably BUFFER_CEILING_OF
                  bounds text that is contiguous in memory -- TODO
                  confirm.  */
 834           ceiling =  BUFFER_CEILING_OF (start_byte);
 835           ceiling = min (limit_byte - 1, ceiling);
 836           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 837           base = (cursor = BYTE_POS_ADDR (start_byte));
 838           while (1)
 839             {
                   /* Advance CURSOR to the next '\n' or to CEILING_ADDR,
                      whichever comes first.  */
 840               while (*cursor != '\n' && ++cursor != ceiling_addr)
 841                 ;
 842
 843               if (cursor != ceiling_addr)
 844                 {
                       /* CURSOR is on a newline.  */
 845                   if (--count == 0)
 846                     {
                           /* That was the last one wanted: put point just
                              after it (offset CURSOR - BASE, plus 1).  */
 847                       immediate_quit = old_immediate_quit;
 848                       start_byte = start_byte + cursor - base + 1;
 849                       start = BYTE_TO_CHAR (start_byte);
 850                       TEMP_SET_PT_BOTH (start, start_byte);
 851                       return 0;
 852                     }
 853                   else
                         /* More to find: step over this newline, leaving
                            the inner loop if that ends the stretch.  */
 854                     if (++cursor == ceiling_addr)
 855                       break;
 856                 }
 857               else
 858                 break;
 859             }
               /* Account for the bytes consumed in this stretch.  */
 860           start_byte += cursor - base;
 861         }
 862     }
 863   else
 864     {
           /* Backward scan.  Here CEILING is the lowest byte position to
              examine (never below LIMIT_BYTE) and CEILING_ADDR is one
              before its address.  */
 865       while (start_byte > limit_byte)
 866         {
 867           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 868           ceiling = max (limit_byte, ceiling);
 869           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
               /* BASE and CURSOR start one past the byte at START_BYTE - 1;
                  the pre-decrement below looks at that byte first.  */
 870           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 871           while (1)
 872             {
 873               while (--cursor != ceiling_addr && *cursor != '\n')
 874                 ;
 875
 876               if (cursor != ceiling_addr)
 877                 {
                       /* COUNT is counted up toward zero in this
                          direction.  */
 878                   if (++count == 0)
 879                     {
 880                       immediate_quit = old_immediate_quit;
 881                       /* Return the position AFTER the match we found.  */
 882                       start_byte = start_byte + cursor - base + 1;
 883                       start = BYTE_TO_CHAR (start_byte);
 884                       TEMP_SET_PT_BOTH (start, start_byte);
 885                       return 0;
 886                     }
 887                 }
 888               else
 889                 break;
 890             }
 891           /* Here we add 1 to compensate for the last decrement
 892              of CURSOR, which took it past the valid range.  */
 893           start_byte += cursor - base + 1;
 894         }
 895     }
 896
       /* The limit was reached first: leave point at the limit and return
          the leftover count with its sign flipped by DIRECTION.  */
 897   TEMP_SET_PT_BOTH (limit, limit_byte);
 898   immediate_quit = old_immediate_quit;
 899
 900   return count * direction;
 901 }
 902
 903 int
 904 find_next_newline_no_quit (from, cnt)
 905      register int from, cnt;
 906 {
       /* Thin wrapper: ask scan_buffer for the CNT'th '\n' starting at
          FROM.  The remaining arguments are 0, a null int pointer and 0;
          presumably these mean "no explicit end bound", "shortage not
          wanted" and "do not allow quitting" -- TODO confirm against
          scan_buffer's definition.  */
 907   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 908 }
 909
 910 /* Like find_next_newline, but return the position before the newline,
 911    not after it, and search only up to TO.  This isn't just
 912    find_next_newline (...)-1, because you might hit TO: the result
        is moved back by one only when scan_buffer reports a shortage
        of zero.  */
 913
 914 int
 915 find_before_next_newline (from, to, cnt)
 916      int from, to, cnt;
 917 {
       /* Filled in by scan_buffer; zero is taken to mean that all CNT
          newlines were found.  */
 918   int shortage;
 919   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 920
       /* Step back over the newline only when the scan was complete;
          otherwise POS is returned exactly as scan_buffer gave it.  */
 921   if (shortage == 0)
 922     pos--;
 923
 924   return pos;
 925 }
 926 \f
 927 /* Subroutines of Lisp buffer search functions. */
 928
 929 static Lisp_Object
 930 search_command (string, bound, noerror, count, direction, RE, posix)
 931      Lisp_Object string, bound, noerror, count;
 932      int direction;
 933      int RE;
 934      int posix;
 935 {
       /* Search from point for STRING by calling search_buffer.

          COUNT, if non-nil, is multiplied by DIRECTION to give the signed
          repeat count N.  BOUND, if non-nil, limits the search; it is
          clipped to the range BEGV..ZV.  RE and POSIX are passed through
          to search_buffer unchanged.

          On success, move point to the position search_buffer returned
          and return it as a Lisp integer.  On failure: signal
          Qsearch_failed if NOERROR is nil; return nil if NOERROR is t;
          otherwise move point to the limit and return nil.  */
 936   register int np;
 937   int lim, lim_byte;
 938   int n = direction;
 939
 940   if (!NILP (count))
 941     {
 942       CHECK_NUMBER (count);
 943       n *= XINT (count);
 944     }
 945
 946   CHECK_STRING (string);
 947   if (NILP (bound))
 948     {
           /* No bound given: search to the end of the accessible region
              in the direction of N.  */
 949       if (n > 0)
 950         lim = ZV, lim_byte = ZV_BYTE;
 951       else
 952         lim = BEGV, lim_byte = BEGV_BYTE;
 953     }
 954   else
 955     {
 956       CHECK_NUMBER_COERCE_MARKER (bound);
 957       lim = XINT (bound);
           /* The bound must lie on the side of point we search toward.  */
 958       if (n > 0 ? lim < PT : lim > PT)
 959         error ("Invalid search bound (wrong side of point)");
           /* Clip the bound to BEGV..ZV, computing the byte position only
              when it is not one of the two ends.  */
 960       if (lim > ZV)
 961         lim = ZV, lim_byte = ZV_BYTE;
 962       else if (lim < BEGV)
 963         lim = BEGV, lim_byte = BEGV_BYTE;
 964       else
 965         lim_byte = CHAR_TO_BYTE (lim);
 966     }
 967
 968   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 969   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 970     = current_buffer->case_eqv_table;
 971
       /* The translation tables are passed only when case_fold_search is
          non-nil in the current buffer; otherwise both are Qnil.  */
 972   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 973                       (!NILP (current_buffer->case_fold_search)
 974                        ? current_buffer->case_canon_table
 975                        : Qnil),
 976                       (!NILP (current_buffer->case_fold_search)
 977                        ? current_buffer->case_eqv_table
 978                        : Qnil),
 979                       posix);
       /* A non-positive result from search_buffer is treated as failure.  */
 980   if (np <= 0)
 981     {
 982       if (NILP (noerror))
 983         xsignal1 (Qsearch_failed, string);
 984
 985       if (!EQ (noerror, Qt))
 986         {
               /* NOERROR is neither nil nor t: go to the limit.  */
 987           if (lim < BEGV || lim > ZV)
 988             abort ();
 989           SET_PT_BOTH (lim, lim_byte);
 990           return Qnil;
 991 #if 0 /* This would be clean, but maybe programs depend on
 992          a value of nil here.  */
 993           np = lim;
 994 #endif
 995         }
 996       else
 997         return Qnil;
 998     }
 999
       /* Sanity check: the match position must be inside BEGV..ZV.  */
1000   if (np < BEGV || np > ZV)
1001     abort ();
1002
1003   SET_PT (np);
1004
1005   return make_number (np);
1006 }
1007 \f
1008 /* Return 1 if REGEXP matches just one constant string, that is, if
        it contains none of the special constructs tested for below;
        return 0 otherwise.  */
1009
1010 static int
1011 trivial_regexp_p (regexp)
1012      Lisp_Object regexp;
1013 {
       /* Walk the bytes of REGEXP; LEN counts the bytes still unread.  */
1014   int len = SBYTES (regexp);
1015   unsigned char *s = SDATA (regexp);
1016   while (--len >= 0)
1017     {
1018       switch (*s++)
1019         {
             /* Any of these unquoted characters makes REGEXP non-trivial.  */
1020         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1021           return 0;
1022         case '\\':
               /* A backslash with nothing after it is not trivial.  */
1023           if (--len < 0)
1024             return 0;
               /* Backslash followed by one of these characters is not
                  trivial; followed by anything else it is accepted and
                  the scan continues after that character.  */
1025           switch (*s++)
1026             {
1027             case '|': case '(': case ')': case '`': case '\'': case 'b':
1028             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1029             case 'S': case '=': case '{': case '}': case '_':
1030             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1031             case '1': case '2': case '3': case '4': case '5':
1032             case '6': case '7': case '8': case '9':
1033               return 0;
1034             }
1035         }
1036     }
1037   return 1;
1038 }
1039
1040 /* Search for the n'th occurrence of STRING in the current buffer,
1041    starting at position POS and stopping at position LIM,
1042    treating STRING as a literal string if RE is false or as
1043    a regular expression if RE is true.
1044
1045    If N is positive, searching is forward and LIM must be greater than POS.
1046    If N is negative, searching is backward and LIM must be less than POS.
1047
1048    Returns -x if x occurrences remain to be found (x > 0),
1049    or else the position at the beginning of the Nth occurrence
1050    (if searching backward) or the end (if searching forward).
1051
1052    POSIX is nonzero if we want full backtracking (POSIX style)
1053    for this pattern.  0 means backtrack only enough to get a valid match.  */
1054
     /* TRANSLATE (OUT, TRT, D): store into OUT the translation of D.
        If TRT is non-nil and Faref (TRT, D) yields an integer, OUT gets
        that integer; in every other case OUT gets D itself.  D appears
        more than once in the expansion, so it must not have side
        effects.  */
1055 #define TRANSLATE(out, trt, d)                  \
1056 do                                              \
1057   {                                             \
1058     if (! NILP (trt))                           \
1059       {                                         \
1060         Lisp_Object temp;                       \
1061         temp = Faref (trt, make_number (d));    \
1062         if (INTEGERP (temp))                    \
1063           out = XINT (temp);                    \
1064         else                                    \
1065           out = d;                              \
1066       }                                         \
1067     else                                        \
1068       out = d;                                  \
1069   }                                             \
1070 while (0)
1071
1072 /* Only used in search_buffer, to receive the match registers
1073    when searching regexps and SEARCH_REGS should not be changed
1074    (i.e. Vinhibit_changing_match_data is non-nil).  search_buffer
        then reads start[0] from it after a backward match and end[0]
        after a forward match.  */
1075 static struct re_registers search_regs_1;
1076
1077 static int
1078 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1079                RE, trt, inverse_trt, posix)
1080      Lisp_Object string;
1081      int pos;
1082      int pos_byte;
1083      int lim;
1084      int lim_byte;
1085      int n;
1086      int RE;
1087      Lisp_Object trt;
1088      Lisp_Object inverse_trt;
1089      int posix;
1090 {
       /* Search N times for STRING from POS/POS_BYTE toward LIM/LIM_BYTE.

          A regexp that is not trivial (see the test below) is compiled
          with compile_pattern and matched with re_search_2.  Anything
          else is converted to the buffer's multibyteness, translated
          through TRT and handed to boyer_moore or simple_search.

          In the regexp branch the return value is the resulting
          character position, or a negative number whose magnitude is
          the count of matches still missing.  */
1091   int len = SCHARS (string);
1092   int len_byte = SBYTES (string);
1093   register int i;
1094
1095   if (running_asynch_code)
1096     save_search_regs ();
1097
1098   /* Searching 0 times means don't move.  */
1099   /* Null string is found at starting position.  */
1100   if (len == 0 || n == 0)
1101     {
1102       set_search_regs (pos_byte, 0);
1103       return pos;
1104     }
1105
       /* Use the regexp matcher unless STRING is a trivial regexp and
          Vsearch_spaces_regexp is nil; in that case the literal search
          in the else branch is used even though RE is set.  */
1106   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1107     {
1108       unsigned char *p1, *p2;
1109       int s1, s2;
1110       struct re_pattern_buffer *bufp;
1111
           /* Registers go to search_regs_1 instead of search_regs when
              Vinhibit_changing_match_data is non-nil.  */
1112       bufp = compile_pattern (string,
1113                               (NILP (Vinhibit_changing_match_data)
1114                                ? &search_regs : &search_regs_1),
1115                               trt, posix,
1116                               !NILP (current_buffer->enable_multibyte_characters));
1117
1118       immediate_quit = 1;       /* Quit immediately if user types ^G,
1119                                    because letting this function finish
1120                                    can take too long. */
1121       QUIT;                     /* Do a pending quit right away,
1122                                    to avoid paradoxical behavior */
1123       /* Get pointers and sizes of the two strings
1124          that make up the visible portion of the buffer. */
1125
1126       p1 = BEGV_ADDR;
1127       s1 = GPT_BYTE - BEGV_BYTE;
1128       p2 = GAP_END_ADDR;
1129       s2 = ZV_BYTE - GPT_BYTE;
           /* GPT_BYTE below BEGV_BYTE: all of the visible text goes in
              the second string and the first is empty.  */
1130       if (s1 < 0)
1131         {
1132           p2 = p1;
1133           s2 = ZV_BYTE - BEGV_BYTE;
1134           s1 = 0;
1135         }
           /* GPT_BYTE above ZV_BYTE: all of the visible text goes in the
              first string and the second is empty.  */
1136       if (s2 < 0)
1137         {
1138           s1 = ZV_BYTE - BEGV_BYTE;
1139           s2 = 0;
1140         }
1141       re_match_object = Qnil;
1142
           /* Backward search.  Start offset and range are byte offsets
              relative to BEGV_BYTE; the range LIM_BYTE - POS_BYTE is
              presumably negative here, which re_search_2 presumably
              takes as "scan backward" -- TODO confirm.  */
1143       while (n < 0)
1144         {
1145           int val;
1146           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1147                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1148                              (NILP (Vinhibit_changing_match_data)
1149                               ? &search_regs : &search_regs_1),
1150                              /* Don't allow match past current point */
1151                              pos_byte - BEGV_BYTE);
1152           if (val == -2)
1153             {
1154               matcher_overflow ();
1155             }
1156           if (val >= 0)
1157             {
1158               if (NILP (Vinhibit_changing_match_data))
1159                 {
                       /* Take the new byte position from the start of
                          the match, then convert every register that is
                          set from a byte offset to a character
                          position.  */
1160                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1161                   for (i = 0; i < search_regs.num_regs; i++)
1162                     if (search_regs.start[i] >= 0)
1163                       {
1164                         search_regs.start[i]
1165                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1166                         search_regs.end[i]
1167                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1168                       }
1169                   XSETBUFFER (last_thing_searched, current_buffer);
1170                   /* Set pos to the new position. */
1171                   pos = search_regs.start[0];
1172                 }
1173               else
1174                 {
                       /* Match data must stay untouched: read the start
                          of the match from the private registers.  */
1175                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1176                   /* Set pos to the new position.  */
1177                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1178                 }
1179             }
1180           else
1181             {
                   /* No match: N is still negative and is returned as
                      is.  */
1182               immediate_quit = 0;
1183               return (n);
1184             }
1185           n++;
1186         }
           /* Forward search: same scheme, but the position advances to
              the END of each match.  */
1187       while (n > 0)
1188         {
1189           int val;
1190           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1191                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1192                              (NILP (Vinhibit_changing_match_data)
1193                               ? &search_regs : &search_regs_1),
1194                              lim_byte - BEGV_BYTE);
1195           if (val == -2)
1196             {
1197               matcher_overflow ();
1198             }
1199           if (val >= 0)
1200             {
1201               if (NILP (Vinhibit_changing_match_data))
1202                 {
1203                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1204                   for (i = 0; i < search_regs.num_regs; i++)
1205                     if (search_regs.start[i] >= 0)
1206                       {
1207                         search_regs.start[i]
1208                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1209                         search_regs.end[i]
1210                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1211                       }
1212                   XSETBUFFER (last_thing_searched, current_buffer);
1213                   pos = search_regs.end[0];
1214                 }
1215               else
1216                 {
1217                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1218                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1219                 }
1220             }
1221           else
1222             {
                   /* No match: return minus the number of matches still
                      wanted.  */
1223               immediate_quit = 0;
1224               return (0 - n);
1225             }
1226           n--;
1227         }
1228       immediate_quit = 0;
1229       return (pos);
1230     }
1231   else                          /* non-RE case */
1232     {
1233       unsigned char *raw_pattern, *pat;
1234       int raw_pattern_size;
1235       int raw_pattern_size_byte;
1236       unsigned char *patbuf;
1237       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1238       unsigned char *base_pat;
1239       /* Set to positive if we find a non-ASCII char that need
1240          translation.  Otherwise set to zero later.  */
1241       int char_base = -1;
1242       int boyer_moore_ok = 1;
1243
1244       /* MULTIBYTE says whether the text to be searched is multibyte.
1245          We must convert PATTERN to match that, or we will not really
1246          find things right.  */
1247
1248       if (multibyte == STRING_MULTIBYTE (string))
1249         {
               /* Same representation: use the string's bytes directly.  */
1250           raw_pattern = (unsigned char *) SDATA (string);
1251           raw_pattern_size = SCHARS (string);
1252           raw_pattern_size_byte = SBYTES (string);
1253         }
1254       else if (multibyte)
1255         {
               /* Unibyte string, multibyte buffer: build a converted
                  copy on the stack.  */
1256           raw_pattern_size = SCHARS (string);
1257           raw_pattern_size_byte
1258             = count_size_as_multibyte (SDATA (string),
1259                                        raw_pattern_size);
1260           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1261           copy_text (SDATA (string), raw_pattern,
1262                      SCHARS (string), 0, 1);
1263         }
1264       else
1265         {
1266           /* Converting multibyte to single-byte.
1267
1268              ??? Perhaps this conversion should be done in a special way
1269              by subtracting nonascii-insert-offset from each non-ASCII char,
1270              so that only the multibyte chars which really correspond to
1271              the chosen single-byte character set can possibly match.  */
1272           raw_pattern_size = SCHARS (string);
1273           raw_pattern_size_byte = SCHARS (string);
1274           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1275           copy_text (SDATA (string), raw_pattern,
1276                      SBYTES (string), 1, 0);
1277         }
1278
1279       /* Copy and optionally translate the pattern.  */
1280       len = raw_pattern_size;
1281       len_byte = raw_pattern_size_byte;
           /* Room for LEN characters of MAX_MULTIBYTE_LENGTH bytes each.  */
1282       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1283       pat = patbuf;
1284       base_pat = raw_pattern;
1285       if (multibyte)
1286         {
1287           /* Fill patbuf by translated characters in STRING while
1288              checking if we can use boyer-moore search.  If TRT is
1289              non-nil, we can use boyer-moore search only if TRT can be
1290              represented by the byte array of 256 elements.  For that,
1291              all non-ASCII case-equivalents of all case-sensitive
1292              characters in STRING must belong to the same charset and
1293              row.  */
1294
1295           while (--len >= 0)
1296             {
1297               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1298               int c, translated, inverse;
1299               int in_charlen, charlen;
1300
1301               /* If we got here and the RE flag is set, it's because we're
1302                  dealing with a regexp known to be trivial, so the backslash
1303                  just quotes the next character.  */
1304               if (RE && *base_pat == '\\')
1305                 {
                       /* Drop the backslash: it counts as one character
                          and one byte of the raw pattern.  */
1306                   len--;
1307                   raw_pattern_size--;
1308                   len_byte--;
1309                   base_pat++;
1310                 }
1311
1312               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1313
1314               if (NILP (trt))
1315                 {
                       /* No translation: copy the character's bytes
                          as they are.  */
1316                   str = base_pat;
1317                   charlen = in_charlen;
1318                 }
1319               else
1320                 {
1321                   /* Translate the character.  */
1322                   TRANSLATE (translated, trt, c);
1323                   charlen = CHAR_STRING (translated, str_base);
1324                   str = str_base;
1325
1326                   /* Check if C has any other case-equivalents.  */
1327                   TRANSLATE (inverse, inverse_trt, c);
1328                   /* If so, check if we can use boyer-moore.  */
1329                   if (c != inverse && boyer_moore_ok)
1330                     {
1331                       /* Check if all equivalents belong to the same
1332                          group of characters.  Note that the check of C
1333                          itself is done by the last iteration.  */
1334                       int this_char_base = -1;
1335
                           /* Follow INVERSE_TRT from one equivalent to
                              the next until it comes back to C or
                              boyer-moore is ruled out.  */
1336                       while (boyer_moore_ok)
1337                         {
1338                           if (ASCII_BYTE_P (inverse))
1339                             {
                                   /* An ASCII equivalent cannot be mixed
                                      with a non-ASCII group already
                                      recorded for this character.  */
1340                               if (this_char_base > 0)
1341                                 boyer_moore_ok = 0;
1342                               else
1343                                 {
1344                                   this_char_base = 0;
1345                                   if (char_base < 0)
1346                                     char_base = this_char_base;
1347                                 }
1348                             }
1349                           else if (CHAR_BYTE8_P (inverse))
1350                             /* Boyer-moore search can't handle a
1351                                translation of an eight-bit
1352                                character.  */
1353                             boyer_moore_ok = 0;
1354                           else if (this_char_base < 0)
1355                             {
                                   /* First non-ASCII equivalent: its group
                                      is the code with the low six bits
                                      cleared, and must agree with any
                                      non-ASCII group recorded earlier.  */
1356                               this_char_base = inverse & ~0x3F;
1357                               if (char_base < 0)
1358                                 char_base = this_char_base;
1359                               else if (char_base > 0
1360                                        && this_char_base != char_base)
1361                                 boyer_moore_ok = 0;
1362                             }
1363                           else if ((inverse & ~0x3F) != this_char_base)
1364                             boyer_moore_ok = 0;
1365                           if (c == inverse)
1366                             break;
1367                           TRANSLATE (inverse, inverse_trt, inverse);
1368                         }
1369                     }
1370                 }
1371               if (char_base < 0)
1372                 char_base = 0;
1373
1374               /* Store this character into the translated pattern.  */
1375               bcopy (str, pat, charlen);
1376               pat += charlen;
1377               base_pat += in_charlen;
1378               len_byte -= in_charlen;
1379             }
1380         }
1381       else
1382         {
1383           /* Unibyte buffer.  */
1384           char_base = 0;
1385           while (--len >= 0)
1386             {
1387               int c, translated;
1388
1389               /* If we got here and the RE flag is set, it's because we're
1390                  dealing with a regexp known to be trivial, so the backslash
1391                  just quotes the next character.  */
1392               if (RE && *base_pat == '\\')
1393                 {
1394                   len--;
1395                   raw_pattern_size--;
1396                   base_pat++;
1397                 }
1398               c = *base_pat++;
1399               TRANSLATE (translated, trt, c);
1400               *pat++ = translated;
1401             }
1402         }
1403
           /* From here on LEN/LEN_BYTE describe the translated pattern
              in PATBUF; RAW_PATTERN_SIZE has already been reduced by the
              number of quoting backslashes dropped above.  */
1404       len_byte = pat - patbuf;
1405       len = raw_pattern_size;
1406       pat = base_pat = patbuf;
1407
1408       if (boyer_moore_ok)
1409         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1410                             pos, pos_byte, lim, lim_byte,
1411                             char_base);
1412       else
1413         return simple_search (n, pat, len, len_byte, trt,
1414                               pos, pos_byte, lim, lim_byte);
1415     }
1416 }
1417 \f
1418 /* Do a simple string search N times for the string PAT,
1419    whose length is LEN/LEN_BYTE,
1420    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1421    TRT is the translation table.
1422
1423    Return the character position where the match is found.
1424    Otherwise, if M matches remained to be found, return -M.
1425
1426    This kind of search works regardless of what is in PAT and
1427    regardless of what is in TRT.  It is used in cases where
1428    boyer_moore cannot work.

        Only buffer characters are passed through TRT here; PAT is
        compared as given.  When all N matches are found, the last one
        is recorded with set_search_regs before returning.  */
1429
1430 static int
1431 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1432      int n;
1433      unsigned char *pat;
1434      int len, len_byte;
1435      Lisp_Object trt;
1436      int pos, pos_byte;
1437      int lim, lim_byte;
1438 {
1439   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
       /* Remembered now because N is counted down to zero below.  */
1440   int forward = n > 0;
1441   /* Number of buffer bytes matched.  Note that this may be different
1442      from len_byte in a multibyte buffer.  */
1443   int match_byte;
1444
       /* Forward search, multibyte buffer.  */
1445   if (lim > pos && multibyte)
1446     while (n > 0)
1447       {
1448         while (1)
1449           {
1450             /* Try matching at position POS.  */
1451             int this_pos = pos;
1452             int this_pos_byte = pos_byte;
1453             int this_len = len;
1454             int this_len_byte = len_byte;
1455             unsigned char *p = pat;
                 /* Give up once the pattern no longer fits before the
                    limit, in characters or in bytes.  */
1456             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1457               goto stop;
1458
                 /* Compare character by character; THIS_LEN reaches zero
                    only if every pattern character matched.  */
1459             while (this_len > 0)
1460               {
1461                 int charlen, buf_charlen;
1462                 int pat_ch, buf_ch;
1463
1464                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1465                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1466                                                  ZV_BYTE - this_pos_byte,
1467                                                  buf_charlen);
1468                 TRANSLATE (buf_ch, trt, buf_ch);
1469
1470                 if (buf_ch != pat_ch)
1471                   break;
1472
1473                 this_len_byte -= charlen;
1474                 this_len--;
1475                 p += charlen;
1476
1477                 this_pos_byte += buf_charlen;
1478                 this_pos++;
1479               }
1480
1481             if (this_len == 0)
1482               {
                     /* Matched: move past the match, advancing the byte
                        position by the bytes consumed in the buffer.  */
1483                 match_byte = this_pos_byte - pos_byte;
1484                 pos += len;
1485                 pos_byte += match_byte;
1486                 break;
1487               }
1488
                 /* No match here; try one character further on.  */
1489             INC_BOTH (pos, pos_byte);
1490           }
1491
1492         n--;
1493       }
       /* Forward search, unibyte buffer: only POS is maintained.  */
1494   else if (lim > pos)
1495     while (n > 0)
1496       {
1497         while (1)
1498           {
1499             /* Try matching at position POS.  */
1500             int this_pos = pos;
1501             int this_len = len;
1502             unsigned char *p = pat;
1503
1504             if (pos + len > lim)
1505               goto stop;
1506
1507             while (this_len > 0)
1508               {
1509                 int pat_ch = *p++;
1510                 int buf_ch = FETCH_BYTE (this_pos);
1511                 TRANSLATE (buf_ch, trt, buf_ch);
1512
1513                 if (buf_ch != pat_ch)
1514                   break;
1515
1516                 this_len--;
1517                 this_pos++;
1518               }
1519
1520             if (this_len == 0)
1521               {
1522                 match_byte = len;
1523                 pos += len;
1524                 break;
1525               }
1526
1527             pos++;
1528           }
1529
1530         n--;
1531       }
1532   /* Backwards search.  */
1533   else if (lim < pos && multibyte)
1534     while (n < 0)
1535       {
1536         while (1)
1537           {
1538             /* Try matching at position POS.  */
                 /* The candidate match is the LEN characters that end
                    at POS.  */
1539             int this_pos = pos - len;
1540             int this_pos_byte;
1541             int this_len = len;
1542             int this_len_byte = len_byte;
1543             unsigned char *p = pat;
1544
1545             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1546               goto stop;
1547             this_pos_byte = CHAR_TO_BYTE (this_pos);
                 /* Bytes occupied in the buffer by the candidate.  */
1548             match_byte = pos_byte - this_pos_byte;
1549
1550             while (this_len > 0)
1551               {
1552                 int charlen, buf_charlen;
1553                 int pat_ch, buf_ch;
1554
1555                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1556                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1557                                                  ZV_BYTE - this_pos_byte,
1558                                                  buf_charlen);
1559                 TRANSLATE (buf_ch, trt, buf_ch);
1560
1561                 if (buf_ch != pat_ch)
1562                   break;
1563
1564                 this_len_byte -= charlen;
1565                 this_len--;
1566                 p += charlen;
1567                 this_pos_byte += buf_charlen;
1568                 this_pos++;
1569               }
1570
1571             if (this_len == 0)
1572               {
                     /* Matched: move to the beginning of the match.  */
1573                 pos -= len;
1574                 pos_byte -= match_byte;
1575                 break;
1576               }
1577
1578             DEC_BOTH (pos, pos_byte);
1579           }
1580
1581         n++;
1582       }
       /* Backward search, unibyte buffer: only POS is maintained.  */
1583   else if (lim < pos)
1584     while (n < 0)
1585       {
1586         while (1)
1587           {
1588             /* Try matching at position POS.  */
1589             int this_pos = pos - len;
1590             int this_len = len;
1591             unsigned char *p = pat;
1592
1593             if (this_pos < lim)
1594               goto stop;
1595
1596             while (this_len > 0)
1597               {
1598                 int pat_ch = *p++;
1599                 int buf_ch = FETCH_BYTE (this_pos);
1600                 TRANSLATE (buf_ch, trt, buf_ch);
1601
1602                 if (buf_ch != pat_ch)
1603                   break;
1604                 this_len--;
1605                 this_pos++;
1606               }
1607
1608             if (this_len == 0)
1609               {
1610                 match_byte = len;
1611                 pos -= len;
1612                 break;
1613               }
1614
1615             pos--;
1616           }
1617
1618         n++;
1619       }
1620
1621  stop:
       /* N is zero only when every requested match was found.  */
1622   if (n == 0)
1623     {
           /* Record the last match.  After a forward search the position
              is at the end of the match, so MATCH_BYTE is subtracted to
              get its start; after a backward search the position already
              is the start.  In a unibyte buffer POS is used in place of
              POS_BYTE, which the unibyte loops above do not update.  */
1624       if (forward)
1625         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1626       else
1627         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1628
1629       return pos;
1630     }
       /* Otherwise return minus the number of matches still missing:
          N itself when it is already negative.  */
1631   else if (n > 0)
1632     return -n;
1633   else
1634     return n;
1635 }
1636 \f
1637 /* Do Boyer-Moore search N times for the string BASE_PAT,
1638    whose length is LEN/LEN_BYTE,
1639    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1640    DIRECTION says which direction we search in.
1641    TRT and INVERSE_TRT are translation tables.
1642    Characters in PAT are already translated by TRT.
1643
1644    This kind of search works if all the characters in BASE_PAT that
1645    have nontrivial translation are the same aside from the last byte.
1646    This makes it possible to translate just the last byte of a
1647    character, and do so after just a simple test of the context.
1648    CHAR_BASE is nonzero if there is such a non-ASCII character.
1649
1650    If that criterion is not satisfied, do not call this function.  */
1651
1652 static int
1653 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1654              pos, pos_byte, lim, lim_byte, char_base)
1655      int n;
1656      unsigned char *base_pat;
1657      int len, len_byte;
1658      Lisp_Object trt;
1659      Lisp_Object inverse_trt;
1660      int pos, pos_byte;
1661      int lim, lim_byte;
1662      int char_base;
1663 {
1664   int direction = ((n > 0) ? 1 : -1);
1665   register int dirlen;
1666   int infinity, limit, stride_for_teases = 0;
1667   register int *BM_tab;
1668   int *BM_tab_base;
1669   register unsigned char *cursor, *p_limit;
1670   register int i, j;
1671   unsigned char *pat, *pat_end;
1672   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1673
1674   unsigned char simple_translate[0400];
1675   /* These are set to the preceding bytes of a byte to be translated
1676      if char_base is nonzero.  As the maximum byte length of a
1677      multibyte character is 5, we have to check at most four previous
1678      bytes.  */
1679   int translate_prev_byte1 = 0;
1680   int translate_prev_byte2 = 0;
1681   int translate_prev_byte3 = 0;
1682   int translate_prev_byte4 = 0;
1683
1684   BM_tab = (int *) alloca (0400 * sizeof (int));
1685
1686   /* The general approach is that we are going to maintain that we know */
1687   /* the first (closest to the present position, in whatever direction */
1688   /* we're searching) character that could possibly be the last */
1689   /* (furthest from present position) character of a valid match.  We */
1690   /* advance the state of our knowledge by looking at that character */
1691   /* and seeing whether it indeed matches the last character of the */
1692   /* pattern.  If it does, we take a closer look.  If it does not, we */
1693   /* move our pointer (to putative last characters) as far as is */
1694   /* logically possible.  This amount of movement, which I call a */
1695   /* stride, will be the length of the pattern if the actual character */
1696   /* appears nowhere in the pattern, otherwise it will be the distance */
1697   /* from the last occurrence of that character to the end of the */
1698   /* pattern. */
1699   /* As a coding trick, an enormous stride is coded into the table for */
1700   /* characters that match the last character.  This allows use of only */
1701   /* a single test, a test for having gone past the end of the */
1702   /* permissible match region, to test for both possible matches (when */
1703   /* the stride goes past the end immediately) and failure to */
1704   /* match (where you get nudged past the end one stride at a time). */
1705
1706   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1707   /* is determined only by the last character of the putative match. */
1708   /* If that character does not match, we will stride the proper */
1709   /* distance to propose a match that superimposes it on the last */
1710   /* instance of a character that matches it (per trt), or misses */
1711   /* it entirely if there is none. */
1712
1713   dirlen = len_byte * direction;
1714   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1715
1716   /* Record position after the end of the pattern.  */
1717   pat_end = base_pat + len_byte;
1718   /* BASE_PAT points to a character that we start scanning from.
1719      It is the first character in a forward search,
1720      the last character in a backward search.  */
1721   if (direction < 0)
1722     base_pat = pat_end - 1;
1723
1724   BM_tab_base = BM_tab;
1725   BM_tab += 0400;
1726   j = dirlen;           /* to get it in a register */
1727   /* A character that does not appear in the pattern induces a */
1728   /* stride equal to the pattern length. */
1729   while (BM_tab_base != BM_tab)
1730     {
1731       *--BM_tab = j;
1732       *--BM_tab = j;
1733       *--BM_tab = j;
1734       *--BM_tab = j;
1735     }
1736
1737   /* We use this for translation, instead of TRT itself.
1738      We fill this in to handle the characters that actually
1739      occur in the pattern.  Others don't matter anyway!  */
1740   bzero (simple_translate, sizeof simple_translate);
1741   for (i = 0; i < 0400; i++)
1742     simple_translate[i] = i;
1743
1744   if (char_base)
1745     {
1746       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1747          byte following them are the target of translation.  */
1748       unsigned char str[MAX_MULTIBYTE_LENGTH];
1749       int len = CHAR_STRING (char_base, str);
1750
1751       translate_prev_byte1 = str[len - 2];
1752       if (len > 2)
1753         {
1754           translate_prev_byte2 = str[len - 3];
1755           if (len > 3)
1756             {
1757               translate_prev_byte3 = str[len - 4];
1758               if (len > 4)
1759                 translate_prev_byte4 = str[len - 5];
1760             }
1761         }
1762     }
1763
1764   i = 0;
1765   while (i != infinity)
1766     {
1767       unsigned char *ptr = base_pat + i;
1768       i += direction;
1769       if (i == dirlen)
1770         i = infinity;
1771       if (! NILP (trt))
1772         {
1773           /* If the byte currently looking at is the last of a
1774              character to check case-equivalents, set CH to that
1775              character.  An ASCII character and a non-ASCII character
1776              matching with CHAR_BASE are to be checked.  */
1777           int ch = -1;
1778
1779           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1780             ch = *ptr;
1781           else if (char_base
1782                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1783             {
1784               unsigned char *charstart = ptr - 1;
1785
1786               while (! (CHAR_HEAD_P (*charstart)))
1787                 charstart--;
1788               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1789               if (char_base != (ch & ~0x3F))
1790                 ch = -1;
1791             }
1792
1793           if (ch >= 0200)
1794             j = (ch & 0x3F) | 0200;
1795           else
1796             j = *ptr;
1797
1798           if (i == infinity)
1799             stride_for_teases = BM_tab[j];
1800
1801           BM_tab[j] = dirlen - i;
1802           /* A translation table is accompanied by its inverse -- see */
1803           /* comment following downcase_table for details */
1804           if (ch >= 0)
1805             {
1806               int starting_ch = ch;
1807               int starting_j = j;
1808
1809               while (1)
1810                 {
1811                   TRANSLATE (ch, inverse_trt, ch);
1812                   if (ch >= 0200)
1813                     j = (ch & 0x3F) | 0200;
1814                   else
1815                     j = ch;
1816
1817                   /* For all the characters that map into CH,
1818                      set up simple_translate to map the last byte
1819                      into STARTING_J.  */
1820                   simple_translate[j] = starting_j;
1821                   if (ch == starting_ch)
1822                     break;
1823                   BM_tab[j] = dirlen - i;
1824                 }
1825             }
1826         }
1827       else
1828         {
1829           j = *ptr;
1830
1831           if (i == infinity)
1832             stride_for_teases = BM_tab[j];
1833           BM_tab[j] = dirlen - i;
1834         }
1835       /* stride_for_teases tells how much to stride if we get a */
1836       /* match on the far character but are subsequently */
1837       /* disappointed, by recording what the stride would have been */
1838       /* for that character if the last character had been */
1839       /* different. */
1840     }
1841   infinity = dirlen - infinity;
1842   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1843   /* loop invariant - POS_BYTE points at where last char (first
1844      char if reverse) of pattern would align in a possible match.  */
1845   while (n != 0)
1846     {
1847       int tail_end;
1848       unsigned char *tail_end_ptr;
1849
1850       /* It's been reported that some (broken) compiler thinks that
1851          Boolean expressions in an arithmetic context are unsigned.
1852          Using an explicit ?1:0 prevents this.  */
1853       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1854           < 0)
1855         return (n * (0 - direction));
1856       /* First we do the part we can by pointers (maybe nothing) */
1857       QUIT;
1858       pat = base_pat;
1859       limit = pos_byte - dirlen + direction;
1860       if (direction > 0)
1861         {
1862           limit = BUFFER_CEILING_OF (limit);
1863           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1864              can take on without hitting edge of buffer or the gap.  */
1865           limit = min (limit, pos_byte + 20000);
1866           limit = min (limit, lim_byte - 1);
1867         }
1868       else
1869         {
1870           limit = BUFFER_FLOOR_OF (limit);
1871           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1872              can take on without hitting edge of buffer or the gap.  */
1873           limit = max (limit, pos_byte - 20000);
1874           limit = max (limit, lim_byte);
1875         }
1876       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1877       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1878
1879       if ((limit - pos_byte) * direction > 20)
1880         {
1881           unsigned char *p2;
1882
1883           p_limit = BYTE_POS_ADDR (limit);
1884           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1885           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1886           while (1)             /* use one cursor setting as long as i can */
1887             {
1888               if (direction > 0) /* worth duplicating */
1889                 {
1890                   /* Use signed comparison if appropriate
1891                      to make cursor+infinity sure to be > p_limit.
1892                      Assuming that the buffer lies in a range of addresses
1893                      that are all "positive" (as ints) or all "negative",
1894                      either kind of comparison will work as long
1895                      as we don't step by infinity.  So pick the kind
1896                      that works when we do step by infinity.  */
1897                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1898                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1899                       cursor += BM_tab[*cursor];
1900                   else
1901                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1902                       cursor += BM_tab[*cursor];
1903                 }
1904               else
1905                 {
1906                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1907                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1908                       cursor += BM_tab[*cursor];
1909                   else
1910                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1911                       cursor += BM_tab[*cursor];
1912                 }
1913 /* If you are here, cursor is beyond the end of the searched region. */
1914 /* This can happen if you match on the far character of the pattern, */
1915 /* because the "stride" of that character is infinity, a number able */
1916 /* to throw you well beyond the end of the search.  It can also */
1917 /* happen if you fail to match within the permitted region and would */
1918 /* otherwise try a character beyond that region */
1919               if ((cursor - p_limit) * direction <= len_byte)
1920                 break;  /* a small overrun is genuine */
1921               cursor -= infinity; /* large overrun = hit */
1922               i = dirlen - direction;
1923               if (! NILP (trt))
1924                 {
1925                   while ((i -= direction) + direction != 0)
1926                     {
1927                       int ch;
1928                       cursor -= direction;
1929                       /* Translate only the last byte of a character.  */
1930                       if (! multibyte
1931                           || ((cursor == tail_end_ptr
1932                                || CHAR_HEAD_P (cursor[1]))
1933                               && (CHAR_HEAD_P (cursor[0])
1934                                   /* Check if this is the last byte of
1935                                      a translable character.  */
1936                                   || (translate_prev_byte1 == cursor[-1]
1937                                       && (CHAR_HEAD_P (translate_prev_byte1)
1938                                           || (translate_prev_byte2 == cursor[-2]
1939                                               && (CHAR_HEAD_P (translate_prev_byte2)
1940                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1941                         ch = simple_translate[*cursor];
1942                       else
1943                         ch = *cursor;
1944                       if (pat[i] != ch)
1945                         break;
1946                     }
1947                 }
1948               else
1949                 {
1950                   while ((i -= direction) + direction != 0)
1951                     {
1952                       cursor -= direction;
1953                       if (pat[i] != *cursor)
1954                         break;
1955                     }
1956                 }
1957               cursor += dirlen - i - direction; /* fix cursor */
1958               if (i + direction == 0)
1959                 {
1960                   int position, start, end;
1961
1962                   cursor -= direction;
1963
1964                   position = pos_byte + cursor - p2 + ((direction > 0)
1965                                                        ? 1 - len_byte : 0);
1966                   set_search_regs (position, len_byte);
1967
1968                   if (NILP (Vinhibit_changing_match_data))
1969                     {
1970                       start = search_regs.start[0];
1971                       end = search_regs.end[0];
1972                     }
1973                   else
1974                     /* If Vinhibit_changing_match_data is non-nil,
1975                        search_regs will not be changed.  So let's
1976                        compute start and end here.  */
1977                     {
1978                       start = BYTE_TO_CHAR (position);
1979                       end = BYTE_TO_CHAR (position + len_byte);
1980                     }
1981
1982                   if ((n -= direction) != 0)
1983                     cursor += dirlen; /* to resume search */
1984                   else
1985                     return direction > 0 ? end : start;
1986                 }
1987               else
1988                 cursor += stride_for_teases; /* <sigh> we lose -  */
1989             }
1990           pos_byte += cursor - p2;
1991         }
1992       else
1993         /* Now we'll pick up a clump that has to be done the hard */
1994         /* way because it covers a discontinuity */
1995         {
1996           limit = ((direction > 0)
1997                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1998                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1999           limit = ((direction > 0)
2000                    ? min (limit + len_byte, lim_byte - 1)
2001                    : max (limit - len_byte, lim_byte));
2002           /* LIMIT is now the last value POS_BYTE can have
2003              and still be valid for a possible match.  */
2004           while (1)
2005             {
2006               /* This loop can be coded for space rather than */
2007               /* speed because it will usually run only once. */
2008               /* (the reach is at most len + 21, and typically */
2009               /* does not exceed len) */
2010               while ((limit - pos_byte) * direction >= 0)
2011                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2012               /* now run the same tests to distinguish going off the */
2013               /* end, a match or a phony match. */
2014               if ((pos_byte - limit) * direction <= len_byte)
2015                 break;  /* ran off the end */
2016               /* Found what might be a match.
2017                  Set POS_BYTE back to last (first if reverse) pos.  */
2018               pos_byte -= infinity;
2019               i = dirlen - direction;
2020               while ((i -= direction) + direction != 0)
2021                 {
2022                   int ch;
2023                   unsigned char *ptr;
2024                   pos_byte -= direction;
2025                   ptr = BYTE_POS_ADDR (pos_byte);
2026                   /* Translate only the last byte of a character.  */
2027                   if (! multibyte
2028                       || ((ptr == tail_end_ptr
2029                            || CHAR_HEAD_P (ptr[1]))
2030                           && (CHAR_HEAD_P (ptr[0])
2031                               /* Check if this is the last byte of a
2032                                  translable character.  */
2033                               || (translate_prev_byte1 == ptr[-1]
2034                                   && (CHAR_HEAD_P (translate_prev_byte1)
2035                                       || (translate_prev_byte2 == ptr[-2]
2036                                           && (CHAR_HEAD_P (translate_prev_byte2)
2037                                               || translate_prev_byte3 == ptr[-3])))))))
2038                     ch = simple_translate[*ptr];
2039                   else
2040                     ch = *ptr;
2041                   if (pat[i] != ch)
2042                     break;
2043                 }
2044               /* Above loop has moved POS_BYTE part or all the way
2045                  back to the first pos (last pos if reverse).
2046                  Set it once again at the last (first if reverse) char.  */
2047               pos_byte += dirlen - i- direction;
2048               if (i + direction == 0)
2049                 {
2050                   int position, start, end;
2051                   pos_byte -= direction;
2052
2053                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2054                   set_search_regs (position, len_byte);
2055
2056                   if (NILP (Vinhibit_changing_match_data))
2057                     {
2058                       start = search_regs.start[0];
2059                       end = search_regs.end[0];
2060                     }
2061                   else
2062                     /* If Vinhibit_changing_match_data is non-nil,
2063                        search_regs will not be changed.  So let's
2064                        compute start and end here.  */
2065                     {
2066                       start = BYTE_TO_CHAR (position);
2067                       end = BYTE_TO_CHAR (position + len_byte);
2068                     }
2069
2070                   if ((n -= direction) != 0)
2071                     pos_byte += dirlen; /* to resume search */
2072                   else
2073                     return direction > 0 ? end : start;
2074                 }
2075               else
2076                 pos_byte += stride_for_teases;
2077             }
2078           }
2079       /* We have done one clump.  Can we continue? */
2080       if ((lim_byte - pos_byte) * direction < 0)
2081         return ((0 - n) * direction);
2082     }
2083   return BYTE_TO_CHAR (pos_byte);
2084 }
2085
2086 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2087    for the overall match just found in the current buffer.
2088    Also clear out the match data for registers 1 and up.  */
2089
2090 static void
2091 set_search_regs (beg_byte, nbytes)
2092      int beg_byte, nbytes;
2093 {
2094   int i;
2095
2096   if (!NILP (Vinhibit_changing_match_data))
2097     return;
2098
2099   /* Make sure we have registers in which to store
2100      the match position.  */
2101   if (search_regs.num_regs == 0)
2102     {
2103       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2104       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2105       search_regs.num_regs = 2;
2106     }
2107
2108   /* Clear out the other registers.  */
2109   for (i = 1; i < search_regs.num_regs; i++)
2110     {
2111       search_regs.start[i] = -1;
2112       search_regs.end[i] = -1;
2113     }
2114
2115   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2116   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2117   XSETBUFFER (last_thing_searched, current_buffer);
2118 }
2119 \f
2120 /* Given a string of words separated by word delimiters,
2121   compute a regexp that matches those exact words
2122   separated by arbitrary punctuation.  */
2123
2124 static Lisp_Object
2125 wordify (string)
2126      Lisp_Object string;
2127 {
2128   register unsigned char *p, *o;
2129   register int i, i_byte, len, punct_count = 0, word_count = 0;
2130   Lisp_Object val;
2131   int prev_c = 0;
2132   int adjust;
2133
2134   CHECK_STRING (string);
2135   p = SDATA (string);
2136   len = SCHARS (string);
2137
2138   for (i = 0, i_byte = 0; i < len; )
2139     {
2140       int c;
2141
2142       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2143
2144       if (SYNTAX (c) != Sword)
2145         {
2146           punct_count++;
2147           if (i > 0 && SYNTAX (prev_c) == Sword)
2148             word_count++;
2149         }
2150
2151       prev_c = c;
2152     }
2153
2154   if (SYNTAX (prev_c) == Sword)
2155     word_count++;
2156   if (!word_count)
2157     return empty_unibyte_string;
2158
2159   adjust = - punct_count + 5 * (word_count - 1) + 4;
2160   if (STRING_MULTIBYTE (string))
2161     val = make_uninit_multibyte_string (len + adjust,
2162                                         SBYTES (string)
2163                                         + adjust);
2164   else
2165     val = make_uninit_string (len + adjust);
2166
2167   o = SDATA (val);
2168   *o++ = '\\';
2169   *o++ = 'b';
2170   prev_c = 0;
2171
2172   for (i = 0, i_byte = 0; i < len; )
2173     {
2174       int c;
2175       int i_byte_orig = i_byte;
2176
2177       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2178
2179       if (SYNTAX (c) == Sword)
2180         {
2181           bcopy (SDATA (string) + i_byte_orig, o,
2182                  i_byte - i_byte_orig);
2183           o += i_byte - i_byte_orig;
2184         }
2185       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2186         {
2187           *o++ = '\\';
2188           *o++ = 'W';
2189           *o++ = '\\';
2190           *o++ = 'W';
2191           *o++ = '*';
2192         }
2193
2194       prev_c = c;
2195     }
2196
2197   *o++ = '\\';
2198   *o++ = 'b';
2199
2200   return val;
2201 }
2202 \f
2203 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2204        "MSearch backward: ",
2205        doc: /* Search backward from point for STRING.
2206 Set point to the beginning of the occurrence found, and return point.
2207 An optional second argument bounds the search; it is a buffer position.
2208 The match found must not extend before that position.
2209 Optional third argument, if t, means if fail just return nil (no error).
2210  If not nil and not t, position at limit of search and return nil.
2211 Optional fourth argument is repeat count--search for successive occurrences.
2212
2213 Search case-sensitivity is determined by the value of the variable
2214 `case-fold-search', which see.
2215
2216 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2217      (string, bound, noerror, count)
2218      Lisp_Object string, bound, noerror, count;
2219 {
2220   return search_command (string, bound, noerror, count, -1, 0, 0);
2221 }
2222
2223 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2224        doc: /* Search forward from point for STRING.
2225 Set point to the end of the occurrence found, and return point.
2226 An optional second argument bounds the search; it is a buffer position.
2227 The match found must not extend after that position.  A value of nil is
2228   equivalent to (point-max).
2229 Optional third argument, if t, means if fail just return nil (no error).
2230   If not nil and not t, move to limit of search and return nil.
2231 Optional fourth argument is repeat count--search for successive occurrences.
2232
2233 Search case-sensitivity is determined by the value of the variable
2234 `case-fold-search', which see.
2235
2236 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2237      (string, bound, noerror, count)
2238      Lisp_Object string, bound, noerror, count;
2239 {
2240   return search_command (string, bound, noerror, count, 1, 0, 0);
2241 }
2242
2243 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2244        "sWord search backward: ",
2245        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2246 Set point to the beginning of the occurrence found, and return point.
2247 An optional second argument bounds the search; it is a buffer position.
2248 The match found must not extend before that position.
2249 Optional third argument, if t, means if fail just return nil (no error).
2250   If not nil and not t, move to limit of search and return nil.
2251 Optional fourth argument is repeat count--search for successive occurrences.  */)
2252      (string, bound, noerror, count)
2253      Lisp_Object string, bound, noerror, count;
2254 {
2255   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2256 }
2257
2258 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2259        "sWord search: ",
2260        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2261 Set point to the end of the occurrence found, and return point.
2262 An optional second argument bounds the search; it is a buffer position.
2263 The match found must not extend after that position.
2264 Optional third argument, if t, means if fail just return nil (no error).
2265   If not nil and not t, move to limit of search and return nil.
2266 Optional fourth argument is repeat count--search for successive occurrences.  */)
2267      (string, bound, noerror, count)
2268      Lisp_Object string, bound, noerror, count;
2269 {
2270   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2271 }
2272
2273 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2274        "sRE search backward: ",
2275        doc: /* Search backward from point for match for regular expression REGEXP.
2276 Set point to the beginning of the match, and return point.
2277 The match found is the one starting last in the buffer
2278 and yet ending before the origin of the search.
2279 An optional second argument bounds the search; it is a buffer position.
2280 The match found must start at or after that position.
2281 Optional third argument, if t, means if fail just return nil (no error).
2282   If not nil and not t, move to limit of search and return nil.
2283 Optional fourth argument is repeat count--search for successive occurrences.
2284 See also the functions `match-beginning', `match-end', `match-string',
2285 and `replace-match'.  */)
2286      (regexp, bound, noerror, count)
2287      Lisp_Object regexp, bound, noerror, count;
2288 {
2289   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2290 }
2291
2292 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2293        "sRE search: ",
2294        doc: /* Search forward from point for regular expression REGEXP.
2295 Set point to the end of the occurrence found, and return point.
2296 An optional second argument bounds the search; it is a buffer position.
2297 The match found must not extend after that position.
2298 Optional third argument, if t, means if fail just return nil (no error).
2299   If not nil and not t, move to limit of search and return nil.
2300 Optional fourth argument is repeat count--search for successive occurrences.
2301 See also the functions `match-beginning', `match-end', `match-string',
2302 and `replace-match'.  */)
2303      (regexp, bound, noerror, count)
2304      Lisp_Object regexp, bound, noerror, count;
2305 {
2306   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2307 }
2308
2309 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2310        "sPosix search backward: ",
2311        doc: /* Search backward from point for match for regular expression REGEXP.
2312 Find the longest match in accord with Posix regular expression rules.
2313 Set point to the beginning of the match, and return point.
2314 The match found is the one starting last in the buffer
2315 and yet ending before the origin of the search.
2316 An optional second argument bounds the search; it is a buffer position.
2317 The match found must start at or after that position.
2318 Optional third argument, if t, means if fail just return nil (no error).
2319   If not nil and not t, move to limit of search and return nil.
2320 Optional fourth argument is repeat count--search for successive occurrences.
2321 See also the functions `match-beginning', `match-end', `match-string',
2322 and `replace-match'.  */)
2323      (regexp, bound, noerror, count)
2324      Lisp_Object regexp, bound, noerror, count;
2325 {
2326   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2327 }
2328
2329 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2330        "sPosix search: ",
2331        doc: /* Search forward from point for regular expression REGEXP.
2332 Find the longest match in accord with Posix regular expression rules.
2333 Set point to the end of the occurrence found, and return point.
2334 An optional second argument bounds the search; it is a buffer position.
2335 The match found must not extend after that position.
2336 Optional third argument, if t, means if fail just return nil (no error).
2337   If not nil and not t, move to limit of search and return nil.
2338 Optional fourth argument is repeat count--search for successive occurrences.
2339 See also the functions `match-beginning', `match-end', `match-string',
2340 and `replace-match'.  */)
2341      (regexp, bound, noerror, count)
2342      Lisp_Object regexp, bound, noerror, count;
2343 {
2344   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2345 }
2346 \f
2347 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2348        doc: /* Replace text matched by last search with NEWTEXT.
2349 Leave point at the end of the replacement text.
2350
2351 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2352 Otherwise maybe capitalize the whole text, or maybe just word initials,
2353 based on the replaced text.
2354 If the replaced text has only capital letters
2355 and has at least one multiletter word, convert NEWTEXT to all caps.
2356 Otherwise if all words are capitalized in the replaced text,
2357 capitalize each word in NEWTEXT.
2358
2359 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2360 Otherwise treat `\\' as special:
2361   `\\&' in NEWTEXT means substitute original matched text.
2362   `\\N' means substitute what matched the Nth `\\(...\\)'.
2363        If Nth parens didn't match, substitute nothing.
2364   `\\\\' means insert one `\\'.
2365 Case conversion does not apply to these substitutions.
2366
2367 FIXEDCASE and LITERAL are optional arguments.
2368
2369 The optional fourth argument STRING can be a string to modify.
2370 This is meaningful when the previous match was done against STRING,
2371 using `string-match'.  When used this way, `replace-match'
2372 creates and returns a new string made by copying STRING and replacing
2373 the part of STRING that was matched.
2374
2375 The optional fifth argument SUBEXP specifies a subexpression;
2376 it says to replace just that subexpression with NEWTEXT,
2377 rather than replacing the entire matched text.
2378 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2379 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2380 NEWTEXT in place of subexp N.
2381 This is useful only after a regular expression search or match,
2382 since only regular expressions have distinguished subexpressions.  */)
2383      (newtext, fixedcase, literal, string, subexp)
2384      Lisp_Object newtext, fixedcase, literal, string, subexp;
2385 {
2386   enum { nochange, all_caps, cap_initial } case_action;
2387   register int pos, pos_byte;
2388   int some_multiletter_word;
2389   int some_lowercase;
2390   int some_uppercase;
2391   int some_nonuppercase_initial;
2392   register int c, prevc;
2393   int sub;
2394   int opoint, newpoint;
2395
2396   CHECK_STRING (newtext);
2397
2398   if (! NILP (string))
2399     CHECK_STRING (string);
2400
2401   case_action = nochange;       /* We tried an initialization */
2402                                 /* but some C compilers blew it */
2403
2404   if (search_regs.num_regs <= 0)
2405     error ("`replace-match' called before any match found");
2406
2407   if (NILP (subexp))
2408     sub = 0;
2409   else
2410     {
2411       CHECK_NUMBER (subexp);
2412       sub = XINT (subexp);
2413       if (sub < 0 || sub >= search_regs.num_regs)
2414         args_out_of_range (subexp, make_number (search_regs.num_regs));
2415     }
2416
2417   if (NILP (string))
2418     {
2419       if (search_regs.start[sub] < BEGV
2420           || search_regs.start[sub] > search_regs.end[sub]
2421           || search_regs.end[sub] > ZV)
2422         args_out_of_range (make_number (search_regs.start[sub]),
2423                            make_number (search_regs.end[sub]));
2424     }
2425   else
2426     {
2427       if (search_regs.start[sub] < 0
2428           || search_regs.start[sub] > search_regs.end[sub]
2429           || search_regs.end[sub] > SCHARS (string))
2430         args_out_of_range (make_number (search_regs.start[sub]),
2431                            make_number (search_regs.end[sub]));
2432     }
2433
2434   if (NILP (fixedcase))
2435     {
2436       /* Decide how to casify by examining the matched text. */
2437       int last;
2438
2439       pos = search_regs.start[sub];
2440       last = search_regs.end[sub];
2441
2442       if (NILP (string))
2443         pos_byte = CHAR_TO_BYTE (pos);
2444       else
2445         pos_byte = string_char_to_byte (string, pos);
2446
2447       prevc = '\n';
2448       case_action = all_caps;
2449
2450       /* some_multiletter_word is set nonzero if any original word
2451          is more than one letter long. */
2452       some_multiletter_word = 0;
2453       some_lowercase = 0;
2454       some_nonuppercase_initial = 0;
2455       some_uppercase = 0;
2456
2457       while (pos < last)
2458         {
2459           if (NILP (string))
2460             {
2461               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2462               INC_BOTH (pos, pos_byte);
2463             }
2464           else
2465             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2466
2467           if (LOWERCASEP (c))
2468             {
2469               /* Cannot be all caps if any original char is lower case */
2470
2471               some_lowercase = 1;
2472               if (SYNTAX (prevc) != Sword)
2473                 some_nonuppercase_initial = 1;
2474               else
2475                 some_multiletter_word = 1;
2476             }
2477           else if (UPPERCASEP (c))
2478             {
2479               some_uppercase = 1;
2480               if (SYNTAX (prevc) != Sword)
2481                 ;
2482               else
2483                 some_multiletter_word = 1;
2484             }
2485           else
2486             {
2487               /* If the initial is a caseless word constituent,
2488                  treat that like a lowercase initial.  */
2489               if (SYNTAX (prevc) != Sword)
2490                 some_nonuppercase_initial = 1;
2491             }
2492
2493           prevc = c;
2494         }
2495
2496       /* Convert to all caps if the old text is all caps
2497          and has at least one multiletter word.  */
2498       if (! some_lowercase && some_multiletter_word)
2499         case_action = all_caps;
2500       /* Capitalize each word, if the old text has all capitalized words.  */
2501       else if (!some_nonuppercase_initial && some_multiletter_word)
2502         case_action = cap_initial;
2503       else if (!some_nonuppercase_initial && some_uppercase)
2504         /* Should x -> yz, operating on X, give Yz or YZ?
2505            We'll assume the latter.  */
2506         case_action = all_caps;
2507       else
2508         case_action = nochange;
2509     }
2510
2511   /* Do replacement in a string.  */
2512   if (!NILP (string))
2513     {
2514       Lisp_Object before, after;
2515
2516       before = Fsubstring (string, make_number (0),
2517                            make_number (search_regs.start[sub]));
2518       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2519
2520       /* Substitute parts of the match into NEWTEXT
2521          if desired.  */
2522       if (NILP (literal))
2523         {
2524           int lastpos = 0;
2525           int lastpos_byte = 0;
2526           /* We build up the substituted string in ACCUM.  */
2527           Lisp_Object accum;
2528           Lisp_Object middle;
2529           int length = SBYTES (newtext);
2530
2531           accum = Qnil;
2532
2533           for (pos_byte = 0, pos = 0; pos_byte < length;)
2534             {
2535               int substart = -1;
2536               int subend = 0;
2537               int delbackslash = 0;
2538
2539               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2540
2541               if (c == '\\')
2542                 {
2543                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2544
2545                   if (c == '&')
2546                     {
2547                       substart = search_regs.start[sub];
2548                       subend = search_regs.end[sub];
2549                     }
2550                   else if (c >= '1' && c <= '9')
2551                     {
2552                       if (search_regs.start[c - '0'] >= 0
2553                           && c <= search_regs.num_regs + '0')
2554                         {
2555                           substart = search_regs.start[c - '0'];
2556                           subend = search_regs.end[c - '0'];
2557                         }
2558                       else
2559                         {
2560                           /* If that subexp did not match,
2561                              replace \\N with nothing.  */
2562                           substart = 0;
2563                           subend = 0;
2564                         }
2565                     }
2566                   else if (c == '\\')
2567                     delbackslash = 1;
2568                   else
2569                     error ("Invalid use of `\\' in replacement text");
2570                 }
2571               if (substart >= 0)
2572                 {
2573                   if (pos - 2 != lastpos)
2574                     middle = substring_both (newtext, lastpos,
2575                                              lastpos_byte,
2576                                              pos - 2, pos_byte - 2);
2577                   else
2578                     middle = Qnil;
2579                   accum = concat3 (accum, middle,
2580                                    Fsubstring (string,
2581                                                make_number (substart),
2582                                                make_number (subend)));
2583                   lastpos = pos;
2584                   lastpos_byte = pos_byte;
2585                 }
2586               else if (delbackslash)
2587                 {
2588                   middle = substring_both (newtext, lastpos,
2589                                            lastpos_byte,
2590                                            pos - 1, pos_byte - 1);
2591
2592                   accum = concat2 (accum, middle);
2593                   lastpos = pos;
2594                   lastpos_byte = pos_byte;
2595                 }
2596             }
2597
2598           if (pos != lastpos)
2599             middle = substring_both (newtext, lastpos,
2600                                      lastpos_byte,
2601                                      pos, pos_byte);
2602           else
2603             middle = Qnil;
2604
2605           newtext = concat2 (accum, middle);
2606         }
2607
2608       /* Do case substitution in NEWTEXT if desired.  */
2609       if (case_action == all_caps)
2610         newtext = Fupcase (newtext);
2611       else if (case_action == cap_initial)
2612         newtext = Fupcase_initials (newtext);
2613
2614       return concat3 (before, newtext, after);
2615     }
2616
2617   /* Record point, then move (quietly) to the start of the match.  */
2618   if (PT >= search_regs.end[sub])
2619     opoint = PT - ZV;
2620   else if (PT > search_regs.start[sub])
2621     opoint = search_regs.end[sub] - ZV;
2622   else
2623     opoint = PT;
2624
2625   /* If we want non-literal replacement,
2626      perform substitution on the replacement string.  */
2627   if (NILP (literal))
2628     {
2629       int length = SBYTES (newtext);
2630       unsigned char *substed;
2631       int substed_alloc_size, substed_len;
2632       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2633       int str_multibyte = STRING_MULTIBYTE (newtext);
2634       Lisp_Object rev_tbl;
2635       int really_changed = 0;
2636
2637       rev_tbl = Qnil;
2638
2639       substed_alloc_size = length * 2 + 100;
2640       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2641       substed_len = 0;
2642
2643       /* Go thru NEWTEXT, producing the actual text to insert in
2644          SUBSTED while adjusting multibyteness to that of the current
2645          buffer.  */
2646
2647       for (pos_byte = 0, pos = 0; pos_byte < length;)
2648         {
2649           unsigned char str[MAX_MULTIBYTE_LENGTH];
2650           unsigned char *add_stuff = NULL;
2651           int add_len = 0;
2652           int idx = -1;
2653
2654           if (str_multibyte)
2655             {
2656               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2657               if (!buf_multibyte)
2658                 c = multibyte_char_to_unibyte (c, rev_tbl);
2659             }
2660           else
2661             {
2662               /* Note that we don't have to increment POS.  */
2663               c = SREF (newtext, pos_byte++);
2664               if (buf_multibyte)
2665                 c = unibyte_char_to_multibyte (c);
2666             }
2667
2668           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2669              or set IDX to a match index, which means put that part
2670              of the buffer text into SUBSTED.  */
2671
2672           if (c == '\\')
2673             {
2674               really_changed = 1;
2675
2676               if (str_multibyte)
2677                 {
2678                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2679                                                       pos, pos_byte);
2680                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2681                     c = multibyte_char_to_unibyte (c, rev_tbl);
2682                 }
2683               else
2684                 {
2685                   c = SREF (newtext, pos_byte++);
2686                   if (buf_multibyte)
2687                     c = unibyte_char_to_multibyte (c);
2688                 }
2689
2690               if (c == '&')
2691                 idx = sub;
2692               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2693                 {
2694                   if (search_regs.start[c - '0'] >= 1)
2695                     idx = c - '0';
2696                 }
2697               else if (c == '\\')
2698                 add_len = 1, add_stuff = "\\";
2699               else
2700                 {
2701                   xfree (substed);
2702                   error ("Invalid use of `\\' in replacement text");
2703                 }
2704             }
2705           else
2706             {
2707               add_len = CHAR_STRING (c, str);
2708               add_stuff = str;
2709             }
2710
2711           /* If we want to copy part of a previous match,
2712              set up ADD_STUFF and ADD_LEN to point to it.  */
2713           if (idx >= 0)
2714             {
2715               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2716               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2717               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2718                 move_gap (search_regs.start[idx]);
2719               add_stuff = BYTE_POS_ADDR (begbyte);
2720             }
2721
2722           /* Now the stuff we want to add to SUBSTED
2723              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2724
2725           /* Make sure SUBSTED is big enough.  */
2726           if (substed_len + add_len >= substed_alloc_size)
2727             {
2728               substed_alloc_size = substed_len + add_len + 500;
2729               substed = (unsigned char *) xrealloc (substed,
2730                                                     substed_alloc_size + 1);
2731             }
2732
2733           /* Now add to the end of SUBSTED.  */
2734           if (add_stuff)
2735             {
2736               bcopy (add_stuff, substed + substed_len, add_len);
2737               substed_len += add_len;
2738             }
2739         }
2740
2741       if (really_changed)
2742         {
2743           if (buf_multibyte)
2744             {
2745               int nchars = multibyte_chars_in_text (substed, substed_len);
2746
2747               newtext = make_multibyte_string (substed, nchars, substed_len);
2748             }
2749           else
2750             newtext = make_unibyte_string (substed, substed_len);
2751         }
2752       xfree (substed);
2753     }
2754
2755   /* Replace the old text with the new in the cleanest possible way.  */
2756   replace_range (search_regs.start[sub], search_regs.end[sub],
2757                  newtext, 1, 0, 1);
2758   newpoint = search_regs.start[sub] + SCHARS (newtext);
2759
2760   if (case_action == all_caps)
2761     Fupcase_region (make_number (search_regs.start[sub]),
2762                     make_number (newpoint));
2763   else if (case_action == cap_initial)
2764     Fupcase_initials_region (make_number (search_regs.start[sub]),
2765                              make_number (newpoint));
2766
2767   /* Adjust search data for this change.  */
2768   {
2769     int oldend = search_regs.end[sub];
2770     int oldstart = search_regs.start[sub];
2771     int change = newpoint - search_regs.end[sub];
2772     int i;
2773
2774     for (i = 0; i < search_regs.num_regs; i++)
2775       {
2776         if (search_regs.start[i] >= oldend)
2777           search_regs.start[i] += change;
2778         else if (search_regs.start[i] > oldstart)
2779           search_regs.start[i] = oldstart;
2780         if (search_regs.end[i] >= oldend)
2781           search_regs.end[i] += change;
2782         else if (search_regs.end[i] > oldstart)
2783           search_regs.end[i] = oldstart;
2784       }
2785   }
2786
2787   /* Put point back where it was in the text.  */
2788   if (opoint <= 0)
2789     TEMP_SET_PT (opoint + ZV);
2790   else
2791     TEMP_SET_PT (opoint);
2792
2793   /* Now move point "officially" to the start of the inserted replacement.  */
2794   move_if_not_intangible (newpoint);
2795
2796   return Qnil;
2797 }
2798 \f
2799 static Lisp_Object
2800 match_limit (num, beginningp)
2801      Lisp_Object num;
2802      int beginningp;
2803 {
2804   register int n;
2805
2806   CHECK_NUMBER (num);
2807   n = XINT (num);
2808   if (n < 0)
2809     args_out_of_range (num, make_number (0));
2810   if (search_regs.num_regs <= 0)
2811     error ("No match data, because no search succeeded");
2812   if (n >= search_regs.num_regs
2813       || search_regs.start[n] < 0)
2814     return Qnil;
2815   return (make_number ((beginningp) ? search_regs.start[n]
2816                                     : search_regs.end[n]));
2817 }
2818
2819 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2820        doc: /* Return position of start of text matched by last search.
2821 SUBEXP, a number, specifies which parenthesized expression in the last
2822   regexp.
2823 Value is nil if SUBEXPth pair didn't match, or there were less than
2824   SUBEXP pairs.
2825 Zero means the entire text matched by the whole regexp or whole string.  */)
2826      (subexp)
2827      Lisp_Object subexp;
2828 {
2829   return match_limit (subexp, 1);
2830 }
2831
2832 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2833        doc: /* Return position of end of text matched by last search.
2834 SUBEXP, a number, specifies which parenthesized expression in the last
2835   regexp.
2836 Value is nil if SUBEXPth pair didn't match, or there were less than
2837   SUBEXP pairs.
2838 Zero means the entire text matched by the whole regexp or whole string.  */)
2839      (subexp)
2840      Lisp_Object subexp;
2841 {
2842   return match_limit (subexp, 0);
2843 }
2844
2845 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2846        doc: /* Return a list containing all info on what the last search matched.
2847 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2848 All the elements are markers or nil (nil if the Nth pair didn't match)
2849 if the last match was on a buffer; integers or nil if a string was matched.
2850 Use `store-match-data' to reinstate the data in this list.
2851
2852 If INTEGERS (the optional first argument) is non-nil, always use
2853 integers \(rather than markers) to represent buffer positions.  In
2854 this case, and if the last match was in a buffer, the buffer will get
2855 stored as one additional element at the end of the list.
2856
2857 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2858 enough to hold all the values, and if INTEGERS is non-nil, no consing
2859 is done.
2860
2861 If optional third arg RESEAT is non-nil, any previous markers on the
2862 REUSE list will be modified to point to nowhere.
2863
2864 Return value is undefined if the last search failed.  */)
2865   (integers, reuse, reseat)
2866      Lisp_Object integers, reuse, reseat;
2867 {
2868   Lisp_Object tail, prev;
2869   Lisp_Object *data;
2870   int i, len;
2871
2872   if (!NILP (reseat))
2873     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2874       if (MARKERP (XCAR (tail)))
2875         {
2876           unchain_marker (XMARKER (XCAR (tail)));
2877           XSETCAR (tail, Qnil);
2878         }
2879
2880   if (NILP (last_thing_searched))
2881     return Qnil;
2882
2883   prev = Qnil;
2884
2885   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2886                                  * sizeof (Lisp_Object));
2887
2888   len = 0;
2889   for (i = 0; i < search_regs.num_regs; i++)
2890     {
2891       int start = search_regs.start[i];
2892       if (start >= 0)
2893         {
2894           if (EQ (last_thing_searched, Qt)
2895               || ! NILP (integers))
2896             {
2897               XSETFASTINT (data[2 * i], start);
2898               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2899             }
2900           else if (BUFFERP (last_thing_searched))
2901             {
2902               data[2 * i] = Fmake_marker ();
2903               Fset_marker (data[2 * i],
2904                            make_number (start),
2905                            last_thing_searched);
2906               data[2 * i + 1] = Fmake_marker ();
2907               Fset_marker (data[2 * i + 1],
2908                            make_number (search_regs.end[i]),
2909                            last_thing_searched);
2910             }
2911           else
2912             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2913             abort ();
2914
2915           len = 2 * i + 2;
2916         }
2917       else
2918         data[2 * i] = data[2 * i + 1] = Qnil;
2919     }
2920
2921   if (BUFFERP (last_thing_searched) && !NILP (integers))
2922     {
2923       data[len] = last_thing_searched;
2924       len++;
2925     }
2926
2927   /* If REUSE is not usable, cons up the values and return them.  */
2928   if (! CONSP (reuse))
2929     return Flist (len, data);
2930
2931   /* If REUSE is a list, store as many value elements as will fit
2932      into the elements of REUSE.  */
2933   for (i = 0, tail = reuse; CONSP (tail);
2934        i++, tail = XCDR (tail))
2935     {
2936       if (i < len)
2937         XSETCAR (tail, data[i]);
2938       else
2939         XSETCAR (tail, Qnil);
2940       prev = tail;
2941     }
2942
2943   /* If we couldn't fit all value elements into REUSE,
2944      cons up the rest of them and add them to the end of REUSE.  */
2945   if (i < len)
2946     XSETCDR (prev, Flist (len - i, data + i));
2947
2948   return reuse;
2949 }
2950
2951 /* We used to have an internal use variant of `reseat' described as:
2952
2953       If RESEAT is `evaporate', put the markers back on the free list
2954       immediately.  No other references to the markers must exist in this
2955       case, so it is used only internally on the unwind stack and
2956       save-match-data from Lisp.
2957
2958    But it was ill-conceived: those supposedly-internal markers get exposed via
2959    the undo-list, so freeing them here is unsafe.  */
2960
2961 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2962        doc: /* Set internal data on last search match from elements of LIST.
2963 LIST should have been created by calling `match-data' previously.
2964
2965 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2966     (list, reseat)
2967      register Lisp_Object list, reseat;
2968 {
2969   register int i;
2970   register Lisp_Object marker;
2971
2972   if (running_asynch_code)
2973     save_search_regs ();
2974
2975   CHECK_LIST (list);
2976
2977   /* Unless we find a marker with a buffer or an explicit buffer
2978      in LIST, assume that this match data came from a string.  */
2979   last_thing_searched = Qt;
2980
2981   /* Allocate registers if they don't already exist.  */
2982   {
2983     int length = XFASTINT (Flength (list)) / 2;
2984
2985     if (length > search_regs.num_regs)
2986       {
2987         if (search_regs.num_regs == 0)
2988           {
2989             search_regs.start
2990               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2991             search_regs.end
2992               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2993           }
2994         else
2995           {
2996             search_regs.start
2997               = (regoff_t *) xrealloc (search_regs.start,
2998                                        length * sizeof (regoff_t));
2999             search_regs.end
3000               = (regoff_t *) xrealloc (search_regs.end,
3001                                        length * sizeof (regoff_t));
3002           }
3003
3004         for (i = search_regs.num_regs; i < length; i++)
3005           search_regs.start[i] = -1;
3006
3007         search_regs.num_regs = length;
3008       }
3009
3010     for (i = 0; CONSP (list); i++)
3011       {
3012         marker = XCAR (list);
3013         if (BUFFERP (marker))
3014           {
3015             last_thing_searched = marker;
3016             break;
3017           }
3018         if (i >= length)
3019           break;
3020         if (NILP (marker))
3021           {
3022             search_regs.start[i] = -1;
3023             list = XCDR (list);
3024           }
3025         else
3026           {
3027             int from;
3028             Lisp_Object m;
3029
3030             m = marker;
3031             if (MARKERP (marker))
3032               {
3033                 if (XMARKER (marker)->buffer == 0)
3034                   XSETFASTINT (marker, 0);
3035                 else
3036                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3037               }
3038
3039             CHECK_NUMBER_COERCE_MARKER (marker);
3040             from = XINT (marker);
3041
3042             if (!NILP (reseat) && MARKERP (m))
3043               {
3044                 unchain_marker (XMARKER (m));
3045                 XSETCAR (list, Qnil);
3046               }
3047
3048             if ((list = XCDR (list), !CONSP (list)))
3049               break;
3050
3051             m = marker = XCAR (list);
3052
3053             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3054               XSETFASTINT (marker, 0);
3055
3056             CHECK_NUMBER_COERCE_MARKER (marker);
3057             search_regs.start[i] = from;
3058             search_regs.end[i] = XINT (marker);
3059
3060             if (!NILP (reseat) && MARKERP (m))
3061               {
3062                 unchain_marker (XMARKER (m));
3063                 XSETCAR (list, Qnil);
3064               }
3065           }
3066         list = XCDR (list);
3067       }
3068
3069     for (; i < search_regs.num_regs; i++)
3070       search_regs.start[i] = -1;
3071   }
3072
3073   return Qnil;
3074 }
3075
3076 /* If non-zero the match data have been saved in saved_search_regs
3077    during the execution of a sentinel or filter. */
3078 static int search_regs_saved;
3079 static struct re_registers saved_search_regs;
3080 static Lisp_Object saved_last_thing_searched;
3081
3082 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3083    if asynchronous code (filter or sentinel) is running. */
3084 static void
3085 save_search_regs ()
3086 {
3087   if (!search_regs_saved)
3088     {
3089       saved_search_regs.num_regs = search_regs.num_regs;
3090       saved_search_regs.start = search_regs.start;
3091       saved_search_regs.end = search_regs.end;
3092       saved_last_thing_searched = last_thing_searched;
3093       last_thing_searched = Qnil;
3094       search_regs.num_regs = 0;
3095       search_regs.start = 0;
3096       search_regs.end = 0;
3097
3098       search_regs_saved = 1;
3099     }
3100 }
3101
3102 /* Called upon exit from filters and sentinels. */
3103 void
3104 restore_search_regs ()
3105 {
3106   if (search_regs_saved)
3107     {
3108       if (search_regs.num_regs > 0)
3109         {
3110           xfree (search_regs.start);
3111           xfree (search_regs.end);
3112         }
3113       search_regs.num_regs = saved_search_regs.num_regs;
3114       search_regs.start = saved_search_regs.start;
3115       search_regs.end = saved_search_regs.end;
3116       last_thing_searched = saved_last_thing_searched;
3117       saved_last_thing_searched = Qnil;
3118       search_regs_saved = 0;
3119     }
3120 }
3121
3122 static Lisp_Object
3123 unwind_set_match_data (list)
3124      Lisp_Object list;
3125 {
3126   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3127   return Fset_match_data (list, Qt);
3128 }
3129
3130 /* Called to unwind protect the match data.  */
3131 void
3132 record_unwind_save_match_data ()
3133 {
3134   record_unwind_protect (unwind_set_match_data,
3135                          Fmatch_data (Qnil, Qnil, Qnil));
3136 }
3137
3138 /* Quote a string to inactivate reg-expr chars */
3139
3140 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3141        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3142      (string)
3143      Lisp_Object string;
3144 {
3145   register unsigned char *in, *out, *end;
3146   register unsigned char *temp;
3147   int backslashes_added = 0;
3148
3149   CHECK_STRING (string);
3150
3151   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3152
3153   /* Now copy the data into the new string, inserting escapes. */
3154
3155   in = SDATA (string);
3156   end = in + SBYTES (string);
3157   out = temp;
3158
3159   for (; in != end; in++)
3160     {
3161       if (*in == '['
3162           || *in == '*' || *in == '.' || *in == '\\'
3163           || *in == '?' || *in == '+'
3164           || *in == '^' || *in == '$')
3165         *out++ = '\\', backslashes_added++;
3166       *out++ = *in;
3167     }
3168
3169   return make_specified_string (temp,
3170                                 SCHARS (string) + backslashes_added,
3171                                 out - temp,
3172                                 STRING_MULTIBYTE (string));
3173 }
3174 \f
3175 void
3176 syms_of_search ()
3177 {
3178   register int i;
3179
3180   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3181     {
3182       searchbufs[i].buf.allocated = 100;
3183       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3184       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3185       searchbufs[i].regexp = Qnil;
3186       searchbufs[i].whitespace_regexp = Qnil;
3187       searchbufs[i].syntax_table = Qnil;
3188       staticpro (&searchbufs[i].regexp);
3189       staticpro (&searchbufs[i].whitespace_regexp);
3190       staticpro (&searchbufs[i].syntax_table);
3191       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3192     }
3193   searchbuf_head = &searchbufs[0];
3194
3195   Qsearch_failed = intern ("search-failed");
3196   staticpro (&Qsearch_failed);
3197   Qinvalid_regexp = intern ("invalid-regexp");
3198   staticpro (&Qinvalid_regexp);
3199
3200   Fput (Qsearch_failed, Qerror_conditions,
3201         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3202   Fput (Qsearch_failed, Qerror_message,
3203         build_string ("Search failed"));
3204
3205   Fput (Qinvalid_regexp, Qerror_conditions,
3206         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3207   Fput (Qinvalid_regexp, Qerror_message,
3208         build_string ("Invalid regexp"));
3209
3210   last_thing_searched = Qnil;
3211   staticpro (&last_thing_searched);
3212
3213   saved_last_thing_searched = Qnil;
3214   staticpro (&saved_last_thing_searched);
3215
3216   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3217       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3218 Some commands use this for user-specified regexps.
3219 Spaces that occur inside character classes or repetition operators
3220 or other such regexp constructs are not replaced with this.
3221 A value of nil (which is the normal value) means treat spaces literally.  */);
3222   Vsearch_spaces_regexp = Qnil;
3223
3224   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3225       doc: /* Internal use only.
3226 If non-nil, the primitive searching and matching functions
3227 such as `looking-at', `string-match', `re-search-forward', etc.,
3228 do not set the match data.  The proper way to use this variable
3229 is to bind it with `let' around a small expression.  */);
3230   Vinhibit_changing_match_data = Qnil;
3231
3232   defsubr (&Slooking_at);
3233   defsubr (&Sposix_looking_at);
3234   defsubr (&Sstring_match);
3235   defsubr (&Sposix_string_match);
3236   defsubr (&Ssearch_forward);
3237   defsubr (&Ssearch_backward);
3238   defsubr (&Sword_search_forward);
3239   defsubr (&Sword_search_backward);
3240   defsubr (&Sre_search_forward);
3241   defsubr (&Sre_search_backward);
3242   defsubr (&Sposix_search_forward);
3243   defsubr (&Sposix_search_backward);
3244   defsubr (&Sreplace_match);
3245   defsubr (&Smatch_beginning);
3246   defsubr (&Smatch_end);
3247   defsubr (&Smatch_data);
3248   defsubr (&Sset_match_data);
3249   defsubr (&Sregexp_quote);
3250 }
3251
3252 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3253    (do not change this comment) */