src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 3, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  21 Boston, MA 02110-1301, USA.  */
  22
  23
  24 #include <config.h>
  25 #include "lisp.h"
  26 #include "syntax.h"
  27 #include "category.h"
  28 #include "buffer.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 static void set_search_regs ();
  97 static void save_search_regs ();
  98 static int simple_search ();
  99 static int boyer_moore ();
 100 static int search_buffer ();
 101 static void matcher_overflow () NO_RETURN;
 102
 103 static void
 104 matcher_overflow ()
 105 {
 106   error ("Stack overflow in regexp matcher");
 107 }
 108
 109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 110    PATTERN is the pattern to compile.
 111    CP is the place to put the result.
 112    TRANSLATE is a translation table for ignoring case, or nil for none.
 113    REGP is the structure that says where to store the "register"
 114    values that will result from matching this pattern.
 115    If it is 0, we should compile the pattern not to record any
 116    subexpression bounds.
 117    POSIX is nonzero if we want full backtracking (POSIX style)
 118    for this pattern.  0 means backtrack only enough to get a valid match.
 119    MULTIBYTE is nonzero if we want to handle multibyte characters in
 120    PATTERN.  0 means all multibyte characters are recognized just as
 121    sequences of binary data.
 122
 123    The behavior also depends on Vsearch_spaces_regexp.  */
 124
 125 static void
 126 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 127      struct regexp_cache *cp;
 128      Lisp_Object pattern;
 129      Lisp_Object translate;
 130      struct re_registers *regp;
 131      int posix;
 132      int multibyte;
 133 {
 134   unsigned char *raw_pattern;
 135   int raw_pattern_size;
 136   char *val;
 137   reg_syntax_t old;
 138
 139   /* MULTIBYTE says whether the text to be searched is multibyte.
 140      We must convert PATTERN to match that, or we will not really
 141      find things right.  */
 142
 143   if (multibyte == STRING_MULTIBYTE (pattern))
 144     {
 145       raw_pattern = (unsigned char *) SDATA (pattern);
 146       raw_pattern_size = SBYTES (pattern);
 147     }
 148   else if (multibyte)
 149     {
 150       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 151                                                   SCHARS (pattern));
 152       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 153       copy_text (SDATA (pattern), raw_pattern,
 154                  SCHARS (pattern), 0, 1);
 155     }
 156   else
 157     {
 158       /* Converting multibyte to single-byte.
 159
 160          ??? Perhaps this conversion should be done in a special way
 161          by subtracting nonascii-insert-offset from each non-ASCII char,
 162          so that only the multibyte chars which really correspond to
 163          the chosen single-byte character set can possibly match.  */
 164       raw_pattern_size = SCHARS (pattern);
 165       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 166       copy_text (SDATA (pattern), raw_pattern,
 167                  SBYTES (pattern), 1, 0);
 168     }
 169
 170   cp->regexp = Qnil;
 171   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 172   cp->posix = posix;
 173   cp->buf.multibyte = multibyte;
 174   cp->whitespace_regexp = Vsearch_spaces_regexp;
 175   /* rms: I think BLOCK_INPUT is not needed here any more,
 176      because regex.c defines malloc to call xmalloc.
 177      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 178      So let's turn it off.  */
 179   /*  BLOCK_INPUT;  */
 180   old = re_set_syntax (RE_SYNTAX_EMACS
 181                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 182
 183   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 184                             : SDATA (Vsearch_spaces_regexp));
 185
 186   val = (char *) re_compile_pattern ((char *)raw_pattern,
 187                                      raw_pattern_size, &cp->buf);
 188
 189   /* If the compiled pattern hard codes some of the contents of the
 190      syntax-table, it can only be reused with *this* syntax table.  */
 191   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 192
 193   re_set_whitespace_regexp (NULL);
 194
 195   re_set_syntax (old);
 196   /* UNBLOCK_INPUT;  */
 197   if (val)
 198     xsignal1 (Qinvalid_regexp, build_string (val));
 199
 200   cp->regexp = Fcopy_sequence (pattern);
 201 }
 202
 203 /* Shrink each compiled regexp buffer in the cache
 204    to the size actually used right now.
 205    This is called from garbage collection.  */
 206
 207 void
 208 shrink_regexp_cache ()
 209 {
 210   struct regexp_cache *cp;
 211
 212   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 213     {
 214       cp->buf.allocated = cp->buf.used;
 215       cp->buf.buffer
 216         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 217     }
 218 }
 219
 220 /* Clear the regexp cache w.r.t. a particular syntax table,
 221    because it was changed.
 222    There is no danger of memory leak here because re_compile_pattern
 223    automagically manages the memory in each re_pattern_buffer struct,
 224    based on its `allocated' and `buffer' values.  */
 225 void
 226 clear_regexp_cache ()
 227 {
 228   int i;
 229
 230   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 231     /* It's tempting to compare with the syntax-table we've actually changd,
 232        but it's not sufficient because char-table inheritance mewans that
 233        modifying one syntax-table can change others at the same time.  */
 234     if (!EQ (searchbufs[i].syntax_table, Qt))
 235       searchbufs[i].regexp = Qnil;
 236 }
 237
 238 /* Compile a regexp if necessary, but first check to see if there's one in
 239    the cache.
 240    PATTERN is the pattern to compile.
 241    TRANSLATE is a translation table for ignoring case, or nil for none.
 242    REGP is the structure that says where to store the "register"
 243    values that will result from matching this pattern.
 244    If it is 0, we should compile the pattern not to record any
 245    subexpression bounds.
 246    POSIX is nonzero if we want full backtracking (POSIX style)
 247    for this pattern.  0 means backtrack only enough to get a valid match.  */
 248
 249 struct re_pattern_buffer *
 250 compile_pattern (pattern, regp, translate, posix, multibyte)
 251      Lisp_Object pattern;
 252      struct re_registers *regp;
 253      Lisp_Object translate;
 254      int posix, multibyte;
 255 {
 256   struct regexp_cache *cp, **cpp;
 257
 258   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 259     {
 260       cp = *cpp;
 261       /* Entries are initialized to nil, and may be set to nil by
 262          compile_pattern_1 if the pattern isn't valid.  Don't apply
 263          string accessors in those cases.  However, compile_pattern_1
 264          is only applied to the cache entry we pick here to reuse.  So
 265          nil should never appear before a non-nil entry.  */
 266       if (NILP (cp->regexp))
 267         goto compile_it;
 268       if (SCHARS (cp->regexp) == SCHARS (pattern)
 269           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 270           && !NILP (Fstring_equal (cp->regexp, pattern))
 271           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 272           && cp->posix == posix
 273           && cp->buf.multibyte == multibyte
 274           && (EQ (cp->syntax_table, Qt)
 275               || EQ (cp->syntax_table, current_buffer->syntax_table))
 276           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 277         break;
 278
 279       /* If we're at the end of the cache, compile into the nil cell
 280          we found, or the last (least recently used) cell with a
 281          string value.  */
 282       if (cp->next == 0)
 283         {
 284         compile_it:
 285           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 286           break;
 287         }
 288     }
 289
 290   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 291      either because we found it in the cache or because we just compiled it.
 292      Move it to the front of the queue to mark it as most recently used.  */
 293   *cpp = cp->next;
 294   cp->next = searchbuf_head;
 295   searchbuf_head = cp;
 296
 297   /* Advise the searching functions about the space we have allocated
 298      for register data.  */
 299   if (regp)
 300     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 301
 302   return &cp->buf;
 303 }
 304
 305 \f
 306 static Lisp_Object
 307 looking_at_1 (string, posix)
 308      Lisp_Object string;
 309      int posix;
 310 {
 311   Lisp_Object val;
 312   unsigned char *p1, *p2;
 313   int s1, s2;
 314   register int i;
 315   struct re_pattern_buffer *bufp;
 316
 317   if (running_asynch_code)
 318     save_search_regs ();
 319
 320   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 321   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 322     = current_buffer->case_eqv_table;
 323
 324   CHECK_STRING (string);
 325   bufp = compile_pattern (string, &search_regs,
 326                           (!NILP (current_buffer->case_fold_search)
 327                            ? current_buffer->case_canon_table : Qnil),
 328                           posix,
 329                           !NILP (current_buffer->enable_multibyte_characters));
 330
 331   immediate_quit = 1;
 332   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 333
 334   /* Get pointers and sizes of the two strings
 335      that make up the visible portion of the buffer. */
 336
 337   p1 = BEGV_ADDR;
 338   s1 = GPT_BYTE - BEGV_BYTE;
 339   p2 = GAP_END_ADDR;
 340   s2 = ZV_BYTE - GPT_BYTE;
 341   if (s1 < 0)
 342     {
 343       p2 = p1;
 344       s2 = ZV_BYTE - BEGV_BYTE;
 345       s1 = 0;
 346     }
 347   if (s2 < 0)
 348     {
 349       s1 = ZV_BYTE - BEGV_BYTE;
 350       s2 = 0;
 351     }
 352
 353   re_match_object = Qnil;
 354
 355   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 356                   PT_BYTE - BEGV_BYTE, &search_regs,
 357                   ZV_BYTE - BEGV_BYTE);
 358   immediate_quit = 0;
 359
 360   if (i == -2)
 361     matcher_overflow ();
 362
 363   val = (0 <= i ? Qt : Qnil);
 364   if (i >= 0)
 365     for (i = 0; i < search_regs.num_regs; i++)
 366       if (search_regs.start[i] >= 0)
 367         {
 368           search_regs.start[i]
 369             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 370           search_regs.end[i]
 371             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 372         }
 373   XSETBUFFER (last_thing_searched, current_buffer);
 374   return val;
 375 }
 376
 377 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 378        doc: /* Return t if text after point matches regular expression REGEXP.
 379 This function modifies the match data that `match-beginning',
 380 `match-end' and `match-data' access; save and restore the match
 381 data if you want to preserve them.  */)
 382      (regexp)
 383      Lisp_Object regexp;
 384 {
 385   return looking_at_1 (regexp, 0);
 386 }
 387
 388 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 389        doc: /* Return t if text after point matches regular expression REGEXP.
 390 Find the longest match, in accord with Posix regular expression rules.
 391 This function modifies the match data that `match-beginning',
 392 `match-end' and `match-data' access; save and restore the match
 393 data if you want to preserve them.  */)
 394      (regexp)
 395      Lisp_Object regexp;
 396 {
 397   return looking_at_1 (regexp, 1);
 398 }
 399 \f
 400 static Lisp_Object
 401 string_match_1 (regexp, string, start, posix)
 402      Lisp_Object regexp, string, start;
 403      int posix;
 404 {
 405   int val;
 406   struct re_pattern_buffer *bufp;
 407   int pos, pos_byte;
 408   int i;
 409
 410   if (running_asynch_code)
 411     save_search_regs ();
 412
 413   CHECK_STRING (regexp);
 414   CHECK_STRING (string);
 415
 416   if (NILP (start))
 417     pos = 0, pos_byte = 0;
 418   else
 419     {
 420       int len = SCHARS (string);
 421
 422       CHECK_NUMBER (start);
 423       pos = XINT (start);
 424       if (pos < 0 && -pos <= len)
 425         pos = len + pos;
 426       else if (0 > pos || pos > len)
 427         args_out_of_range (string, start);
 428       pos_byte = string_char_to_byte (string, pos);
 429     }
 430
 431   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 432   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 433     = current_buffer->case_eqv_table;
 434
 435   bufp = compile_pattern (regexp, &search_regs,
 436                           (!NILP (current_buffer->case_fold_search)
 437                            ? current_buffer->case_canon_table : Qnil),
 438                           posix,
 439                           STRING_MULTIBYTE (string));
 440   immediate_quit = 1;
 441   re_match_object = string;
 442
 443   val = re_search (bufp, (char *) SDATA (string),
 444                    SBYTES (string), pos_byte,
 445                    SBYTES (string) - pos_byte,
 446                    &search_regs);
 447   immediate_quit = 0;
 448   last_thing_searched = Qt;
 449   if (val == -2)
 450     matcher_overflow ();
 451   if (val < 0) return Qnil;
 452
 453   for (i = 0; i < search_regs.num_regs; i++)
 454     if (search_regs.start[i] >= 0)
 455       {
 456         search_regs.start[i]
 457           = string_byte_to_char (string, search_regs.start[i]);
 458         search_regs.end[i]
 459           = string_byte_to_char (string, search_regs.end[i]);
 460       }
 461
 462   return make_number (string_byte_to_char (string, val));
 463 }
 464
 465 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 466        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 467 Matching ignores case if `case-fold-search' is non-nil.
 468 If third arg START is non-nil, start search at that index in STRING.
 469 For index of first char beyond the match, do (match-end 0).
 470 `match-end' and `match-beginning' also give indices of substrings
 471 matched by parenthesis constructs in the pattern.
 472
 473 You can use the function `match-string' to extract the substrings
 474 matched by the parenthesis constructions in REGEXP. */)
 475      (regexp, string, start)
 476      Lisp_Object regexp, string, start;
 477 {
 478   return string_match_1 (regexp, string, start, 0);
 479 }
 480
 481 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 482        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 483 Find the longest match, in accord with Posix regular expression rules.
 484 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 485 If third arg START is non-nil, start search at that index in STRING.
 486 For index of first char beyond the match, do (match-end 0).
 487 `match-end' and `match-beginning' also give indices of substrings
 488 matched by parenthesis constructs in the pattern.  */)
 489      (regexp, string, start)
 490      Lisp_Object regexp, string, start;
 491 {
 492   return string_match_1 (regexp, string, start, 1);
 493 }
 494
 495 /* Match REGEXP against STRING, searching all of STRING,
 496    and return the index of the match, or negative on failure.
 497    This does not clobber the match data.  */
 498
 499 int
 500 fast_string_match (regexp, string)
 501      Lisp_Object regexp, string;
 502 {
 503   int val;
 504   struct re_pattern_buffer *bufp;
 505
 506   bufp = compile_pattern (regexp, 0, Qnil,
 507                           0, STRING_MULTIBYTE (string));
 508   immediate_quit = 1;
 509   re_match_object = string;
 510
 511   val = re_search (bufp, (char *) SDATA (string),
 512                    SBYTES (string), 0,
 513                    SBYTES (string), 0);
 514   immediate_quit = 0;
 515   return val;
 516 }
 517
 518 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 519    and return the index of the match, or negative on failure.
 520    This does not clobber the match data.
 521    We assume that STRING contains single-byte characters.  */
 522
 523 extern Lisp_Object Vascii_downcase_table;
 524
 525 int
 526 fast_c_string_match_ignore_case (regexp, string)
 527      Lisp_Object regexp;
 528      const char *string;
 529 {
 530   int val;
 531   struct re_pattern_buffer *bufp;
 532   int len = strlen (string);
 533
 534   regexp = string_make_unibyte (regexp);
 535   re_match_object = Qt;
 536   bufp = compile_pattern (regexp, 0,
 537                           Vascii_canon_table, 0,
 538                           0);
 539   immediate_quit = 1;
 540   val = re_search (bufp, string, len, 0, len, 0);
 541   immediate_quit = 0;
 542   return val;
 543 }
 544
 545 /* Like fast_string_match but ignore case.  */
 546
 547 int
 548 fast_string_match_ignore_case (regexp, string)
 549      Lisp_Object regexp, string;
 550 {
 551   int val;
 552   struct re_pattern_buffer *bufp;
 553
 554   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 555                           0, STRING_MULTIBYTE (string));
 556   immediate_quit = 1;
 557   re_match_object = string;
 558
 559   val = re_search (bufp, (char *) SDATA (string),
 560                    SBYTES (string), 0,
 561                    SBYTES (string), 0);
 562   immediate_quit = 0;
 563   return val;
 564 }
 565 \f
 566 /* The newline cache: remembering which sections of text have no newlines.  */
 567
 568 /* If the user has requested newline caching, make sure it's on.
 569    Otherwise, make sure it's off.
 570    This is our cheezy way of associating an action with the change of
 571    state of a buffer-local variable.  */
 572 static void
 573 newline_cache_on_off (buf)
 574      struct buffer *buf;
 575 {
 576   if (NILP (buf->cache_long_line_scans))
 577     {
 578       /* It should be off.  */
 579       if (buf->newline_cache)
 580         {
 581           free_region_cache (buf->newline_cache);
 582           buf->newline_cache = 0;
 583         }
 584     }
 585   else
 586     {
 587       /* It should be on.  */
 588       if (buf->newline_cache == 0)
 589         buf->newline_cache = new_region_cache ();
 590     }
 591 }
 592
 593 \f
 594 /* Search for COUNT instances of the character TARGET between START and END.
 595
 596    If COUNT is positive, search forwards; END must be >= START.
 597    If COUNT is negative, search backwards for the -COUNTth instance;
 598       END must be <= START.
 599    If COUNT is zero, do anything you please; run rogue, for all I care.
 600
 601    If END is zero, use BEGV or ZV instead, as appropriate for the
 602    direction indicated by COUNT.
 603
 604    If we find COUNT instances, set *SHORTAGE to zero, and return the
 605    position past the COUNTth match.  Note that for reverse motion
 606    this is not the same as the usual convention for Emacs motion commands.
 607
 608    If we don't find COUNT instances before reaching END, set *SHORTAGE
 609    to the number of TARGETs left unfound, and return END.
 610
 611    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 612    except when inside redisplay.  */
 613
 614 int
 615 scan_buffer (target, start, end, count, shortage, allow_quit)
 616      register int target;
 617      int start, end;
 618      int count;
 619      int *shortage;
 620      int allow_quit;
 621 {
 622   struct region_cache *newline_cache;
 623   int direction;
 624
 625   if (count > 0)
 626     {
 627       direction = 1;
 628       if (! end) end = ZV;
 629     }
 630   else
 631     {
 632       direction = -1;
 633       if (! end) end = BEGV;
 634     }
 635
 636   newline_cache_on_off (current_buffer);
 637   newline_cache = current_buffer->newline_cache;
 638
 639   if (shortage != 0)
 640     *shortage = 0;
 641
 642   immediate_quit = allow_quit;
 643
 644   if (count > 0)
 645     while (start != end)
 646       {
 647         /* Our innermost scanning loop is very simple; it doesn't know
 648            about gaps, buffer ends, or the newline cache.  ceiling is
 649            the position of the last character before the next such
 650            obstacle --- the last character the dumb search loop should
 651            examine.  */
 652         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 653         int start_byte = CHAR_TO_BYTE (start);
 654         int tem;
 655
 656         /* If we're looking for a newline, consult the newline cache
 657            to see where we can avoid some scanning.  */
 658         if (target == '\n' && newline_cache)
 659           {
 660             int next_change;
 661             immediate_quit = 0;
 662             while (region_cache_forward
 663                    (current_buffer, newline_cache, start_byte, &next_change))
 664               start_byte = next_change;
 665             immediate_quit = allow_quit;
 666
 667             /* START should never be after END.  */
 668             if (start_byte > ceiling_byte)
 669               start_byte = ceiling_byte;
 670
 671             /* Now the text after start is an unknown region, and
 672                next_change is the position of the next known region. */
 673             ceiling_byte = min (next_change - 1, ceiling_byte);
 674           }
 675
 676         /* The dumb loop can only scan text stored in contiguous
 677            bytes. BUFFER_CEILING_OF returns the last character
 678            position that is contiguous, so the ceiling is the
 679            position after that.  */
 680         tem = BUFFER_CEILING_OF (start_byte);
 681         ceiling_byte = min (tem, ceiling_byte);
 682
 683         {
 684           /* The termination address of the dumb loop.  */
 685           register unsigned char *ceiling_addr
 686             = BYTE_POS_ADDR (ceiling_byte) + 1;
 687           register unsigned char *cursor
 688             = BYTE_POS_ADDR (start_byte);
 689           unsigned char *base = cursor;
 690
 691           while (cursor < ceiling_addr)
 692             {
 693               unsigned char *scan_start = cursor;
 694
 695               /* The dumb loop.  */
 696               while (*cursor != target && ++cursor < ceiling_addr)
 697                 ;
 698
 699               /* If we're looking for newlines, cache the fact that
 700                  the region from start to cursor is free of them. */
 701               if (target == '\n' && newline_cache)
 702                 know_region_cache (current_buffer, newline_cache,
 703                                    start_byte + scan_start - base,
 704                                    start_byte + cursor - base);
 705
 706               /* Did we find the target character?  */
 707               if (cursor < ceiling_addr)
 708                 {
 709                   if (--count == 0)
 710                     {
 711                       immediate_quit = 0;
 712                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 713                     }
 714                   cursor++;
 715                 }
 716             }
 717
 718           start = BYTE_TO_CHAR (start_byte + cursor - base);
 719         }
 720       }
 721   else
 722     while (start > end)
 723       {
 724         /* The last character to check before the next obstacle.  */
 725         int ceiling_byte = CHAR_TO_BYTE (end);
 726         int start_byte = CHAR_TO_BYTE (start);
 727         int tem;
 728
 729         /* Consult the newline cache, if appropriate.  */
 730         if (target == '\n' && newline_cache)
 731           {
 732             int next_change;
 733             immediate_quit = 0;
 734             while (region_cache_backward
 735                    (current_buffer, newline_cache, start_byte, &next_change))
 736               start_byte = next_change;
 737             immediate_quit = allow_quit;
 738
 739             /* Start should never be at or before end.  */
 740             if (start_byte <= ceiling_byte)
 741               start_byte = ceiling_byte + 1;
 742
 743             /* Now the text before start is an unknown region, and
 744                next_change is the position of the next known region. */
 745             ceiling_byte = max (next_change, ceiling_byte);
 746           }
 747
 748         /* Stop scanning before the gap.  */
 749         tem = BUFFER_FLOOR_OF (start_byte - 1);
 750         ceiling_byte = max (tem, ceiling_byte);
 751
 752         {
 753           /* The termination address of the dumb loop.  */
 754           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 755           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 756           unsigned char *base = cursor;
 757
 758           while (cursor >= ceiling_addr)
 759             {
 760               unsigned char *scan_start = cursor;
 761
 762               while (*cursor != target && --cursor >= ceiling_addr)
 763                 ;
 764
 765               /* If we're looking for newlines, cache the fact that
 766                  the region from after the cursor to start is free of them.  */
 767               if (target == '\n' && newline_cache)
 768                 know_region_cache (current_buffer, newline_cache,
 769                                    start_byte + cursor - base,
 770                                    start_byte + scan_start - base);
 771
 772               /* Did we find the target character?  */
 773               if (cursor >= ceiling_addr)
 774                 {
 775                   if (++count >= 0)
 776                     {
 777                       immediate_quit = 0;
 778                       return BYTE_TO_CHAR (start_byte + cursor - base);
 779                     }
 780                   cursor--;
 781                 }
 782             }
 783
 784           start = BYTE_TO_CHAR (start_byte + cursor - base);
 785         }
 786       }
 787
 788   immediate_quit = 0;
 789   if (shortage != 0)
 790     *shortage = count * direction;
 791   return start;
 792 }
 793 \f
 794 /* Search for COUNT instances of a line boundary, which means either a
 795    newline or (if selective display enabled) a carriage return.
 796    Start at START.  If COUNT is negative, search backwards.
 797
 798    We report the resulting position by calling TEMP_SET_PT_BOTH.
 799
 800    If we find COUNT instances. we position after (always after,
 801    even if scanning backwards) the COUNTth match, and return 0.
 802
 803    If we don't find COUNT instances before reaching the end of the
 804    buffer (or the beginning, if scanning backwards), we return
 805    the number of line boundaries left unfound, and position at
 806    the limit we bumped up against.
 807
 808    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 809    except in special cases.  */
 810
 811 int
 812 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 813      int start, start_byte;
 814      int limit, limit_byte;
 815      register int count;
 816      int allow_quit;
 817 {
 818   int direction = ((count > 0) ? 1 : -1);
 819
 820   register unsigned char *cursor;
 821   unsigned char *base;
 822
 823   register int ceiling;
 824   register unsigned char *ceiling_addr;
 825
 826   int old_immediate_quit = immediate_quit;
 827
 828   /* The code that follows is like scan_buffer
 829      but checks for either newline or carriage return.  */
 830
 831   if (allow_quit)
 832     immediate_quit++;
 833
 834   start_byte = CHAR_TO_BYTE (start);
 835
 836   if (count > 0)
 837     {
 838       while (start_byte < limit_byte)
 839         {
 840           ceiling =  BUFFER_CEILING_OF (start_byte);
 841           ceiling = min (limit_byte - 1, ceiling);
 842           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 843           base = (cursor = BYTE_POS_ADDR (start_byte));
 844           while (1)
 845             {
 846               while (*cursor != '\n' && ++cursor != ceiling_addr)
 847                 ;
 848
 849               if (cursor != ceiling_addr)
 850                 {
 851                   if (--count == 0)
 852                     {
 853                       immediate_quit = old_immediate_quit;
 854                       start_byte = start_byte + cursor - base + 1;
 855                       start = BYTE_TO_CHAR (start_byte);
 856                       TEMP_SET_PT_BOTH (start, start_byte);
 857                       return 0;
 858                     }
 859                   else
 860                     if (++cursor == ceiling_addr)
 861                       break;
 862                 }
 863               else
 864                 break;
 865             }
 866           start_byte += cursor - base;
 867         }
 868     }
 869   else
 870     {
 871       while (start_byte > limit_byte)
 872         {
 873           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 874           ceiling = max (limit_byte, ceiling);
 875           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 876           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 877           while (1)
 878             {
 879               while (--cursor != ceiling_addr && *cursor != '\n')
 880                 ;
 881
 882               if (cursor != ceiling_addr)
 883                 {
 884                   if (++count == 0)
 885                     {
 886                       immediate_quit = old_immediate_quit;
 887                       /* Return the position AFTER the match we found.  */
 888                       start_byte = start_byte + cursor - base + 1;
 889                       start = BYTE_TO_CHAR (start_byte);
 890                       TEMP_SET_PT_BOTH (start, start_byte);
 891                       return 0;
 892                     }
 893                 }
 894               else
 895                 break;
 896             }
 897           /* Here we add 1 to compensate for the last decrement
 898              of CURSOR, which took it past the valid range.  */
 899           start_byte += cursor - base + 1;
 900         }
 901     }
 902
 903   TEMP_SET_PT_BOTH (limit, limit_byte);
 904   immediate_quit = old_immediate_quit;
 905
 906   return count * direction;
 907 }
 908
 909 int
 910 find_next_newline_no_quit (from, cnt)
 911      register int from, cnt;
 912 {
 913   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 914 }
 915
 916 /* Like find_next_newline, but returns position before the newline,
 917    not after, and only search up to TO.  This isn't just
 918    find_next_newline (...)-1, because you might hit TO.  */
 919
 920 int
 921 find_before_next_newline (from, to, cnt)
 922      int from, to, cnt;
 923 {
 924   int shortage;
 925   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 926
 927   if (shortage == 0)
 928     pos--;
 929
 930   return pos;
 931 }
 932 \f
 933 /* Subroutines of Lisp buffer search functions. */
 934
 935 static Lisp_Object
 936 search_command (string, bound, noerror, count, direction, RE, posix)
 937      Lisp_Object string, bound, noerror, count;
 938      int direction;
 939      int RE;
 940      int posix;
 941 {
 942   register int np;
 943   int lim, lim_byte;
 944   int n = direction;
 945
 946   if (!NILP (count))
 947     {
 948       CHECK_NUMBER (count);
 949       n *= XINT (count);
 950     }
 951
 952   CHECK_STRING (string);
 953   if (NILP (bound))
 954     {
 955       if (n > 0)
 956         lim = ZV, lim_byte = ZV_BYTE;
 957       else
 958         lim = BEGV, lim_byte = BEGV_BYTE;
 959     }
 960   else
 961     {
 962       CHECK_NUMBER_COERCE_MARKER (bound);
 963       lim = XINT (bound);
 964       if (n > 0 ? lim < PT : lim > PT)
 965         error ("Invalid search bound (wrong side of point)");
 966       if (lim > ZV)
 967         lim = ZV, lim_byte = ZV_BYTE;
 968       else if (lim < BEGV)
 969         lim = BEGV, lim_byte = BEGV_BYTE;
 970       else
 971         lim_byte = CHAR_TO_BYTE (lim);
 972     }
 973
 974   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 975   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 976     = current_buffer->case_eqv_table;
 977
 978   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 979                       (!NILP (current_buffer->case_fold_search)
 980                        ? current_buffer->case_canon_table
 981                        : Qnil),
 982                       (!NILP (current_buffer->case_fold_search)
 983                        ? current_buffer->case_eqv_table
 984                        : Qnil),
 985                       posix);
 986   if (np <= 0)
 987     {
 988       if (NILP (noerror))
 989         xsignal1 (Qsearch_failed, string);
 990
 991       if (!EQ (noerror, Qt))
 992         {
 993           if (lim < BEGV || lim > ZV)
 994             abort ();
 995           SET_PT_BOTH (lim, lim_byte);
 996           return Qnil;
 997 #if 0 /* This would be clean, but maybe programs depend on
 998          a value of nil here.  */
 999           np = lim;
1000 #endif
1001         }
1002       else
1003         return Qnil;
1004     }
1005
1006   if (np < BEGV || np > ZV)
1007     abort ();
1008
1009   SET_PT (np);
1010
1011   return make_number (np);
1012 }
1013 \f
1014 /* Return 1 if REGEXP it matches just one constant string.  */
1015
1016 static int
1017 trivial_regexp_p (regexp)
1018      Lisp_Object regexp;
1019 {
1020   int len = SBYTES (regexp);
1021   unsigned char *s = SDATA (regexp);
1022   while (--len >= 0)
1023     {
1024       switch (*s++)
1025         {
1026         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1027           return 0;
1028         case '\\':
1029           if (--len < 0)
1030             return 0;
1031           switch (*s++)
1032             {
1033             case '|': case '(': case ')': case '`': case '\'': case 'b':
1034             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1035             case 'S': case '=': case '{': case '}': case '_':
1036             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1037             case '1': case '2': case '3': case '4': case '5':
1038             case '6': case '7': case '8': case '9':
1039               return 0;
1040             }
1041         }
1042     }
1043   return 1;
1044 }
1045
1046 /* Search for the n'th occurrence of STRING in the current buffer,
1047    starting at position POS and stopping at position LIM,
1048    treating STRING as a literal string if RE is false or as
1049    a regular expression if RE is true.
1050
1051    If N is positive, searching is forward and LIM must be greater than POS.
1052    If N is negative, searching is backward and LIM must be less than POS.
1053
1054    Returns -x if x occurrences remain to be found (x > 0),
1055    or else the position at the beginning of the Nth occurrence
1056    (if searching backward) or the end (if searching forward).
1057
1058    POSIX is nonzero if we want full backtracking (POSIX style)
1059    for this pattern.  0 means backtrack only enough to get a valid match.  */
1060
1061 #define TRANSLATE(out, trt, d)                  \
1062 do                                              \
1063   {                                             \
1064     if (! NILP (trt))                           \
1065       {                                         \
1066         Lisp_Object temp;                       \
1067         temp = Faref (trt, make_number (d));    \
1068         if (INTEGERP (temp))                    \
1069           out = XINT (temp);                    \
1070         else                                    \
1071           out = d;                              \
1072       }                                         \
1073     else                                        \
1074       out = d;                                  \
1075   }                                             \
1076 while (0)
1077
1078 static int
1079 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1080                RE, trt, inverse_trt, posix)
1081      Lisp_Object string;
1082      int pos;
1083      int pos_byte;
1084      int lim;
1085      int lim_byte;
1086      int n;
1087      int RE;
1088      Lisp_Object trt;
1089      Lisp_Object inverse_trt;
1090      int posix;
1091 {
1092   int len = SCHARS (string);
1093   int len_byte = SBYTES (string);
1094   register int i;
1095
1096   if (running_asynch_code)
1097     save_search_regs ();
1098
1099   /* Searching 0 times means don't move.  */
1100   /* Null string is found at starting position.  */
1101   if (len == 0 || n == 0)
1102     {
1103       set_search_regs (pos_byte, 0);
1104       return pos;
1105     }
1106
1107   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1108     {
1109       unsigned char *p1, *p2;
1110       int s1, s2;
1111       struct re_pattern_buffer *bufp;
1112
1113       bufp = compile_pattern (string, &search_regs, trt, posix,
1114                               !NILP (current_buffer->enable_multibyte_characters));
1115
1116       immediate_quit = 1;       /* Quit immediately if user types ^G,
1117                                    because letting this function finish
1118                                    can take too long. */
1119       QUIT;                     /* Do a pending quit right away,
1120                                    to avoid paradoxical behavior */
1121       /* Get pointers and sizes of the two strings
1122          that make up the visible portion of the buffer. */
1123
1124       p1 = BEGV_ADDR;
1125       s1 = GPT_BYTE - BEGV_BYTE;
1126       p2 = GAP_END_ADDR;
1127       s2 = ZV_BYTE - GPT_BYTE;
1128       if (s1 < 0)
1129         {
1130           p2 = p1;
1131           s2 = ZV_BYTE - BEGV_BYTE;
1132           s1 = 0;
1133         }
1134       if (s2 < 0)
1135         {
1136           s1 = ZV_BYTE - BEGV_BYTE;
1137           s2 = 0;
1138         }
1139       re_match_object = Qnil;
1140
1141       while (n < 0)
1142         {
1143           int val;
1144           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1145                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1146                              &search_regs,
1147                              /* Don't allow match past current point */
1148                              pos_byte - BEGV_BYTE);
1149           if (val == -2)
1150             {
1151               matcher_overflow ();
1152             }
1153           if (val >= 0)
1154             {
1155               pos_byte = search_regs.start[0] + BEGV_BYTE;
1156               for (i = 0; i < search_regs.num_regs; i++)
1157                 if (search_regs.start[i] >= 0)
1158                   {
1159                     search_regs.start[i]
1160                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1161                     search_regs.end[i]
1162                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1163                   }
1164               XSETBUFFER (last_thing_searched, current_buffer);
1165               /* Set pos to the new position. */
1166               pos = search_regs.start[0];
1167             }
1168           else
1169             {
1170               immediate_quit = 0;
1171               return (n);
1172             }
1173           n++;
1174         }
1175       while (n > 0)
1176         {
1177           int val;
1178           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1179                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1180                              &search_regs,
1181                              lim_byte - BEGV_BYTE);
1182           if (val == -2)
1183             {
1184               matcher_overflow ();
1185             }
1186           if (val >= 0)
1187             {
1188               pos_byte = search_regs.end[0] + BEGV_BYTE;
1189               for (i = 0; i < search_regs.num_regs; i++)
1190                 if (search_regs.start[i] >= 0)
1191                   {
1192                     search_regs.start[i]
1193                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1194                     search_regs.end[i]
1195                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1196                   }
1197               XSETBUFFER (last_thing_searched, current_buffer);
1198               pos = search_regs.end[0];
1199             }
1200           else
1201             {
1202               immediate_quit = 0;
1203               return (0 - n);
1204             }
1205           n--;
1206         }
1207       immediate_quit = 0;
1208       return (pos);
1209     }
1210   else                          /* non-RE case */
1211     {
1212       unsigned char *raw_pattern, *pat;
1213       int raw_pattern_size;
1214       int raw_pattern_size_byte;
1215       unsigned char *patbuf;
1216       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1217       unsigned char *base_pat;
1218       /* Set to positive if we find a non-ASCII char that need
1219          translation.  Otherwise set to zero later.  */
1220       int charset_base = -1;
1221       int boyer_moore_ok = 1;
1222
1223       /* MULTIBYTE says whether the text to be searched is multibyte.
1224          We must convert PATTERN to match that, or we will not really
1225          find things right.  */
1226
1227       if (multibyte == STRING_MULTIBYTE (string))
1228         {
1229           raw_pattern = (unsigned char *) SDATA (string);
1230           raw_pattern_size = SCHARS (string);
1231           raw_pattern_size_byte = SBYTES (string);
1232         }
1233       else if (multibyte)
1234         {
1235           raw_pattern_size = SCHARS (string);
1236           raw_pattern_size_byte
1237             = count_size_as_multibyte (SDATA (string),
1238                                        raw_pattern_size);
1239           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1240           copy_text (SDATA (string), raw_pattern,
1241                      SCHARS (string), 0, 1);
1242         }
1243       else
1244         {
1245           /* Converting multibyte to single-byte.
1246
1247              ??? Perhaps this conversion should be done in a special way
1248              by subtracting nonascii-insert-offset from each non-ASCII char,
1249              so that only the multibyte chars which really correspond to
1250              the chosen single-byte character set can possibly match.  */
1251           raw_pattern_size = SCHARS (string);
1252           raw_pattern_size_byte = SCHARS (string);
1253           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1254           copy_text (SDATA (string), raw_pattern,
1255                      SBYTES (string), 1, 0);
1256         }
1257
1258       /* Copy and optionally translate the pattern.  */
1259       len = raw_pattern_size;
1260       len_byte = raw_pattern_size_byte;
1261       patbuf = (unsigned char *) alloca (len_byte);
1262       pat = patbuf;
1263       base_pat = raw_pattern;
1264       if (multibyte)
1265         {
1266           /* Fill patbuf by translated characters in STRING while
1267              checking if we can use boyer-moore search.  If TRT is
1268              non-nil, we can use boyer-moore search only if TRT can be
1269              represented by the byte array of 256 elements.  For that,
1270              all non-ASCII case-equivalents of all case-senstive
1271              characters in STRING must belong to the same charset and
1272              row.  */
1273
1274           while (--len >= 0)
1275             {
1276               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1277               int c, translated, inverse;
1278               int in_charlen, charlen;
1279
1280               /* If we got here and the RE flag is set, it's because we're
1281                  dealing with a regexp known to be trivial, so the backslash
1282                  just quotes the next character.  */
1283               if (RE && *base_pat == '\\')
1284                 {
1285                   len--;
1286                   raw_pattern_size--;
1287                   len_byte--;
1288                   base_pat++;
1289                 }
1290
1291               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1292
1293               if (NILP (trt))
1294                 {
1295                   str = base_pat;
1296                   charlen = in_charlen;
1297                 }
1298               else
1299                 {
1300                   /* Translate the character.  */
1301                   TRANSLATE (translated, trt, c);
1302                   charlen = CHAR_STRING (translated, str_base);
1303                   str = str_base;
1304
1305                   /* Check if C has any other case-equivalents.  */
1306                   TRANSLATE (inverse, inverse_trt, c);
1307                   /* If so, check if we can use boyer-moore.  */
1308                   if (c != inverse && boyer_moore_ok)
1309                     {
1310                       /* Check if all equivalents belong to the same
1311                          charset & row.  Note that the check of C
1312                          itself is done by the last iteration.  Note
1313                          also that we don't have to check ASCII
1314                          characters because boyer-moore search can
1315                          always handle their translation.  */
1316                       while (1)
1317                         {
1318                           if (ASCII_BYTE_P (inverse))
1319                             {
1320                               if (charset_base > 0)
1321                                 {
1322                                   boyer_moore_ok = 0;
1323                                   break;
1324                                 }
1325                               charset_base = 0;
1326                             }
1327                           else if (SINGLE_BYTE_CHAR_P (inverse))
1328                             {
1329                               /* Boyer-moore search can't handle a
1330                                  translation of an eight-bit
1331                                  character.  */
1332                               boyer_moore_ok = 0;
1333                               break;
1334                             }
1335                           else if (charset_base < 0)
1336                             charset_base = inverse & ~CHAR_FIELD3_MASK;
1337                           else if ((inverse & ~CHAR_FIELD3_MASK)
1338                                    != charset_base)
1339                             {
1340                               boyer_moore_ok = 0;
1341                               break;
1342                             }
1343                           if (c == inverse)
1344                             break;
1345                           TRANSLATE (inverse, inverse_trt, inverse);
1346                         }
1347                     }
1348                 }
1349               if (charset_base < 0)
1350                 charset_base = 0;
1351
1352               /* Store this character into the translated pattern.  */
1353               bcopy (str, pat, charlen);
1354               pat += charlen;
1355               base_pat += in_charlen;
1356               len_byte -= in_charlen;
1357             }
1358         }
1359       else
1360         {
1361           /* Unibyte buffer.  */
1362           charset_base = 0;
1363           while (--len >= 0)
1364             {
1365               int c, translated;
1366
1367               /* If we got here and the RE flag is set, it's because we're
1368                  dealing with a regexp known to be trivial, so the backslash
1369                  just quotes the next character.  */
1370               if (RE && *base_pat == '\\')
1371                 {
1372                   len--;
1373                   raw_pattern_size--;
1374                   base_pat++;
1375                 }
1376               c = *base_pat++;
1377               TRANSLATE (translated, trt, c);
1378               *pat++ = translated;
1379             }
1380         }
1381
1382       len_byte = pat - patbuf;
1383       len = raw_pattern_size;
1384       pat = base_pat = patbuf;
1385
1386       if (boyer_moore_ok)
1387         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1388                             pos, pos_byte, lim, lim_byte,
1389                             charset_base);
1390       else
1391         return simple_search (n, pat, len, len_byte, trt,
1392                               pos, pos_byte, lim, lim_byte);
1393     }
1394 }
1395 \f
1396 /* Do a simple string search N times for the string PAT,
1397    whose length is LEN/LEN_BYTE,
1398    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1399    TRT is the translation table.
1400
1401    Return the character position where the match is found.
1402    Otherwise, if M matches remained to be found, return -M.
1403
1404    This kind of search works regardless of what is in PAT and
1405    regardless of what is in TRT.  It is used in cases where
1406    boyer_moore cannot work.  */
1407
1408 static int
1409 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1410      int n;
1411      unsigned char *pat;
1412      int len, len_byte;
1413      Lisp_Object trt;
1414      int pos, pos_byte;
1415      int lim, lim_byte;
1416 {
1417   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1418   int forward = n > 0;
1419
1420   if (lim > pos && multibyte)
1421     while (n > 0)
1422       {
1423         while (1)
1424           {
1425             /* Try matching at position POS.  */
1426             int this_pos = pos;
1427             int this_pos_byte = pos_byte;
1428             int this_len = len;
1429             int this_len_byte = len_byte;
1430             unsigned char *p = pat;
1431             if (pos + len > lim)
1432               goto stop;
1433
1434             while (this_len > 0)
1435               {
1436                 int charlen, buf_charlen;
1437                 int pat_ch, buf_ch;
1438
1439                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1440                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1441                                                  ZV_BYTE - this_pos_byte,
1442                                                  buf_charlen);
1443                 TRANSLATE (buf_ch, trt, buf_ch);
1444
1445                 if (buf_ch != pat_ch)
1446                   break;
1447
1448                 this_len_byte -= charlen;
1449                 this_len--;
1450                 p += charlen;
1451
1452                 this_pos_byte += buf_charlen;
1453                 this_pos++;
1454               }
1455
1456             if (this_len == 0)
1457               {
1458                 pos += len;
1459                 pos_byte += len_byte;
1460                 break;
1461               }
1462
1463             INC_BOTH (pos, pos_byte);
1464           }
1465
1466         n--;
1467       }
1468   else if (lim > pos)
1469     while (n > 0)
1470       {
1471         while (1)
1472           {
1473             /* Try matching at position POS.  */
1474             int this_pos = pos;
1475             int this_len = len;
1476             unsigned char *p = pat;
1477
1478             if (pos + len > lim)
1479               goto stop;
1480
1481             while (this_len > 0)
1482               {
1483                 int pat_ch = *p++;
1484                 int buf_ch = FETCH_BYTE (this_pos);
1485                 TRANSLATE (buf_ch, trt, buf_ch);
1486
1487                 if (buf_ch != pat_ch)
1488                   break;
1489
1490                 this_len--;
1491                 this_pos++;
1492               }
1493
1494             if (this_len == 0)
1495               {
1496                 pos += len;
1497                 break;
1498               }
1499
1500             pos++;
1501           }
1502
1503         n--;
1504       }
1505   /* Backwards search.  */
1506   else if (lim < pos && multibyte)
1507     while (n < 0)
1508       {
1509         while (1)
1510           {
1511             /* Try matching at position POS.  */
1512             int this_pos = pos - len;
1513             int this_pos_byte = pos_byte - len_byte;
1514             int this_len = len;
1515             int this_len_byte = len_byte;
1516             unsigned char *p = pat;
1517
1518             if (this_pos < lim || this_pos_byte < lim_byte)
1519               goto stop;
1520
1521             while (this_len > 0)
1522               {
1523                 int charlen, buf_charlen;
1524                 int pat_ch, buf_ch;
1525
1526                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1527                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1528                                                  ZV_BYTE - this_pos_byte,
1529                                                  buf_charlen);
1530                 TRANSLATE (buf_ch, trt, buf_ch);
1531
1532                 if (buf_ch != pat_ch)
1533                   break;
1534
1535                 this_len_byte -= charlen;
1536                 this_len--;
1537                 p += charlen;
1538                 this_pos_byte += buf_charlen;
1539                 this_pos++;
1540               }
1541
1542             if (this_len == 0)
1543               {
1544                 pos -= len;
1545                 pos_byte -= len_byte;
1546                 break;
1547               }
1548
1549             DEC_BOTH (pos, pos_byte);
1550           }
1551
1552         n++;
1553       }
1554   else if (lim < pos)
1555     while (n < 0)
1556       {
1557         while (1)
1558           {
1559             /* Try matching at position POS.  */
1560             int this_pos = pos - len;
1561             int this_len = len;
1562             unsigned char *p = pat;
1563
1564             if (pos - len < lim)
1565               goto stop;
1566
1567             while (this_len > 0)
1568               {
1569                 int pat_ch = *p++;
1570                 int buf_ch = FETCH_BYTE (this_pos);
1571                 TRANSLATE (buf_ch, trt, buf_ch);
1572
1573                 if (buf_ch != pat_ch)
1574                   break;
1575                 this_len--;
1576                 this_pos++;
1577               }
1578
1579             if (this_len == 0)
1580               {
1581                 pos -= len;
1582                 break;
1583               }
1584
1585             pos--;
1586           }
1587
1588         n++;
1589       }
1590
1591  stop:
1592   if (n == 0)
1593     {
1594       if (forward)
1595         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1596       else
1597         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1598
1599       return pos;
1600     }
1601   else if (n > 0)
1602     return -n;
1603   else
1604     return n;
1605 }
1606 \f
1607 /* Do Boyer-Moore search N times for the string BASE_PAT,
1608    whose length is LEN/LEN_BYTE,
1609    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1610    DIRECTION says which direction we search in.
1611    TRT and INVERSE_TRT are translation tables.
1612    Characters in PAT are already translated by TRT.
1613
1614    This kind of search works if all the characters in BASE_PAT that
1615    have nontrivial translation are the same aside from the last byte.
1616    This makes it possible to translate just the last byte of a
1617    character, and do so after just a simple test of the context.
1618    CHARSET_BASE is nonzero if there is such a non-ASCII character.
1619
1620    If that criterion is not satisfied, do not call this function.  */
1621
1622 static int
1623 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1624              pos, pos_byte, lim, lim_byte, charset_base)
1625      int n;
1626      unsigned char *base_pat;
1627      int len, len_byte;
1628      Lisp_Object trt;
1629      Lisp_Object inverse_trt;
1630      int pos, pos_byte;
1631      int lim, lim_byte;
1632      int charset_base;
1633 {
1634   int direction = ((n > 0) ? 1 : -1);
1635   register int dirlen;
1636   int infinity, limit, stride_for_teases = 0;
1637   register int *BM_tab;
1638   int *BM_tab_base;
1639   register unsigned char *cursor, *p_limit;
1640   register int i, j;
1641   unsigned char *pat, *pat_end;
1642   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1643
1644   unsigned char simple_translate[0400];
1645   /* These are set to the preceding bytes of a byte to be translated
1646      if charset_base is nonzero.  As the maximum byte length of a
1647      multibyte character is 4, we have to check at most three previous
1648      bytes.  */
1649   int translate_prev_byte1 = 0;
1650   int translate_prev_byte2 = 0;
1651   int translate_prev_byte3 = 0;
1652
1653 #ifdef C_ALLOCA
1654   int BM_tab_space[0400];
1655   BM_tab = &BM_tab_space[0];
1656 #else
1657   BM_tab = (int *) alloca (0400 * sizeof (int));
1658 #endif
1659   /* The general approach is that we are going to maintain that we know */
1660   /* the first (closest to the present position, in whatever direction */
1661   /* we're searching) character that could possibly be the last */
1662   /* (furthest from present position) character of a valid match.  We */
1663   /* advance the state of our knowledge by looking at that character */
1664   /* and seeing whether it indeed matches the last character of the */
1665   /* pattern.  If it does, we take a closer look.  If it does not, we */
1666   /* move our pointer (to putative last characters) as far as is */
1667   /* logically possible.  This amount of movement, which I call a */
1668   /* stride, will be the length of the pattern if the actual character */
1669   /* appears nowhere in the pattern, otherwise it will be the distance */
1670   /* from the last occurrence of that character to the end of the */
1671   /* pattern. */
1672   /* As a coding trick, an enormous stride is coded into the table for */
1673   /* characters that match the last character.  This allows use of only */
1674   /* a single test, a test for having gone past the end of the */
1675   /* permissible match region, to test for both possible matches (when */
1676   /* the stride goes past the end immediately) and failure to */
1677   /* match (where you get nudged past the end one stride at a time). */
1678
1679   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1680   /* is determined only by the last character of the putative match. */
1681   /* If that character does not match, we will stride the proper */
1682   /* distance to propose a match that superimposes it on the last */
1683   /* instance of a character that matches it (per trt), or misses */
1684   /* it entirely if there is none. */
1685
1686   dirlen = len_byte * direction;
1687   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1688
1689   /* Record position after the end of the pattern.  */
1690   pat_end = base_pat + len_byte;
1691   /* BASE_PAT points to a character that we start scanning from.
1692      It is the first character in a forward search,
1693      the last character in a backward search.  */
1694   if (direction < 0)
1695     base_pat = pat_end - 1;
1696
1697   BM_tab_base = BM_tab;
1698   BM_tab += 0400;
1699   j = dirlen;           /* to get it in a register */
1700   /* A character that does not appear in the pattern induces a */
1701   /* stride equal to the pattern length. */
1702   while (BM_tab_base != BM_tab)
1703     {
1704       *--BM_tab = j;
1705       *--BM_tab = j;
1706       *--BM_tab = j;
1707       *--BM_tab = j;
1708     }
1709
1710   /* We use this for translation, instead of TRT itself.
1711      We fill this in to handle the characters that actually
1712      occur in the pattern.  Others don't matter anyway!  */
1713   bzero (simple_translate, sizeof simple_translate);
1714   for (i = 0; i < 0400; i++)
1715     simple_translate[i] = i;
1716
1717   if (charset_base)
1718     {
1719       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1720          byte following them are the target of translation.  */
1721       int sample_char = charset_base | 0x20;
1722       unsigned char str[MAX_MULTIBYTE_LENGTH];
1723       int len = CHAR_STRING (sample_char, str);
1724
1725       translate_prev_byte1 = str[len - 2];
1726       if (len > 2)
1727         {
1728           translate_prev_byte2 = str[len - 3];
1729           if (len > 3)
1730             translate_prev_byte3 = str[len - 4];
1731         }
1732     }
1733
1734   i = 0;
1735   while (i != infinity)
1736     {
1737       unsigned char *ptr = base_pat + i;
1738       i += direction;
1739       if (i == dirlen)
1740         i = infinity;
1741       if (! NILP (trt))
1742         {
1743           /* If the byte currently looking at is the last of a
1744              character to check case-equivalents, set CH to that
1745              character.  An ASCII character and a non-ASCII character
1746              matching with CHARSET_BASE are to be checked.  */
1747           int ch = -1;
1748
1749           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1750             ch = *ptr;
1751           else if (charset_base
1752                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1753             {
1754               unsigned char *charstart = ptr - 1;
1755
1756               while (! (CHAR_HEAD_P (*charstart)))
1757                 charstart--;
1758               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1759               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1760                 ch = -1;
1761             }
1762
1763           if (ch >= 0400)
1764             j = ((unsigned char) ch) | 0200;
1765           else
1766             j = *ptr;
1767
1768           if (i == infinity)
1769             stride_for_teases = BM_tab[j];
1770
1771           BM_tab[j] = dirlen - i;
1772           /* A translation table is accompanied by its inverse -- see */
1773           /* comment following downcase_table for details */
1774           if (ch >= 0)
1775             {
1776               int starting_ch = ch;
1777               int starting_j = j;
1778
1779               while (1)
1780                 {
1781                   TRANSLATE (ch, inverse_trt, ch);
1782                   if (ch >= 0400)
1783                     j = ((unsigned char) ch) | 0200;
1784                   else
1785                     j = (unsigned char) ch;
1786
1787                   /* For all the characters that map into CH,
1788                      set up simple_translate to map the last byte
1789                      into STARTING_J.  */
1790                   simple_translate[j] = starting_j;
1791                   if (ch == starting_ch)
1792                     break;
1793                   BM_tab[j] = dirlen - i;
1794                 }
1795             }
1796         }
1797       else
1798         {
1799           j = *ptr;
1800
1801           if (i == infinity)
1802             stride_for_teases = BM_tab[j];
1803           BM_tab[j] = dirlen - i;
1804         }
1805       /* stride_for_teases tells how much to stride if we get a */
1806       /* match on the far character but are subsequently */
1807       /* disappointed, by recording what the stride would have been */
1808       /* for that character if the last character had been */
1809       /* different. */
1810     }
1811   infinity = dirlen - infinity;
1812   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1813   /* loop invariant - POS_BYTE points at where last char (first
1814      char if reverse) of pattern would align in a possible match.  */
1815   while (n != 0)
1816     {
1817       int tail_end;
1818       unsigned char *tail_end_ptr;
1819
1820       /* It's been reported that some (broken) compiler thinks that
1821          Boolean expressions in an arithmetic context are unsigned.
1822          Using an explicit ?1:0 prevents this.  */
1823       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1824           < 0)
1825         return (n * (0 - direction));
1826       /* First we do the part we can by pointers (maybe nothing) */
1827       QUIT;
1828       pat = base_pat;
1829       limit = pos_byte - dirlen + direction;
1830       if (direction > 0)
1831         {
1832           limit = BUFFER_CEILING_OF (limit);
1833           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1834              can take on without hitting edge of buffer or the gap.  */
1835           limit = min (limit, pos_byte + 20000);
1836           limit = min (limit, lim_byte - 1);
1837         }
1838       else
1839         {
1840           limit = BUFFER_FLOOR_OF (limit);
1841           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1842              can take on without hitting edge of buffer or the gap.  */
1843           limit = max (limit, pos_byte - 20000);
1844           limit = max (limit, lim_byte);
1845         }
1846       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1847       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1848
1849       if ((limit - pos_byte) * direction > 20)
1850         {
1851           unsigned char *p2;
1852
1853           p_limit = BYTE_POS_ADDR (limit);
1854           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1855           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1856           while (1)             /* use one cursor setting as long as i can */
1857             {
1858               if (direction > 0) /* worth duplicating */
1859                 {
1860                   /* Use signed comparison if appropriate
1861                      to make cursor+infinity sure to be > p_limit.
1862                      Assuming that the buffer lies in a range of addresses
1863                      that are all "positive" (as ints) or all "negative",
1864                      either kind of comparison will work as long
1865                      as we don't step by infinity.  So pick the kind
1866                      that works when we do step by infinity.  */
1867                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1868                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1869                       cursor += BM_tab[*cursor];
1870                   else
1871                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1872                       cursor += BM_tab[*cursor];
1873                 }
1874               else
1875                 {
1876                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1877                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1878                       cursor += BM_tab[*cursor];
1879                   else
1880                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1881                       cursor += BM_tab[*cursor];
1882                 }
1883 /* If you are here, cursor is beyond the end of the searched region. */
1884 /* This can happen if you match on the far character of the pattern, */
1885 /* because the "stride" of that character is infinity, a number able */
1886 /* to throw you well beyond the end of the search.  It can also */
1887 /* happen if you fail to match within the permitted region and would */
1888 /* otherwise try a character beyond that region */
1889               if ((cursor - p_limit) * direction <= len_byte)
1890                 break;  /* a small overrun is genuine */
1891               cursor -= infinity; /* large overrun = hit */
1892               i = dirlen - direction;
1893               if (! NILP (trt))
1894                 {
1895                   while ((i -= direction) + direction != 0)
1896                     {
1897                       int ch;
1898                       cursor -= direction;
1899                       /* Translate only the last byte of a character.  */
1900                       if (! multibyte
1901                           || ((cursor == tail_end_ptr
1902                                || CHAR_HEAD_P (cursor[1]))
1903                               && (CHAR_HEAD_P (cursor[0])
1904                                   /* Check if this is the last byte of
1905                                      a translable character.  */
1906                                   || (translate_prev_byte1 == cursor[-1]
1907                                       && (CHAR_HEAD_P (translate_prev_byte1)
1908                                           || (translate_prev_byte2 == cursor[-2]
1909                                               && (CHAR_HEAD_P (translate_prev_byte2)
1910                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1911                         ch = simple_translate[*cursor];
1912                       else
1913                         ch = *cursor;
1914                       if (pat[i] != ch)
1915                         break;
1916                     }
1917                 }
1918               else
1919                 {
1920                   while ((i -= direction) + direction != 0)
1921                     {
1922                       cursor -= direction;
1923                       if (pat[i] != *cursor)
1924                         break;
1925                     }
1926                 }
1927               cursor += dirlen - i - direction; /* fix cursor */
1928               if (i + direction == 0)
1929                 {
1930                   int position;
1931
1932                   cursor -= direction;
1933
1934                   position = pos_byte + cursor - p2 + ((direction > 0)
1935                                                        ? 1 - len_byte : 0);
1936                   set_search_regs (position, len_byte);
1937
1938                   if ((n -= direction) != 0)
1939                     cursor += dirlen; /* to resume search */
1940                   else
1941                     return ((direction > 0)
1942                             ? search_regs.end[0] : search_regs.start[0]);
1943                 }
1944               else
1945                 cursor += stride_for_teases; /* <sigh> we lose -  */
1946             }
1947           pos_byte += cursor - p2;
1948         }
1949       else
1950         /* Now we'll pick up a clump that has to be done the hard */
1951         /* way because it covers a discontinuity */
1952         {
1953           limit = ((direction > 0)
1954                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1955                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1956           limit = ((direction > 0)
1957                    ? min (limit + len_byte, lim_byte - 1)
1958                    : max (limit - len_byte, lim_byte));
1959           /* LIMIT is now the last value POS_BYTE can have
1960              and still be valid for a possible match.  */
1961           while (1)
1962             {
1963               /* This loop can be coded for space rather than */
1964               /* speed because it will usually run only once. */
1965               /* (the reach is at most len + 21, and typically */
1966               /* does not exceed len) */
1967               while ((limit - pos_byte) * direction >= 0)
1968                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1969               /* now run the same tests to distinguish going off the */
1970               /* end, a match or a phony match. */
1971               if ((pos_byte - limit) * direction <= len_byte)
1972                 break;  /* ran off the end */
1973               /* Found what might be a match.
1974                  Set POS_BYTE back to last (first if reverse) pos.  */
1975               pos_byte -= infinity;
1976               i = dirlen - direction;
1977               while ((i -= direction) + direction != 0)
1978                 {
1979                   int ch;
1980                   unsigned char *ptr;
1981                   pos_byte -= direction;
1982                   ptr = BYTE_POS_ADDR (pos_byte);
1983                   /* Translate only the last byte of a character.  */
1984                   if (! multibyte
1985                       || ((ptr == tail_end_ptr
1986                            || CHAR_HEAD_P (ptr[1]))
1987                           && (CHAR_HEAD_P (ptr[0])
1988                               /* Check if this is the last byte of a
1989                                  translable character.  */
1990                               || (translate_prev_byte1 == ptr[-1]
1991                                   && (CHAR_HEAD_P (translate_prev_byte1)
1992                                       || (translate_prev_byte2 == ptr[-2]
1993                                           && (CHAR_HEAD_P (translate_prev_byte2)
1994                                               || translate_prev_byte3 == ptr[-3])))))))
1995                     ch = simple_translate[*ptr];
1996                   else
1997                     ch = *ptr;
1998                   if (pat[i] != ch)
1999                     break;
2000                 }
2001               /* Above loop has moved POS_BYTE part or all the way
2002                  back to the first pos (last pos if reverse).
2003                  Set it once again at the last (first if reverse) char.  */
2004               pos_byte += dirlen - i- direction;
2005               if (i + direction == 0)
2006                 {
2007                   int position;
2008                   pos_byte -= direction;
2009
2010                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2011
2012                   set_search_regs (position, len_byte);
2013
2014                   if ((n -= direction) != 0)
2015                     pos_byte += dirlen; /* to resume search */
2016                   else
2017                     return ((direction > 0)
2018                             ? search_regs.end[0] : search_regs.start[0]);
2019                 }
2020               else
2021                 pos_byte += stride_for_teases;
2022             }
2023           }
2024       /* We have done one clump.  Can we continue? */
2025       if ((lim_byte - pos_byte) * direction < 0)
2026         return ((0 - n) * direction);
2027     }
2028   return BYTE_TO_CHAR (pos_byte);
2029 }
2030
2031 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2032    for the overall match just found in the current buffer.
2033    Also clear out the match data for registers 1 and up.  */
2034
2035 static void
2036 set_search_regs (beg_byte, nbytes)
2037      int beg_byte, nbytes;
2038 {
2039   int i;
2040
2041   /* Make sure we have registers in which to store
2042      the match position.  */
2043   if (search_regs.num_regs == 0)
2044     {
2045       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2046       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2047       search_regs.num_regs = 2;
2048     }
2049
2050   /* Clear out the other registers.  */
2051   for (i = 1; i < search_regs.num_regs; i++)
2052     {
2053       search_regs.start[i] = -1;
2054       search_regs.end[i] = -1;
2055     }
2056
2057   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2058   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2059   XSETBUFFER (last_thing_searched, current_buffer);
2060 }
2061 \f
2062 /* Given a string of words separated by word delimiters,
2063   compute a regexp that matches those exact words
2064   separated by arbitrary punctuation.  */
2065
2066 static Lisp_Object
2067 wordify (string)
2068      Lisp_Object string;
2069 {
2070   register unsigned char *p, *o;
2071   register int i, i_byte, len, punct_count = 0, word_count = 0;
2072   Lisp_Object val;
2073   int prev_c = 0;
2074   int adjust;
2075
2076   CHECK_STRING (string);
2077   p = SDATA (string);
2078   len = SCHARS (string);
2079
2080   for (i = 0, i_byte = 0; i < len; )
2081     {
2082       int c;
2083
2084       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2085
2086       if (SYNTAX (c) != Sword)
2087         {
2088           punct_count++;
2089           if (i > 0 && SYNTAX (prev_c) == Sword)
2090             word_count++;
2091         }
2092
2093       prev_c = c;
2094     }
2095
2096   if (SYNTAX (prev_c) == Sword)
2097     word_count++;
2098   if (!word_count)
2099     return empty_string;
2100
2101   adjust = - punct_count + 5 * (word_count - 1) + 4;
2102   if (STRING_MULTIBYTE (string))
2103     val = make_uninit_multibyte_string (len + adjust,
2104                                         SBYTES (string)
2105                                         + adjust);
2106   else
2107     val = make_uninit_string (len + adjust);
2108
2109   o = SDATA (val);
2110   *o++ = '\\';
2111   *o++ = 'b';
2112   prev_c = 0;
2113
2114   for (i = 0, i_byte = 0; i < len; )
2115     {
2116       int c;
2117       int i_byte_orig = i_byte;
2118
2119       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2120
2121       if (SYNTAX (c) == Sword)
2122         {
2123           bcopy (SDATA (string) + i_byte_orig, o,
2124                  i_byte - i_byte_orig);
2125           o += i_byte - i_byte_orig;
2126         }
2127       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2128         {
2129           *o++ = '\\';
2130           *o++ = 'W';
2131           *o++ = '\\';
2132           *o++ = 'W';
2133           *o++ = '*';
2134         }
2135
2136       prev_c = c;
2137     }
2138
2139   *o++ = '\\';
2140   *o++ = 'b';
2141
2142   return val;
2143 }
2144 \f
2145 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2146        "MSearch backward: ",
2147        doc: /* Search backward from point for STRING.
2148 Set point to the beginning of the occurrence found, and return point.
2149 An optional second argument bounds the search; it is a buffer position.
2150 The match found must not extend before that position.
2151 Optional third argument, if t, means if fail just return nil (no error).
2152  If not nil and not t, position at limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.
2154
2155 Search case-sensitivity is determined by the value of the variable
2156 `case-fold-search', which see.
2157
2158 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2159      (string, bound, noerror, count)
2160      Lisp_Object string, bound, noerror, count;
2161 {
2162   return search_command (string, bound, noerror, count, -1, 0, 0);
2163 }
2164
2165 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2166        doc: /* Search forward from point for STRING.
2167 Set point to the end of the occurrence found, and return point.
2168 An optional second argument bounds the search; it is a buffer position.
2169 The match found must not extend after that position.  A value of nil is
2170   equivalent to (point-max).
2171 Optional third argument, if t, means if fail just return nil (no error).
2172   If not nil and not t, move to limit of search and return nil.
2173 Optional fourth argument is repeat count--search for successive occurrences.
2174
2175 Search case-sensitivity is determined by the value of the variable
2176 `case-fold-search', which see.
2177
2178 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2179      (string, bound, noerror, count)
2180      Lisp_Object string, bound, noerror, count;
2181 {
2182   return search_command (string, bound, noerror, count, 1, 0, 0);
2183 }
2184
2185 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2186        "sWord search backward: ",
2187        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2188 Set point to the beginning of the occurrence found, and return point.
2189 An optional second argument bounds the search; it is a buffer position.
2190 The match found must not extend before that position.
2191 Optional third argument, if t, means if fail just return nil (no error).
2192   If not nil and not t, move to limit of search and return nil.
2193 Optional fourth argument is repeat count--search for successive occurrences.  */)
2194      (string, bound, noerror, count)
2195      Lisp_Object string, bound, noerror, count;
2196 {
2197   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2198 }
2199
2200 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2201        "sWord search: ",
2202        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2203 Set point to the end of the occurrence found, and return point.
2204 An optional second argument bounds the search; it is a buffer position.
2205 The match found must not extend after that position.
2206 Optional third argument, if t, means if fail just return nil (no error).
2207   If not nil and not t, move to limit of search and return nil.
2208 Optional fourth argument is repeat count--search for successive occurrences.  */)
2209      (string, bound, noerror, count)
2210      Lisp_Object string, bound, noerror, count;
2211 {
2212   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2213 }
2214
2215 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2216        "sRE search backward: ",
2217        doc: /* Search backward from point for match for regular expression REGEXP.
2218 Set point to the beginning of the match, and return point.
2219 The match found is the one starting last in the buffer
2220 and yet ending before the origin of the search.
2221 An optional second argument bounds the search; it is a buffer position.
2222 The match found must start at or after that position.
2223 Optional third argument, if t, means if fail just return nil (no error).
2224   If not nil and not t, move to limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2226 See also the functions `match-beginning', `match-end', `match-string',
2227 and `replace-match'.  */)
2228      (regexp, bound, noerror, count)
2229      Lisp_Object regexp, bound, noerror, count;
2230 {
2231   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2232 }
2233
2234 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2235        "sRE search: ",
2236        doc: /* Search forward from point for regular expression REGEXP.
2237 Set point to the end of the occurrence found, and return point.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must not extend after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241   If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences.
2243 See also the functions `match-beginning', `match-end', `match-string',
2244 and `replace-match'.  */)
2245      (regexp, bound, noerror, count)
2246      Lisp_Object regexp, bound, noerror, count;
2247 {
2248   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2249 }
2250
2251 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2252        "sPosix search backward: ",
2253        doc: /* Search backward from point for match for regular expression REGEXP.
2254 Find the longest match in accord with Posix regular expression rules.
2255 Set point to the beginning of the match, and return point.
2256 The match found is the one starting last in the buffer
2257 and yet ending before the origin of the search.
2258 An optional second argument bounds the search; it is a buffer position.
2259 The match found must start at or after that position.
2260 Optional third argument, if t, means if fail just return nil (no error).
2261   If not nil and not t, move to limit of search and return nil.
2262 Optional fourth argument is repeat count--search for successive occurrences.
2263 See also the functions `match-beginning', `match-end', `match-string',
2264 and `replace-match'.  */)
2265      (regexp, bound, noerror, count)
2266      Lisp_Object regexp, bound, noerror, count;
2267 {
2268   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2269 }
2270
2271 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2272        "sPosix search: ",
2273        doc: /* Search forward from point for regular expression REGEXP.
2274 Find the longest match in accord with Posix regular expression rules.
2275 Set point to the end of the occurrence found, and return point.
2276 An optional second argument bounds the search; it is a buffer position.
2277 The match found must not extend after that position.
2278 Optional third argument, if t, means if fail just return nil (no error).
2279   If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences.
2281 See also the functions `match-beginning', `match-end', `match-string',
2282 and `replace-match'.  */)
2283      (regexp, bound, noerror, count)
2284      Lisp_Object regexp, bound, noerror, count;
2285 {
2286   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2287 }
2288 \f
2289 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2290        doc: /* Replace text matched by last search with NEWTEXT.
2291 Leave point at the end of the replacement text.
2292
2293 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2294 Otherwise maybe capitalize the whole text, or maybe just word initials,
2295 based on the replaced text.
2296 If the replaced text has only capital letters
2297 and has at least one multiletter word, convert NEWTEXT to all caps.
2298 Otherwise if all words are capitalized in the replaced text,
2299 capitalize each word in NEWTEXT.
2300
2301 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2302 Otherwise treat `\\' as special:
2303   `\\&' in NEWTEXT means substitute original matched text.
2304   `\\N' means substitute what matched the Nth `\\(...\\)'.
2305        If Nth parens didn't match, substitute nothing.
2306   `\\\\' means insert one `\\'.
2307 Case conversion does not apply to these substitutions.
2308
2309 FIXEDCASE and LITERAL are optional arguments.
2310
2311 The optional fourth argument STRING can be a string to modify.
2312 This is meaningful when the previous match was done against STRING,
2313 using `string-match'.  When used this way, `replace-match'
2314 creates and returns a new string made by copying STRING and replacing
2315 the part of STRING that was matched.
2316
2317 The optional fifth argument SUBEXP specifies a subexpression;
2318 it says to replace just that subexpression with NEWTEXT,
2319 rather than replacing the entire matched text.
2320 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2321 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2322 NEWTEXT in place of subexp N.
2323 This is useful only after a regular expression search or match,
2324 since only regular expressions have distinguished subexpressions.  */)
2325      (newtext, fixedcase, literal, string, subexp)
2326      Lisp_Object newtext, fixedcase, literal, string, subexp;
2327 {
2328   enum { nochange, all_caps, cap_initial } case_action;
2329   register int pos, pos_byte;
2330   int some_multiletter_word;
2331   int some_lowercase;
2332   int some_uppercase;
2333   int some_nonuppercase_initial;
2334   register int c, prevc;
2335   int sub;
2336   int opoint, newpoint;
2337
2338   CHECK_STRING (newtext);
2339
2340   if (! NILP (string))
2341     CHECK_STRING (string);
2342
2343   case_action = nochange;       /* We tried an initialization */
2344                                 /* but some C compilers blew it */
2345
2346   if (search_regs.num_regs <= 0)
2347     error ("`replace-match' called before any match found");
2348
2349   if (NILP (subexp))
2350     sub = 0;
2351   else
2352     {
2353       CHECK_NUMBER (subexp);
2354       sub = XINT (subexp);
2355       if (sub < 0 || sub >= search_regs.num_regs)
2356         args_out_of_range (subexp, make_number (search_regs.num_regs));
2357     }
2358
2359   if (NILP (string))
2360     {
2361       if (search_regs.start[sub] < BEGV
2362           || search_regs.start[sub] > search_regs.end[sub]
2363           || search_regs.end[sub] > ZV)
2364         args_out_of_range (make_number (search_regs.start[sub]),
2365                            make_number (search_regs.end[sub]));
2366     }
2367   else
2368     {
2369       if (search_regs.start[sub] < 0
2370           || search_regs.start[sub] > search_regs.end[sub]
2371           || search_regs.end[sub] > SCHARS (string))
2372         args_out_of_range (make_number (search_regs.start[sub]),
2373                            make_number (search_regs.end[sub]));
2374     }
2375
2376   if (NILP (fixedcase))
2377     {
2378       /* Decide how to casify by examining the matched text. */
2379       int last;
2380
2381       pos = search_regs.start[sub];
2382       last = search_regs.end[sub];
2383
2384       if (NILP (string))
2385         pos_byte = CHAR_TO_BYTE (pos);
2386       else
2387         pos_byte = string_char_to_byte (string, pos);
2388
2389       prevc = '\n';
2390       case_action = all_caps;
2391
2392       /* some_multiletter_word is set nonzero if any original word
2393          is more than one letter long. */
2394       some_multiletter_word = 0;
2395       some_lowercase = 0;
2396       some_nonuppercase_initial = 0;
2397       some_uppercase = 0;
2398
2399       while (pos < last)
2400         {
2401           if (NILP (string))
2402             {
2403               c = FETCH_CHAR (pos_byte);
2404               INC_BOTH (pos, pos_byte);
2405             }
2406           else
2407             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2408
2409           if (LOWERCASEP (c))
2410             {
2411               /* Cannot be all caps if any original char is lower case */
2412
2413               some_lowercase = 1;
2414               if (SYNTAX (prevc) != Sword)
2415                 some_nonuppercase_initial = 1;
2416               else
2417                 some_multiletter_word = 1;
2418             }
2419           else if (UPPERCASEP (c))
2420             {
2421               some_uppercase = 1;
2422               if (SYNTAX (prevc) != Sword)
2423                 ;
2424               else
2425                 some_multiletter_word = 1;
2426             }
2427           else
2428             {
2429               /* If the initial is a caseless word constituent,
2430                  treat that like a lowercase initial.  */
2431               if (SYNTAX (prevc) != Sword)
2432                 some_nonuppercase_initial = 1;
2433             }
2434
2435           prevc = c;
2436         }
2437
2438       /* Convert to all caps if the old text is all caps
2439          and has at least one multiletter word.  */
2440       if (! some_lowercase && some_multiletter_word)
2441         case_action = all_caps;
2442       /* Capitalize each word, if the old text has all capitalized words.  */
2443       else if (!some_nonuppercase_initial && some_multiletter_word)
2444         case_action = cap_initial;
2445       else if (!some_nonuppercase_initial && some_uppercase)
2446         /* Should x -> yz, operating on X, give Yz or YZ?
2447            We'll assume the latter.  */
2448         case_action = all_caps;
2449       else
2450         case_action = nochange;
2451     }
2452
2453   /* Do replacement in a string.  */
2454   if (!NILP (string))
2455     {
2456       Lisp_Object before, after;
2457
2458       before = Fsubstring (string, make_number (0),
2459                            make_number (search_regs.start[sub]));
2460       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2461
2462       /* Substitute parts of the match into NEWTEXT
2463          if desired.  */
2464       if (NILP (literal))
2465         {
2466           int lastpos = 0;
2467           int lastpos_byte = 0;
2468           /* We build up the substituted string in ACCUM.  */
2469           Lisp_Object accum;
2470           Lisp_Object middle;
2471           int length = SBYTES (newtext);
2472
2473           accum = Qnil;
2474
2475           for (pos_byte = 0, pos = 0; pos_byte < length;)
2476             {
2477               int substart = -1;
2478               int subend = 0;
2479               int delbackslash = 0;
2480
2481               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2482
2483               if (c == '\\')
2484                 {
2485                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2486
2487                   if (c == '&')
2488                     {
2489                       substart = search_regs.start[sub];
2490                       subend = search_regs.end[sub];
2491                     }
2492                   else if (c >= '1' && c <= '9')
2493                     {
2494                       if (search_regs.start[c - '0'] >= 0
2495                           && c <= search_regs.num_regs + '0')
2496                         {
2497                           substart = search_regs.start[c - '0'];
2498                           subend = search_regs.end[c - '0'];
2499                         }
2500                       else
2501                         {
2502                           /* If that subexp did not match,
2503                              replace \\N with nothing.  */
2504                           substart = 0;
2505                           subend = 0;
2506                         }
2507                     }
2508                   else if (c == '\\')
2509                     delbackslash = 1;
2510                   else
2511                     error ("Invalid use of `\\' in replacement text");
2512                 }
2513               if (substart >= 0)
2514                 {
2515                   if (pos - 2 != lastpos)
2516                     middle = substring_both (newtext, lastpos,
2517                                              lastpos_byte,
2518                                              pos - 2, pos_byte - 2);
2519                   else
2520                     middle = Qnil;
2521                   accum = concat3 (accum, middle,
2522                                    Fsubstring (string,
2523                                                make_number (substart),
2524                                                make_number (subend)));
2525                   lastpos = pos;
2526                   lastpos_byte = pos_byte;
2527                 }
2528               else if (delbackslash)
2529                 {
2530                   middle = substring_both (newtext, lastpos,
2531                                            lastpos_byte,
2532                                            pos - 1, pos_byte - 1);
2533
2534                   accum = concat2 (accum, middle);
2535                   lastpos = pos;
2536                   lastpos_byte = pos_byte;
2537                 }
2538             }
2539
2540           if (pos != lastpos)
2541             middle = substring_both (newtext, lastpos,
2542                                      lastpos_byte,
2543                                      pos, pos_byte);
2544           else
2545             middle = Qnil;
2546
2547           newtext = concat2 (accum, middle);
2548         }
2549
2550       /* Do case substitution in NEWTEXT if desired.  */
2551       if (case_action == all_caps)
2552         newtext = Fupcase (newtext);
2553       else if (case_action == cap_initial)
2554         newtext = Fupcase_initials (newtext);
2555
2556       return concat3 (before, newtext, after);
2557     }
2558
2559   /* Record point, then move (quietly) to the start of the match.  */
2560   if (PT >= search_regs.end[sub])
2561     opoint = PT - ZV;
2562   else if (PT > search_regs.start[sub])
2563     opoint = search_regs.end[sub] - ZV;
2564   else
2565     opoint = PT;
2566
2567   /* If we want non-literal replacement,
2568      perform substitution on the replacement string.  */
2569   if (NILP (literal))
2570     {
2571       int length = SBYTES (newtext);
2572       unsigned char *substed;
2573       int substed_alloc_size, substed_len;
2574       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2575       int str_multibyte = STRING_MULTIBYTE (newtext);
2576       Lisp_Object rev_tbl;
2577       int really_changed = 0;
2578
2579       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2580                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2581                                           make_number (0))
2582                 : Qnil);
2583
2584       substed_alloc_size = length * 2 + 100;
2585       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2586       substed_len = 0;
2587
2588       /* Go thru NEWTEXT, producing the actual text to insert in
2589          SUBSTED while adjusting multibyteness to that of the current
2590          buffer.  */
2591
2592       for (pos_byte = 0, pos = 0; pos_byte < length;)
2593         {
2594           unsigned char str[MAX_MULTIBYTE_LENGTH];
2595           unsigned char *add_stuff = NULL;
2596           int add_len = 0;
2597           int idx = -1;
2598
2599           if (str_multibyte)
2600             {
2601               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2602               if (!buf_multibyte)
2603                 c = multibyte_char_to_unibyte (c, rev_tbl);
2604             }
2605           else
2606             {
2607               /* Note that we don't have to increment POS.  */
2608               c = SREF (newtext, pos_byte++);
2609               if (buf_multibyte)
2610                 c = unibyte_char_to_multibyte (c);
2611             }
2612
2613           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2614              or set IDX to a match index, which means put that part
2615              of the buffer text into SUBSTED.  */
2616
2617           if (c == '\\')
2618             {
2619               really_changed = 1;
2620
2621               if (str_multibyte)
2622                 {
2623                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2624                                                       pos, pos_byte);
2625                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2626                     c = multibyte_char_to_unibyte (c, rev_tbl);
2627                 }
2628               else
2629                 {
2630                   c = SREF (newtext, pos_byte++);
2631                   if (buf_multibyte)
2632                     c = unibyte_char_to_multibyte (c);
2633                 }
2634
2635               if (c == '&')
2636                 idx = sub;
2637               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2638                 {
2639                   if (search_regs.start[c - '0'] >= 1)
2640                     idx = c - '0';
2641                 }
2642               else if (c == '\\')
2643                 add_len = 1, add_stuff = "\\";
2644               else
2645                 {
2646                   xfree (substed);
2647                   error ("Invalid use of `\\' in replacement text");
2648                 }
2649             }
2650           else
2651             {
2652               add_len = CHAR_STRING (c, str);
2653               add_stuff = str;
2654             }
2655
2656           /* If we want to copy part of a previous match,
2657              set up ADD_STUFF and ADD_LEN to point to it.  */
2658           if (idx >= 0)
2659             {
2660               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2661               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2662               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2663                 move_gap (search_regs.start[idx]);
2664               add_stuff = BYTE_POS_ADDR (begbyte);
2665             }
2666
2667           /* Now the stuff we want to add to SUBSTED
2668              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2669
2670           /* Make sure SUBSTED is big enough.  */
2671           if (substed_len + add_len >= substed_alloc_size)
2672             {
2673               substed_alloc_size = substed_len + add_len + 500;
2674               substed = (unsigned char *) xrealloc (substed,
2675                                                     substed_alloc_size + 1);
2676             }
2677
2678           /* Now add to the end of SUBSTED.  */
2679           if (add_stuff)
2680             {
2681               bcopy (add_stuff, substed + substed_len, add_len);
2682               substed_len += add_len;
2683             }
2684         }
2685
2686       if (really_changed)
2687         {
2688           if (buf_multibyte)
2689             {
2690               int nchars = multibyte_chars_in_text (substed, substed_len);
2691
2692               newtext = make_multibyte_string (substed, nchars, substed_len);
2693             }
2694           else
2695             newtext = make_unibyte_string (substed, substed_len);
2696         }
2697       xfree (substed);
2698     }
2699
2700   /* Replace the old text with the new in the cleanest possible way.  */
2701   replace_range (search_regs.start[sub], search_regs.end[sub],
2702                  newtext, 1, 0, 1);
2703   newpoint = search_regs.start[sub] + SCHARS (newtext);
2704
2705   if (case_action == all_caps)
2706     Fupcase_region (make_number (search_regs.start[sub]),
2707                     make_number (newpoint));
2708   else if (case_action == cap_initial)
2709     Fupcase_initials_region (make_number (search_regs.start[sub]),
2710                              make_number (newpoint));
2711
2712   /* Adjust search data for this change.  */
2713   {
2714     int oldend = search_regs.end[sub];
2715     int oldstart = search_regs.start[sub];
2716     int change = newpoint - search_regs.end[sub];
2717     int i;
2718
2719     for (i = 0; i < search_regs.num_regs; i++)
2720       {
2721         if (search_regs.start[i] >= oldend)
2722           search_regs.start[i] += change;
2723         else if (search_regs.start[i] > oldstart)
2724           search_regs.start[i] = oldstart;
2725         if (search_regs.end[i] >= oldend)
2726           search_regs.end[i] += change;
2727         else if (search_regs.end[i] > oldstart)
2728           search_regs.end[i] = oldstart;
2729       }
2730   }
2731
2732   /* Put point back where it was in the text.  */
2733   if (opoint <= 0)
2734     TEMP_SET_PT (opoint + ZV);
2735   else
2736     TEMP_SET_PT (opoint);
2737
2738   /* Now move point "officially" to the start of the inserted replacement.  */
2739   move_if_not_intangible (newpoint);
2740
2741   return Qnil;
2742 }
2743 \f
2744 static Lisp_Object
2745 match_limit (num, beginningp)
2746      Lisp_Object num;
2747      int beginningp;
2748 {
2749   register int n;
2750
2751   CHECK_NUMBER (num);
2752   n = XINT (num);
2753   if (n < 0)
2754     args_out_of_range (num, make_number (0));
2755   if (search_regs.num_regs <= 0)
2756     error ("No match data, because no search succeeded");
2757   if (n >= search_regs.num_regs
2758       || search_regs.start[n] < 0)
2759     return Qnil;
2760   return (make_number ((beginningp) ? search_regs.start[n]
2761                                     : search_regs.end[n]));
2762 }
2763
2764 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2765        doc: /* Return position of start of text matched by last search.
2766 SUBEXP, a number, specifies which parenthesized expression in the last
2767   regexp.
2768 Value is nil if SUBEXPth pair didn't match, or there were less than
2769   SUBEXP pairs.
2770 Zero means the entire text matched by the whole regexp or whole string.  */)
2771      (subexp)
2772      Lisp_Object subexp;
2773 {
2774   return match_limit (subexp, 1);
2775 }
2776
2777 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2778        doc: /* Return position of end of text matched by last search.
2779 SUBEXP, a number, specifies which parenthesized expression in the last
2780   regexp.
2781 Value is nil if SUBEXPth pair didn't match, or there were less than
2782   SUBEXP pairs.
2783 Zero means the entire text matched by the whole regexp or whole string.  */)
2784      (subexp)
2785      Lisp_Object subexp;
2786 {
2787   return match_limit (subexp, 0);
2788 }
2789
2790 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2791        doc: /* Return a list containing all info on what the last search matched.
2792 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2793 All the elements are markers or nil (nil if the Nth pair didn't match)
2794 if the last match was on a buffer; integers or nil if a string was matched.
2795 Use `store-match-data' to reinstate the data in this list.
2796
2797 If INTEGERS (the optional first argument) is non-nil, always use
2798 integers \(rather than markers) to represent buffer positions.  In
2799 this case, and if the last match was in a buffer, the buffer will get
2800 stored as one additional element at the end of the list.
2801
2802 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2803 enough to hold all the values, and if INTEGERS is non-nil, no consing
2804 is done.
2805
2806 If optional third arg RESEAT is non-nil, any previous markers on the
2807 REUSE list will be modified to point to nowhere.
2808
2809 Return value is undefined if the last search failed.  */)
2810   (integers, reuse, reseat)
2811      Lisp_Object integers, reuse, reseat;
2812 {
2813   Lisp_Object tail, prev;
2814   Lisp_Object *data;
2815   int i, len;
2816
2817   if (!NILP (reseat))
2818     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2819       if (MARKERP (XCAR (tail)))
2820         {
2821           unchain_marker (XMARKER (XCAR (tail)));
2822           XSETCAR (tail, Qnil);
2823         }
2824
2825   if (NILP (last_thing_searched))
2826     return Qnil;
2827
2828   prev = Qnil;
2829
2830   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2831                                  * sizeof (Lisp_Object));
2832
2833   len = 0;
2834   for (i = 0; i < search_regs.num_regs; i++)
2835     {
2836       int start = search_regs.start[i];
2837       if (start >= 0)
2838         {
2839           if (EQ (last_thing_searched, Qt)
2840               || ! NILP (integers))
2841             {
2842               XSETFASTINT (data[2 * i], start);
2843               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2844             }
2845           else if (BUFFERP (last_thing_searched))
2846             {
2847               data[2 * i] = Fmake_marker ();
2848               Fset_marker (data[2 * i],
2849                            make_number (start),
2850                            last_thing_searched);
2851               data[2 * i + 1] = Fmake_marker ();
2852               Fset_marker (data[2 * i + 1],
2853                            make_number (search_regs.end[i]),
2854                            last_thing_searched);
2855             }
2856           else
2857             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2858             abort ();
2859
2860           len = 2 * i + 2;
2861         }
2862       else
2863         data[2 * i] = data[2 * i + 1] = Qnil;
2864     }
2865
2866   if (BUFFERP (last_thing_searched) && !NILP (integers))
2867     {
2868       data[len] = last_thing_searched;
2869       len++;
2870     }
2871
2872   /* If REUSE is not usable, cons up the values and return them.  */
2873   if (! CONSP (reuse))
2874     return Flist (len, data);
2875
2876   /* If REUSE is a list, store as many value elements as will fit
2877      into the elements of REUSE.  */
2878   for (i = 0, tail = reuse; CONSP (tail);
2879        i++, tail = XCDR (tail))
2880     {
2881       if (i < len)
2882         XSETCAR (tail, data[i]);
2883       else
2884         XSETCAR (tail, Qnil);
2885       prev = tail;
2886     }
2887
2888   /* If we couldn't fit all value elements into REUSE,
2889      cons up the rest of them and add them to the end of REUSE.  */
2890   if (i < len)
2891     XSETCDR (prev, Flist (len - i, data + i));
2892
2893   return reuse;
2894 }
2895
2896 /* We used to have an internal use variant of `reseat' described as:
2897
2898       If RESEAT is `evaporate', put the markers back on the free list
2899       immediately.  No other references to the markers must exist in this
2900       case, so it is used only internally on the unwind stack and
2901       save-match-data from Lisp.
2902
2903    But it was ill-conceived: those supposedly-internal markers get exposed via
2904    the undo-list, so freeing them here is unsafe.  */
2905
2906 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2907        doc: /* Set internal data on last search match from elements of LIST.
2908 LIST should have been created by calling `match-data' previously.
2909
2910 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2911     (list, reseat)
2912      register Lisp_Object list, reseat;
2913 {
2914   register int i;
2915   register Lisp_Object marker;
2916
2917   if (running_asynch_code)
2918     save_search_regs ();
2919
2920   CHECK_LIST (list);
2921
2922   /* Unless we find a marker with a buffer or an explicit buffer
2923      in LIST, assume that this match data came from a string.  */
2924   last_thing_searched = Qt;
2925
2926   /* Allocate registers if they don't already exist.  */
2927   {
2928     int length = XFASTINT (Flength (list)) / 2;
2929
2930     if (length > search_regs.num_regs)
2931       {
2932         if (search_regs.num_regs == 0)
2933           {
2934             search_regs.start
2935               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2936             search_regs.end
2937               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2938           }
2939         else
2940           {
2941             search_regs.start
2942               = (regoff_t *) xrealloc (search_regs.start,
2943                                        length * sizeof (regoff_t));
2944             search_regs.end
2945               = (regoff_t *) xrealloc (search_regs.end,
2946                                        length * sizeof (regoff_t));
2947           }
2948
2949         for (i = search_regs.num_regs; i < length; i++)
2950           search_regs.start[i] = -1;
2951
2952         search_regs.num_regs = length;
2953       }
2954
2955     for (i = 0; CONSP (list); i++)
2956       {
2957         marker = XCAR (list);
2958         if (BUFFERP (marker))
2959           {
2960             last_thing_searched = marker;
2961             break;
2962           }
2963         if (i >= length)
2964           break;
2965         if (NILP (marker))
2966           {
2967             search_regs.start[i] = -1;
2968             list = XCDR (list);
2969           }
2970         else
2971           {
2972             int from;
2973             Lisp_Object m;
2974
2975             m = marker;
2976             if (MARKERP (marker))
2977               {
2978                 if (XMARKER (marker)->buffer == 0)
2979                   XSETFASTINT (marker, 0);
2980                 else
2981                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2982               }
2983
2984             CHECK_NUMBER_COERCE_MARKER (marker);
2985             from = XINT (marker);
2986
2987             if (!NILP (reseat) && MARKERP (m))
2988               {
2989                 unchain_marker (XMARKER (m));
2990                 XSETCAR (list, Qnil);
2991               }
2992
2993             if ((list = XCDR (list), !CONSP (list)))
2994               break;
2995
2996             m = marker = XCAR (list);
2997
2998             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2999               XSETFASTINT (marker, 0);
3000
3001             CHECK_NUMBER_COERCE_MARKER (marker);
3002             search_regs.start[i] = from;
3003             search_regs.end[i] = XINT (marker);
3004
3005             if (!NILP (reseat) && MARKERP (m))
3006               {
3007                 unchain_marker (XMARKER (m));
3008                 XSETCAR (list, Qnil);
3009               }
3010           }
3011         list = XCDR (list);
3012       }
3013
3014     for (; i < search_regs.num_regs; i++)
3015       search_regs.start[i] = -1;
3016   }
3017
3018   return Qnil;
3019 }
3020
3021 /* If non-zero the match data have been saved in saved_search_regs
3022    during the execution of a sentinel or filter. */
3023 static int search_regs_saved;
3024 static struct re_registers saved_search_regs;
3025 static Lisp_Object saved_last_thing_searched;
3026
3027 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3028    if asynchronous code (filter or sentinel) is running. */
3029 static void
3030 save_search_regs ()
3031 {
3032   if (!search_regs_saved)
3033     {
3034       saved_search_regs.num_regs = search_regs.num_regs;
3035       saved_search_regs.start = search_regs.start;
3036       saved_search_regs.end = search_regs.end;
3037       saved_last_thing_searched = last_thing_searched;
3038       last_thing_searched = Qnil;
3039       search_regs.num_regs = 0;
3040       search_regs.start = 0;
3041       search_regs.end = 0;
3042
3043       search_regs_saved = 1;
3044     }
3045 }
3046
3047 /* Called upon exit from filters and sentinels. */
3048 void
3049 restore_search_regs ()
3050 {
3051   if (search_regs_saved)
3052     {
3053       if (search_regs.num_regs > 0)
3054         {
3055           xfree (search_regs.start);
3056           xfree (search_regs.end);
3057         }
3058       search_regs.num_regs = saved_search_regs.num_regs;
3059       search_regs.start = saved_search_regs.start;
3060       search_regs.end = saved_search_regs.end;
3061       last_thing_searched = saved_last_thing_searched;
3062       saved_last_thing_searched = Qnil;
3063       search_regs_saved = 0;
3064     }
3065 }
3066
3067 static Lisp_Object
3068 unwind_set_match_data (list)
3069      Lisp_Object list;
3070 {
3071   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3072   return Fset_match_data (list, Qt);
3073 }
3074
3075 /* Called to unwind protect the match data.  */
3076 void
3077 record_unwind_save_match_data ()
3078 {
3079   record_unwind_protect (unwind_set_match_data,
3080                          Fmatch_data (Qnil, Qnil, Qnil));
3081 }
3082
3083 /* Quote a string to inactivate reg-expr chars */
3084
3085 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3086        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3087      (string)
3088      Lisp_Object string;
3089 {
3090   register unsigned char *in, *out, *end;
3091   register unsigned char *temp;
3092   int backslashes_added = 0;
3093
3094   CHECK_STRING (string);
3095
3096   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3097
3098   /* Now copy the data into the new string, inserting escapes. */
3099
3100   in = SDATA (string);
3101   end = in + SBYTES (string);
3102   out = temp;
3103
3104   for (; in != end; in++)
3105     {
3106       if (*in == '['
3107           || *in == '*' || *in == '.' || *in == '\\'
3108           || *in == '?' || *in == '+'
3109           || *in == '^' || *in == '$')
3110         *out++ = '\\', backslashes_added++;
3111       *out++ = *in;
3112     }
3113
3114   return make_specified_string (temp,
3115                                 SCHARS (string) + backslashes_added,
3116                                 out - temp,
3117                                 STRING_MULTIBYTE (string));
3118 }
3119 \f
3120 void
3121 syms_of_search ()
3122 {
3123   register int i;
3124
3125   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3126     {
3127       searchbufs[i].buf.allocated = 100;
3128       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3129       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3130       searchbufs[i].regexp = Qnil;
3131       searchbufs[i].whitespace_regexp = Qnil;
3132       searchbufs[i].syntax_table = Qnil;
3133       staticpro (&searchbufs[i].regexp);
3134       staticpro (&searchbufs[i].whitespace_regexp);
3135       staticpro (&searchbufs[i].syntax_table);
3136       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3137     }
3138   searchbuf_head = &searchbufs[0];
3139
3140   Qsearch_failed = intern ("search-failed");
3141   staticpro (&Qsearch_failed);
3142   Qinvalid_regexp = intern ("invalid-regexp");
3143   staticpro (&Qinvalid_regexp);
3144
3145   Fput (Qsearch_failed, Qerror_conditions,
3146         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3147   Fput (Qsearch_failed, Qerror_message,
3148         build_string ("Search failed"));
3149
3150   Fput (Qinvalid_regexp, Qerror_conditions,
3151         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3152   Fput (Qinvalid_regexp, Qerror_message,
3153         build_string ("Invalid regexp"));
3154
3155   last_thing_searched = Qnil;
3156   staticpro (&last_thing_searched);
3157
3158   saved_last_thing_searched = Qnil;
3159   staticpro (&saved_last_thing_searched);
3160
3161   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3162       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3163 Some commands use this for user-specified regexps.
3164 Spaces that occur inside character classes or repetition operators
3165 or other such regexp constructs are not replaced with this.
3166 A value of nil (which is the normal value) means treat spaces literally.  */);
3167   Vsearch_spaces_regexp = Qnil;
3168
3169   defsubr (&Slooking_at);
3170   defsubr (&Sposix_looking_at);
3171   defsubr (&Sstring_match);
3172   defsubr (&Sposix_string_match);
3173   defsubr (&Ssearch_forward);
3174   defsubr (&Ssearch_backward);
3175   defsubr (&Sword_search_forward);
3176   defsubr (&Sword_search_backward);
3177   defsubr (&Sre_search_forward);
3178   defsubr (&Sre_search_backward);
3179   defsubr (&Sposix_search_forward);
3180   defsubr (&Sposix_search_backward);
3181   defsubr (&Sreplace_match);
3182   defsubr (&Smatch_beginning);
3183   defsubr (&Smatch_end);
3184   defsubr (&Smatch_data);
3185   defsubr (&Sset_match_data);
3186   defsubr (&Sregexp_quote);
3187 }
3188
3189 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3190    (do not change this comment) */