src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #ifdef STDC_HEADERS
  24 #include <stdlib.h>
  25 #endif
  26 #include "lisp.h"
  27 #include "syntax.h"
  28 #include "category.h"
  29 #include "buffer.h"
  30 #include "charset.h"
  31 #include "region-cache.h"
  32 #include "commands.h"
  33 #include "blockinput.h"
  34 #include "intervals.h"
  35
  36 #include <sys/types.h>
  37 #include "regex.h"
  38
  39 #define min(a, b) ((a) < (b) ? (a) : (b))
  40 #define max(a, b) ((a) > (b) ? (a) : (b))
  41
  42 #define REGEXP_CACHE_SIZE 20
  43
  44 /* If the regexp is non-nil, then the buffer contains the compiled form
  45    of that regexp, suitable for searching.  */
  46 struct regexp_cache
  47 {
  48   struct regexp_cache *next;
  49   Lisp_Object regexp;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 static void set_search_regs ();
  92 static void save_search_regs ();
  93 static int simple_search ();
  94 static int boyer_moore ();
  95 static int search_buffer ();
  96
  97 static void
  98 matcher_overflow ()
  99 {
 100   error ("Stack overflow in regexp matcher");
 101 }
 102
 103 #ifdef __STDC__
 104 #define CONST const
 105 #else
 106 #define CONST
 107 #endif
 108
 109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 110    PATTERN is the pattern to compile.
 111    CP is the place to put the result.
 112    TRANSLATE is a translation table for ignoring case, or nil for none.
 113    REGP is the structure that says where to store the "register"
 114    values that will result from matching this pattern.
 115    If it is 0, we should compile the pattern not to record any
 116    subexpression bounds.
 117    POSIX is nonzero if we want full backtracking (POSIX style)
 118    for this pattern.  0 means backtrack only enough to get a valid match.
 119    MULTIBYTE is nonzero if we want to handle multibyte characters in
 120    PATTERN.  0 means all multibyte characters are recognized just as
 121    sequences of binary data.  */
 122
 123 static void
 124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 125      struct regexp_cache *cp;
 126      Lisp_Object pattern;
 127      Lisp_Object translate;
 128      struct re_registers *regp;
 129      int posix;
 130      int multibyte;
 131 {
 132   char *raw_pattern;
 133   int raw_pattern_size;
 134   char *val;
 135   reg_syntax_t old;
 136
 137   /* MULTIBYTE says whether the text to be searched is multibyte.
 138      We must convert PATTERN to match that, or we will not really
 139      find things right.  */
 140
 141   if (multibyte == STRING_MULTIBYTE (pattern))
 142     {
 143       raw_pattern = (char *) XSTRING (pattern)->data;
 144       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 145     }
 146   else if (multibyte)
 147     {
 148       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 149                                                   XSTRING (pattern)->size);
 150       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 151       copy_text (XSTRING (pattern)->data, raw_pattern,
 152                  XSTRING (pattern)->size, 0, 1);
 153     }
 154   else
 155     {
 156       /* Converting multibyte to single-byte.
 157
 158          ??? Perhaps this conversion should be done in a special way
 159          by subtracting nonascii-insert-offset from each non-ASCII char,
 160          so that only the multibyte chars which really correspond to
 161          the chosen single-byte character set can possibly match.  */
 162       raw_pattern_size = XSTRING (pattern)->size;
 163       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 164       copy_text (XSTRING (pattern)->data, raw_pattern,
 165                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 166     }
 167
 168   cp->regexp = Qnil;
 169   cp->buf.translate = (! NILP (translate) ? translate : 0);
 170   cp->posix = posix;
 171   cp->buf.multibyte = multibyte;
 172   BLOCK_INPUT;
 173   old = re_set_syntax (RE_SYNTAX_EMACS
 174                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 175   val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
 176   re_set_syntax (old);
 177   UNBLOCK_INPUT;
 178   if (val)
 179     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 180
 181   cp->regexp = Fcopy_sequence (pattern);
 182 }
 183
 184 /* Compile a regexp if necessary, but first check to see if there's one in
 185    the cache.
 186    PATTERN is the pattern to compile.
 187    TRANSLATE is a translation table for ignoring case, or nil for none.
 188    REGP is the structure that says where to store the "register"
 189    values that will result from matching this pattern.
 190    If it is 0, we should compile the pattern not to record any
 191    subexpression bounds.
 192    POSIX is nonzero if we want full backtracking (POSIX style)
 193    for this pattern.  0 means backtrack only enough to get a valid match.  */
 194
 195 struct re_pattern_buffer *
 196 compile_pattern (pattern, regp, translate, posix, multibyte)
 197      Lisp_Object pattern;
 198      struct re_registers *regp;
 199      Lisp_Object translate;
 200      int posix, multibyte;
 201 {
 202   struct regexp_cache *cp, **cpp;
 203
 204   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 205     {
 206       cp = *cpp;
 207       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 208           && !NILP (Fstring_equal (cp->regexp, pattern))
 209           && cp->buf.translate == (! NILP (translate) ? translate : 0)
 210           && cp->posix == posix
 211           && cp->buf.multibyte == multibyte)
 212         break;
 213
 214       /* If we're at the end of the cache, compile into the last cell.  */
 215       if (cp->next == 0)
 216         {
 217           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 218           break;
 219         }
 220     }
 221
 222   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 223      either because we found it in the cache or because we just compiled it.
 224      Move it to the front of the queue to mark it as most recently used.  */
 225   *cpp = cp->next;
 226   cp->next = searchbuf_head;
 227   searchbuf_head = cp;
 228
 229   /* Advise the searching functions about the space we have allocated
 230      for register data.  */
 231   if (regp)
 232     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 233
 234   return &cp->buf;
 235 }
 236
 237 /* Error condition used for failing searches */
 238 Lisp_Object Qsearch_failed;
 239
 240 Lisp_Object
 241 signal_failure (arg)
 242      Lisp_Object arg;
 243 {
 244   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 245   return Qnil;
 246 }
 247 \f
 248 static Lisp_Object
 249 looking_at_1 (string, posix)
 250      Lisp_Object string;
 251      int posix;
 252 {
 253   Lisp_Object val;
 254   unsigned char *p1, *p2;
 255   int s1, s2;
 256   register int i;
 257   struct re_pattern_buffer *bufp;
 258
 259   if (running_asynch_code)
 260     save_search_regs ();
 261
 262   CHECK_STRING (string, 0);
 263   bufp = compile_pattern (string, &search_regs,
 264                           (!NILP (current_buffer->case_fold_search)
 265                            ? DOWNCASE_TABLE : Qnil),
 266                           posix,
 267                           !NILP (current_buffer->enable_multibyte_characters));
 268
 269   immediate_quit = 1;
 270   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 271
 272   /* Get pointers and sizes of the two strings
 273      that make up the visible portion of the buffer. */
 274
 275   p1 = BEGV_ADDR;
 276   s1 = GPT_BYTE - BEGV_BYTE;
 277   p2 = GAP_END_ADDR;
 278   s2 = ZV_BYTE - GPT_BYTE;
 279   if (s1 < 0)
 280     {
 281       p2 = p1;
 282       s2 = ZV_BYTE - BEGV_BYTE;
 283       s1 = 0;
 284     }
 285   if (s2 < 0)
 286     {
 287       s1 = ZV_BYTE - BEGV_BYTE;
 288       s2 = 0;
 289     }
 290
 291   re_match_object = Qnil;
 292
 293   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 294                   PT_BYTE - BEGV_BYTE, &search_regs,
 295                   ZV_BYTE - BEGV_BYTE);
 296   if (i == -2)
 297     matcher_overflow ();
 298
 299   val = (0 <= i ? Qt : Qnil);
 300   if (i >= 0)
 301     for (i = 0; i < search_regs.num_regs; i++)
 302       if (search_regs.start[i] >= 0)
 303         {
 304           search_regs.start[i]
 305             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 306           search_regs.end[i]
 307             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 308         }
 309   XSETBUFFER (last_thing_searched, current_buffer);
 310   immediate_quit = 0;
 311   return val;
 312 }
 313
 314 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 315   "Return t if text after point matches regular expression REGEXP.\n\
 316 This function modifies the match data that `match-beginning',\n\
 317 `match-end' and `match-data' access; save and restore the match\n\
 318 data if you want to preserve them.")
 319   (regexp)
 320      Lisp_Object regexp;
 321 {
 322   return looking_at_1 (regexp, 0);
 323 }
 324
 325 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 326   "Return t if text after point matches regular expression REGEXP.\n\
 327 Find the longest match, in accord with Posix regular expression rules.\n\
 328 This function modifies the match data that `match-beginning',\n\
 329 `match-end' and `match-data' access; save and restore the match\n\
 330 data if you want to preserve them.")
 331   (regexp)
 332      Lisp_Object regexp;
 333 {
 334   return looking_at_1 (regexp, 1);
 335 }
 336 \f
 337 static Lisp_Object
 338 string_match_1 (regexp, string, start, posix)
 339      Lisp_Object regexp, string, start;
 340      int posix;
 341 {
 342   int val;
 343   struct re_pattern_buffer *bufp;
 344   int pos, pos_byte;
 345   int i;
 346
 347   if (running_asynch_code)
 348     save_search_regs ();
 349
 350   CHECK_STRING (regexp, 0);
 351   CHECK_STRING (string, 1);
 352
 353   if (NILP (start))
 354     pos = 0, pos_byte = 0;
 355   else
 356     {
 357       int len = XSTRING (string)->size;
 358
 359       CHECK_NUMBER (start, 2);
 360       pos = XINT (start);
 361       if (pos < 0 && -pos <= len)
 362         pos = len + pos;
 363       else if (0 > pos || pos > len)
 364         args_out_of_range (string, start);
 365       pos_byte = string_char_to_byte (string, pos);
 366     }
 367
 368   bufp = compile_pattern (regexp, &search_regs,
 369                           (!NILP (current_buffer->case_fold_search)
 370                            ? DOWNCASE_TABLE : Qnil),
 371                           posix,
 372                           STRING_MULTIBYTE (string));
 373   immediate_quit = 1;
 374   re_match_object = string;
 375
 376   val = re_search (bufp, (char *) XSTRING (string)->data,
 377                    STRING_BYTES (XSTRING (string)), pos_byte,
 378                    STRING_BYTES (XSTRING (string)) - pos_byte,
 379                    &search_regs);
 380   immediate_quit = 0;
 381   last_thing_searched = Qt;
 382   if (val == -2)
 383     matcher_overflow ();
 384   if (val < 0) return Qnil;
 385
 386   for (i = 0; i < search_regs.num_regs; i++)
 387     if (search_regs.start[i] >= 0)
 388       {
 389         search_regs.start[i]
 390           = string_byte_to_char (string, search_regs.start[i]);
 391         search_regs.end[i]
 392           = string_byte_to_char (string, search_regs.end[i]);
 393       }
 394
 395   return make_number (string_byte_to_char (string, val));
 396 }
 397
 398 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 399   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 400 If third arg START is non-nil, start search at that index in STRING.\n\
 401 For index of first char beyond the match, do (match-end 0).\n\
 402 `match-end' and `match-beginning' also give indices of substrings\n\
 403 matched by parenthesis constructs in the pattern.")
 404   (regexp, string, start)
 405      Lisp_Object regexp, string, start;
 406 {
 407   return string_match_1 (regexp, string, start, 0);
 408 }
 409
 410 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 411   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 412 Find the longest match, in accord with Posix regular expression rules.\n\
 413 If third arg START is non-nil, start search at that index in STRING.\n\
 414 For index of first char beyond the match, do (match-end 0).\n\
 415 `match-end' and `match-beginning' also give indices of substrings\n\
 416 matched by parenthesis constructs in the pattern.")
 417   (regexp, string, start)
 418      Lisp_Object regexp, string, start;
 419 {
 420   return string_match_1 (regexp, string, start, 1);
 421 }
 422
 423 /* Match REGEXP against STRING, searching all of STRING,
 424    and return the index of the match, or negative on failure.
 425    This does not clobber the match data.  */
 426
 427 int
 428 fast_string_match (regexp, string)
 429      Lisp_Object regexp, string;
 430 {
 431   int val;
 432   struct re_pattern_buffer *bufp;
 433
 434   bufp = compile_pattern (regexp, 0, Qnil,
 435                           0, STRING_MULTIBYTE (string));
 436   immediate_quit = 1;
 437   re_match_object = string;
 438
 439   val = re_search (bufp, (char *) XSTRING (string)->data,
 440                    STRING_BYTES (XSTRING (string)), 0,
 441                    STRING_BYTES (XSTRING (string)), 0);
 442   immediate_quit = 0;
 443   return val;
 444 }
 445
 446 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 447    and return the index of the match, or negative on failure.
 448    This does not clobber the match data.
 449    We assume that STRING contains single-byte characters.  */
 450
 451 extern Lisp_Object Vascii_downcase_table;
 452
 453 int
 454 fast_c_string_match_ignore_case (regexp, string)
 455      Lisp_Object regexp;
 456      char *string;
 457 {
 458   int val;
 459   struct re_pattern_buffer *bufp;
 460   int len = strlen (string);
 461
 462   regexp = string_make_unibyte (regexp);
 463   re_match_object = Qt;
 464   bufp = compile_pattern (regexp, 0,
 465                           Vascii_downcase_table, 0,
 466                           0);
 467   immediate_quit = 1;
 468   val = re_search (bufp, string, len, 0, len, 0);
 469   immediate_quit = 0;
 470   return val;
 471 }
 472 \f
 473 /* The newline cache: remembering which sections of text have no newlines.  */
 474
 475 /* If the user has requested newline caching, make sure it's on.
 476    Otherwise, make sure it's off.
 477    This is our cheezy way of associating an action with the change of
 478    state of a buffer-local variable.  */
 479 static void
 480 newline_cache_on_off (buf)
 481      struct buffer *buf;
 482 {
 483   if (NILP (buf->cache_long_line_scans))
 484     {
 485       /* It should be off.  */
 486       if (buf->newline_cache)
 487         {
 488           free_region_cache (buf->newline_cache);
 489           buf->newline_cache = 0;
 490         }
 491     }
 492   else
 493     {
 494       /* It should be on.  */
 495       if (buf->newline_cache == 0)
 496         buf->newline_cache = new_region_cache ();
 497     }
 498 }
 499
 500 \f
 501 /* Search for COUNT instances of the character TARGET between START and END.
 502
 503    If COUNT is positive, search forwards; END must be >= START.
 504    If COUNT is negative, search backwards for the -COUNTth instance;
 505       END must be <= START.
 506    If COUNT is zero, do anything you please; run rogue, for all I care.
 507
 508    If END is zero, use BEGV or ZV instead, as appropriate for the
 509    direction indicated by COUNT.
 510
 511    If we find COUNT instances, set *SHORTAGE to zero, and return the
 512    position after the COUNTth match.  Note that for reverse motion
 513    this is not the same as the usual convention for Emacs motion commands.
 514
 515    If we don't find COUNT instances before reaching END, set *SHORTAGE
 516    to the number of TARGETs left unfound, and return END.
 517
 518    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 519    except when inside redisplay.  */
 520
 521 int
 522 scan_buffer (target, start, end, count, shortage, allow_quit)
 523      register int target;
 524      int start, end;
 525      int count;
 526      int *shortage;
 527      int allow_quit;
 528 {
 529   struct region_cache *newline_cache;
 530   int direction;
 531
 532   if (count > 0)
 533     {
 534       direction = 1;
 535       if (! end) end = ZV;
 536     }
 537   else
 538     {
 539       direction = -1;
 540       if (! end) end = BEGV;
 541     }
 542
 543   newline_cache_on_off (current_buffer);
 544   newline_cache = current_buffer->newline_cache;
 545
 546   if (shortage != 0)
 547     *shortage = 0;
 548
 549   immediate_quit = allow_quit;
 550
 551   if (count > 0)
 552     while (start != end)
 553       {
 554         /* Our innermost scanning loop is very simple; it doesn't know
 555            about gaps, buffer ends, or the newline cache.  ceiling is
 556            the position of the last character before the next such
 557            obstacle --- the last character the dumb search loop should
 558            examine.  */
 559         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 560         int start_byte = CHAR_TO_BYTE (start);
 561         int tem;
 562
 563         /* If we're looking for a newline, consult the newline cache
 564            to see where we can avoid some scanning.  */
 565         if (target == '\n' && newline_cache)
 566           {
 567             int next_change;
 568             immediate_quit = 0;
 569             while (region_cache_forward
 570                    (current_buffer, newline_cache, start_byte, &next_change))
 571               start_byte = next_change;
 572             immediate_quit = allow_quit;
 573
 574             /* START should never be after END.  */
 575             if (start_byte > ceiling_byte)
 576               start_byte = ceiling_byte;
 577
 578             /* Now the text after start is an unknown region, and
 579                next_change is the position of the next known region. */
 580             ceiling_byte = min (next_change - 1, ceiling_byte);
 581           }
 582
 583         /* The dumb loop can only scan text stored in contiguous
 584            bytes. BUFFER_CEILING_OF returns the last character
 585            position that is contiguous, so the ceiling is the
 586            position after that.  */
 587         tem = BUFFER_CEILING_OF (start_byte);
 588         ceiling_byte = min (tem, ceiling_byte);
 589
 590         {
 591           /* The termination address of the dumb loop.  */
 592           register unsigned char *ceiling_addr
 593             = BYTE_POS_ADDR (ceiling_byte) + 1;
 594           register unsigned char *cursor
 595             = BYTE_POS_ADDR (start_byte);
 596           unsigned char *base = cursor;
 597
 598           while (cursor < ceiling_addr)
 599             {
 600               unsigned char *scan_start = cursor;
 601
 602               /* The dumb loop.  */
 603               while (*cursor != target && ++cursor < ceiling_addr)
 604                 ;
 605
 606               /* If we're looking for newlines, cache the fact that
 607                  the region from start to cursor is free of them. */
 608               if (target == '\n' && newline_cache)
 609                 know_region_cache (current_buffer, newline_cache,
 610                                    start_byte + scan_start - base,
 611                                    start_byte + cursor - base);
 612
 613               /* Did we find the target character?  */
 614               if (cursor < ceiling_addr)
 615                 {
 616                   if (--count == 0)
 617                     {
 618                       immediate_quit = 0;
 619                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 620                     }
 621                   cursor++;
 622                 }
 623             }
 624
 625           start = BYTE_TO_CHAR (start_byte + cursor - base);
 626         }
 627       }
 628   else
 629     while (start > end)
 630       {
 631         /* The last character to check before the next obstacle.  */
 632         int ceiling_byte = CHAR_TO_BYTE (end);
 633         int start_byte = CHAR_TO_BYTE (start);
 634         int tem;
 635
 636         /* Consult the newline cache, if appropriate.  */
 637         if (target == '\n' && newline_cache)
 638           {
 639             int next_change;
 640             immediate_quit = 0;
 641             while (region_cache_backward
 642                    (current_buffer, newline_cache, start_byte, &next_change))
 643               start_byte = next_change;
 644             immediate_quit = allow_quit;
 645
 646             /* Start should never be at or before end.  */
 647             if (start_byte <= ceiling_byte)
 648               start_byte = ceiling_byte + 1;
 649
 650             /* Now the text before start is an unknown region, and
 651                next_change is the position of the next known region. */
 652             ceiling_byte = max (next_change, ceiling_byte);
 653           }
 654
 655         /* Stop scanning before the gap.  */
 656         tem = BUFFER_FLOOR_OF (start_byte - 1);
 657         ceiling_byte = max (tem, ceiling_byte);
 658
 659         {
 660           /* The termination address of the dumb loop.  */
 661           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 662           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 663           unsigned char *base = cursor;
 664
 665           while (cursor >= ceiling_addr)
 666             {
 667               unsigned char *scan_start = cursor;
 668
 669               while (*cursor != target && --cursor >= ceiling_addr)
 670                 ;
 671
 672               /* If we're looking for newlines, cache the fact that
 673                  the region from after the cursor to start is free of them.  */
 674               if (target == '\n' && newline_cache)
 675                 know_region_cache (current_buffer, newline_cache,
 676                                    start_byte + cursor - base,
 677                                    start_byte + scan_start - base);
 678
 679               /* Did we find the target character?  */
 680               if (cursor >= ceiling_addr)
 681                 {
 682                   if (++count >= 0)
 683                     {
 684                       immediate_quit = 0;
 685                       return BYTE_TO_CHAR (start_byte + cursor - base);
 686                     }
 687                   cursor--;
 688                 }
 689             }
 690
 691           start = BYTE_TO_CHAR (start_byte + cursor - base);
 692         }
 693       }
 694
 695   immediate_quit = 0;
 696   if (shortage != 0)
 697     *shortage = count * direction;
 698   return start;
 699 }
 700 \f
 701 /* Search for COUNT instances of a line boundary, which means either a
 702    newline or (if selective display enabled) a carriage return.
 703    Start at START.  If COUNT is negative, search backwards.
 704
 705    We report the resulting position by calling TEMP_SET_PT_BOTH.
 706
 707    If we find COUNT instances. we position after (always after,
 708    even if scanning backwards) the COUNTth match, and return 0.
 709
 710    If we don't find COUNT instances before reaching the end of the
 711    buffer (or the beginning, if scanning backwards), we return
 712    the number of line boundaries left unfound, and position at
 713    the limit we bumped up against.
 714
 715    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 716    except in special cases.  */
 717
 718 int
 719 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 720      int start, start_byte;
 721      int limit, limit_byte;
 722      register int count;
 723      int allow_quit;
 724 {
 725   int direction = ((count > 0) ? 1 : -1);
 726
 727   register unsigned char *cursor;
 728   unsigned char *base;
 729
 730   register int ceiling;
 731   register unsigned char *ceiling_addr;
 732
 733   int old_immediate_quit = immediate_quit;
 734
 735   /* If we are not in selective display mode,
 736      check only for newlines.  */
 737   int selective_display = (!NILP (current_buffer->selective_display)
 738                            && !INTEGERP (current_buffer->selective_display));
 739
 740   /* The code that follows is like scan_buffer
 741      but checks for either newline or carriage return.  */
 742
 743   if (allow_quit)
 744     immediate_quit++;
 745
 746   start_byte = CHAR_TO_BYTE (start);
 747
 748   if (count > 0)
 749     {
 750       while (start_byte < limit_byte)
 751         {
 752           ceiling =  BUFFER_CEILING_OF (start_byte);
 753           ceiling = min (limit_byte - 1, ceiling);
 754           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 755           base = (cursor = BYTE_POS_ADDR (start_byte));
 756           while (1)
 757             {
 758               while (*cursor != '\n' && ++cursor != ceiling_addr)
 759                 ;
 760
 761               if (cursor != ceiling_addr)
 762                 {
 763                   if (--count == 0)
 764                     {
 765                       immediate_quit = old_immediate_quit;
 766                       start_byte = start_byte + cursor - base + 1;
 767                       start = BYTE_TO_CHAR (start_byte);
 768                       TEMP_SET_PT_BOTH (start, start_byte);
 769                       return 0;
 770                     }
 771                   else
 772                     if (++cursor == ceiling_addr)
 773                       break;
 774                 }
 775               else
 776                 break;
 777             }
 778           start_byte += cursor - base;
 779         }
 780     }
 781   else
 782     {
 783       while (start_byte > limit_byte)
 784         {
 785           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 786           ceiling = max (limit_byte, ceiling);
 787           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 788           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 789           while (1)
 790             {
 791               while (--cursor != ceiling_addr && *cursor != '\n')
 792                 ;
 793
 794               if (cursor != ceiling_addr)
 795                 {
 796                   if (++count == 0)
 797                     {
 798                       immediate_quit = old_immediate_quit;
 799                       /* Return the position AFTER the match we found.  */
 800                       start_byte = start_byte + cursor - base + 1;
 801                       start = BYTE_TO_CHAR (start_byte);
 802                       TEMP_SET_PT_BOTH (start, start_byte);
 803                       return 0;
 804                     }
 805                 }
 806               else
 807                 break;
 808             }
 809           /* Here we add 1 to compensate for the last decrement
 810              of CURSOR, which took it past the valid range.  */
 811           start_byte += cursor - base + 1;
 812         }
 813     }
 814
 815   TEMP_SET_PT_BOTH (limit, limit_byte);
 816   immediate_quit = old_immediate_quit;
 817
 818   return count * direction;
 819 }
 820
 821 int
 822 find_next_newline_no_quit (from, cnt)
 823      register int from, cnt;
 824 {
 825   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 826 }
 827
 828 /* Like find_next_newline, but returns position before the newline,
 829    not after, and only search up to TO.  This isn't just
 830    find_next_newline (...)-1, because you might hit TO.  */
 831
 832 int
 833 find_before_next_newline (from, to, cnt)
 834      int from, to, cnt;
 835 {
 836   int shortage;
 837   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 838
 839   if (shortage == 0)
 840     pos--;
 841
 842   return pos;
 843 }
 844 \f
 845 /* Subroutines of Lisp buffer search functions. */
 846
 847 static Lisp_Object
 848 search_command (string, bound, noerror, count, direction, RE, posix)
 849      Lisp_Object string, bound, noerror, count;
 850      int direction;
 851      int RE;
 852      int posix;
 853 {
 854   register int np;
 855   int lim, lim_byte;
 856   int n = direction;
 857
 858   if (!NILP (count))
 859     {
 860       CHECK_NUMBER (count, 3);
 861       n *= XINT (count);
 862     }
 863
 864   CHECK_STRING (string, 0);
 865   if (NILP (bound))
 866     {
 867       if (n > 0)
 868         lim = ZV, lim_byte = ZV_BYTE;
 869       else
 870         lim = BEGV, lim_byte = BEGV_BYTE;
 871     }
 872   else
 873     {
 874       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 875       lim = XINT (bound);
 876       if (n > 0 ? lim < PT : lim > PT)
 877         error ("Invalid search bound (wrong side of point)");
 878       if (lim > ZV)
 879         lim = ZV, lim_byte = ZV_BYTE;
 880       else if (lim < BEGV)
 881         lim = BEGV, lim_byte = BEGV_BYTE;
 882       else
 883         lim_byte = CHAR_TO_BYTE (lim);
 884     }
 885
 886   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 887                       (!NILP (current_buffer->case_fold_search)
 888                        ? current_buffer->case_canon_table
 889                        : Qnil),
 890                       (!NILP (current_buffer->case_fold_search)
 891                        ? current_buffer->case_eqv_table
 892                        : Qnil),
 893                       posix);
 894   if (np <= 0)
 895     {
 896       if (NILP (noerror))
 897         return signal_failure (string);
 898       if (!EQ (noerror, Qt))
 899         {
 900           if (lim < BEGV || lim > ZV)
 901             abort ();
 902           SET_PT_BOTH (lim, lim_byte);
 903           return Qnil;
 904 #if 0 /* This would be clean, but maybe programs depend on
 905          a value of nil here.  */
 906           np = lim;
 907 #endif
 908         }
 909       else
 910         return Qnil;
 911     }
 912
 913   if (np < BEGV || np > ZV)
 914     abort ();
 915
 916   SET_PT (np);
 917
 918   return make_number (np);
 919 }
 920 \f
 921 /* Return 1 if REGEXP it matches just one constant string.  */
 922
 923 static int
 924 trivial_regexp_p (regexp)
 925      Lisp_Object regexp;
 926 {
 927   int len = STRING_BYTES (XSTRING (regexp));
 928   unsigned char *s = XSTRING (regexp)->data;
 929   unsigned char c;
 930   while (--len >= 0)
 931     {
 932       switch (*s++)
 933         {
 934         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 935           return 0;
 936         case '\\':
 937           if (--len < 0)
 938             return 0;
 939           switch (*s++)
 940             {
 941             case '|': case '(': case ')': case '`': case '\'': case 'b':
 942             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 943             case 'S': case '=':
 944             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 945             case '1': case '2': case '3': case '4': case '5':
 946             case '6': case '7': case '8': case '9':
 947               return 0;
 948             }
 949         }
 950     }
 951   return 1;
 952 }
 953
 954 /* Search for the n'th occurrence of STRING in the current buffer,
 955    starting at position POS and stopping at position LIM,
 956    treating STRING as a literal string if RE is false or as
 957    a regular expression if RE is true.
 958
 959    If N is positive, searching is forward and LIM must be greater than POS.
 960    If N is negative, searching is backward and LIM must be less than POS.
 961
 962    Returns -x if x occurrences remain to be found (x > 0),
 963    or else the position at the beginning of the Nth occurrence
 964    (if searching backward) or the end (if searching forward).
 965
 966    POSIX is nonzero if we want full backtracking (POSIX style)
 967    for this pattern.  0 means backtrack only enough to get a valid match.  */
 968
 969 #define TRANSLATE(out, trt, d)                  \
 970 do                                              \
 971   {                                             \
 972     if (! NILP (trt))                           \
 973       {                                         \
 974         Lisp_Object temp;                       \
 975         temp = Faref (trt, make_number (d));    \
 976         if (INTEGERP (temp))                    \
 977           out = XINT (temp);                    \
 978         else                                    \
 979           out = d;                              \
 980       }                                         \
 981     else                                        \
 982       out = d;                                  \
 983   }                                             \
 984 while (0)
 985
 986 static int
 987 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
 988                RE, trt, inverse_trt, posix)
 989      Lisp_Object string;
 990      int pos;
 991      int pos_byte;
 992      int lim;
 993      int lim_byte;
 994      int n;
 995      int RE;
 996      Lisp_Object trt;
 997      Lisp_Object inverse_trt;
 998      int posix;
 999 {
1000   int len = XSTRING (string)->size;
1001   int len_byte = STRING_BYTES (XSTRING (string));
1002   register int i;
1003
1004   if (running_asynch_code)
1005     save_search_regs ();
1006
1007   /* Null string is found at starting position.  */
1008   if (len == 0)
1009     {
1010       set_search_regs (pos, 0);
1011       return pos;
1012     }
1013
1014   /* Searching 0 times means don't move.  */
1015   if (n == 0)
1016     return pos;
1017
1018   if (RE && !trivial_regexp_p (string))
1019     {
1020       unsigned char *p1, *p2;
1021       int s1, s2;
1022       struct re_pattern_buffer *bufp;
1023
1024       bufp = compile_pattern (string, &search_regs, trt, posix,
1025                               !NILP (current_buffer->enable_multibyte_characters));
1026
1027       immediate_quit = 1;       /* Quit immediately if user types ^G,
1028                                    because letting this function finish
1029                                    can take too long. */
1030       QUIT;                     /* Do a pending quit right away,
1031                                    to avoid paradoxical behavior */
1032       /* Get pointers and sizes of the two strings
1033          that make up the visible portion of the buffer. */
1034
1035       p1 = BEGV_ADDR;
1036       s1 = GPT_BYTE - BEGV_BYTE;
1037       p2 = GAP_END_ADDR;
1038       s2 = ZV_BYTE - GPT_BYTE;
1039       if (s1 < 0)
1040         {
1041           p2 = p1;
1042           s2 = ZV_BYTE - BEGV_BYTE;
1043           s1 = 0;
1044         }
1045       if (s2 < 0)
1046         {
1047           s1 = ZV_BYTE - BEGV_BYTE;
1048           s2 = 0;
1049         }
1050       re_match_object = Qnil;
1051
1052       while (n < 0)
1053         {
1054           int val;
1055           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1056                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1057                              &search_regs,
1058                              /* Don't allow match past current point */
1059                              pos_byte - BEGV_BYTE);
1060           if (val == -2)
1061             {
1062               matcher_overflow ();
1063             }
1064           if (val >= 0)
1065             {
1066               pos_byte = search_regs.start[0] + BEGV_BYTE;
1067               for (i = 0; i < search_regs.num_regs; i++)
1068                 if (search_regs.start[i] >= 0)
1069                   {
1070                     search_regs.start[i]
1071                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1072                     search_regs.end[i]
1073                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1074                   }
1075               XSETBUFFER (last_thing_searched, current_buffer);
1076               /* Set pos to the new position. */
1077               pos = search_regs.start[0];
1078             }
1079           else
1080             {
1081               immediate_quit = 0;
1082               return (n);
1083             }
1084           n++;
1085         }
1086       while (n > 0)
1087         {
1088           int val;
1089           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1090                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1091                              &search_regs,
1092                              lim_byte - BEGV_BYTE);
1093           if (val == -2)
1094             {
1095               matcher_overflow ();
1096             }
1097           if (val >= 0)
1098             {
1099               pos_byte = search_regs.end[0] + BEGV_BYTE;
1100               for (i = 0; i < search_regs.num_regs; i++)
1101                 if (search_regs.start[i] >= 0)
1102                   {
1103                     search_regs.start[i]
1104                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1105                     search_regs.end[i]
1106                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1107                   }
1108               XSETBUFFER (last_thing_searched, current_buffer);
1109               pos = search_regs.end[0];
1110             }
1111           else
1112             {
1113               immediate_quit = 0;
1114               return (0 - n);
1115             }
1116           n--;
1117         }
1118       immediate_quit = 0;
1119       return (pos);
1120     }
1121   else                          /* non-RE case */
1122     {
1123       unsigned char *raw_pattern, *pat;
1124       int raw_pattern_size;
1125       int raw_pattern_size_byte;
1126       unsigned char *patbuf;
1127       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1128       unsigned char *base_pat = XSTRING (string)->data;
1129       int charset_base = -1;
1130       int simple = 1;
1131
1132       /* MULTIBYTE says whether the text to be searched is multibyte.
1133          We must convert PATTERN to match that, or we will not really
1134          find things right.  */
1135
1136       if (multibyte == STRING_MULTIBYTE (string))
1137         {
1138           raw_pattern = (char *) XSTRING (string)->data;
1139           raw_pattern_size = XSTRING (string)->size;
1140           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1141         }
1142       else if (multibyte)
1143         {
1144           raw_pattern_size = XSTRING (string)->size;
1145           raw_pattern_size_byte
1146             = count_size_as_multibyte (XSTRING (string)->data,
1147                                        raw_pattern_size);
1148           raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1149           copy_text (XSTRING (string)->data, raw_pattern,
1150                      XSTRING (string)->size, 0, 1);
1151         }
1152       else
1153         {
1154           /* Converting multibyte to single-byte.
1155
1156              ??? Perhaps this conversion should be done in a special way
1157              by subtracting nonascii-insert-offset from each non-ASCII char,
1158              so that only the multibyte chars which really correspond to
1159              the chosen single-byte character set can possibly match.  */
1160           raw_pattern_size = XSTRING (string)->size;
1161           raw_pattern_size_byte = XSTRING (string)->size;
1162           raw_pattern = (char *) alloca (raw_pattern_size + 1);
1163           copy_text (XSTRING (string)->data, raw_pattern,
1164                      STRING_BYTES (XSTRING (string)), 1, 0);
1165         }
1166
1167       /* Copy and optionally translate the pattern.  */
1168       len = raw_pattern_size;
1169       len_byte = raw_pattern_size_byte;
1170       patbuf = (unsigned char *) alloca (len_byte);
1171       pat = patbuf;
1172       base_pat = raw_pattern;
1173       if (multibyte)
1174         {
1175           while (--len >= 0)
1176             {
1177               unsigned char workbuf[4], *str;
1178               int c, translated, inverse;
1179               int in_charlen, charlen;
1180
1181               /* If we got here and the RE flag is set, it's because we're
1182                  dealing with a regexp known to be trivial, so the backslash
1183                  just quotes the next character.  */
1184               if (RE && *base_pat == '\\')
1185                 {
1186                   len--;
1187                   len_byte--;
1188                   base_pat++;
1189                 }
1190
1191               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1192               /* Translate the character, if requested.  */
1193               TRANSLATE (translated, trt, c);
1194               /* If translation changed the byte-length, go back
1195                  to the original character.  */
1196               charlen = CHAR_STRING (translated, workbuf, str);
1197               if (in_charlen != charlen)
1198                 {
1199                   translated = c;
1200                   charlen = CHAR_STRING (c, workbuf, str);
1201                 }
1202
1203               TRANSLATE (inverse, inverse_trt, c);
1204
1205               /* Did this char actually get translated?
1206                  Would any other char get translated into it?  */
1207               if (translated != c || inverse != c)
1208                 {
1209                   /* Keep track of which character set row
1210                      contains the characters that need translation.  */
1211                   int charset_base_code = c & ~0xff;
1212                   if (charset_base == -1)
1213                     charset_base = charset_base_code;
1214                   else if (charset_base != charset_base_code)
1215                     /* If two different rows appear, needing translation,
1216                        then we cannot use boyer_moore search.  */
1217                     simple = 0;
1218                     /* ??? Handa: this must do simple = 0
1219                        if c is a composite character.  */
1220                 }
1221
1222               /* Store this character into the translated pattern.  */
1223               bcopy (str, pat, charlen);
1224               pat += charlen;
1225               base_pat += in_charlen;
1226               len_byte -= in_charlen;
1227             }
1228         }
1229       else
1230         {
1231           while (--len >= 0)
1232             {
1233               int c, translated, inverse;
1234
1235               /* If we got here and the RE flag is set, it's because we're
1236                  dealing with a regexp known to be trivial, so the backslash
1237                  just quotes the next character.  */
1238               if (RE && *base_pat == '\\')
1239                 {
1240                   len--;
1241                   base_pat++;
1242                 }
1243               c = *base_pat++;
1244               TRANSLATE (translated, trt, c);
1245               TRANSLATE (inverse, inverse_trt, c);
1246
1247               /* Did this char actually get translated?
1248                  Would any other char get translated into it?  */
1249               if (translated != c || inverse != c)
1250                 {
1251                   /* Keep track of which character set row
1252                      contains the characters that need translation.  */
1253                   int charset_base_code = c & ~0xff;
1254                   if (charset_base == -1)
1255                     charset_base = charset_base_code;
1256                   else if (charset_base != charset_base_code)
1257                     /* If two different rows appear, needing translation,
1258                        then we cannot use boyer_moore search.  */
1259                     simple = 0;
1260                 }
1261               *pat++ = translated;
1262             }
1263         }
1264
1265       len_byte = pat - patbuf;
1266       len = raw_pattern_size;
1267       pat = base_pat = patbuf;
1268
1269       if (simple)
1270         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1271                             pos, pos_byte, lim, lim_byte,
1272                             charset_base);
1273       else
1274         return simple_search (n, pat, len, len_byte, trt,
1275                               pos, pos_byte, lim, lim_byte);
1276     }
1277 }
1278 \f
1279 /* Do a simple string search N times for the string PAT,
1280    whose length is LEN/LEN_BYTE,
1281    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1282    TRT is the translation table.
1283
1284    Return the character position where the match is found.
1285    Otherwise, if M matches remained to be found, return -M.
1286
1287    This kind of search works regardless of what is in PAT and
1288    regardless of what is in TRT.  It is used in cases where
1289    boyer_moore cannot work.  */
1290
1291 static int
1292 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1293      int n;
1294      unsigned char *pat;
1295      int len, len_byte;
1296      Lisp_Object trt;
1297      int pos, pos_byte;
1298      int lim, lim_byte;
1299 {
1300   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1301   int forward = n > 0;
1302
1303   if (lim > pos && multibyte)
1304     while (n > 0)
1305       {
1306         while (1)
1307           {
1308             /* Try matching at position POS.  */
1309             int this_pos = pos;
1310             int this_pos_byte = pos_byte;
1311             int this_len = len;
1312             int this_len_byte = len_byte;
1313             unsigned char *p = pat;
1314             if (pos + len > lim)
1315               goto stop;
1316
1317             while (this_len > 0)
1318               {
1319                 int charlen, buf_charlen;
1320                 int pat_ch, buf_ch;
1321
1322                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1323                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1324                                                  ZV_BYTE - this_pos_byte,
1325                                                  buf_charlen);
1326                 TRANSLATE (buf_ch, trt, buf_ch);
1327
1328                 if (buf_ch != pat_ch)
1329                   break;
1330
1331                 this_len_byte -= charlen;
1332                 this_len--;
1333                 p += charlen;
1334
1335                 this_pos_byte += buf_charlen;
1336                 this_pos++;
1337               }
1338
1339             if (this_len == 0)
1340               {
1341                 pos += len;
1342                 pos_byte += len_byte;
1343                 break;
1344               }
1345
1346             INC_BOTH (pos, pos_byte);
1347           }
1348
1349         n--;
1350       }
1351   else if (lim > pos)
1352     while (n > 0)
1353       {
1354         while (1)
1355           {
1356             /* Try matching at position POS.  */
1357             int this_pos = pos;
1358             int this_len = len;
1359             unsigned char *p = pat;
1360
1361             if (pos + len > lim)
1362               goto stop;
1363
1364             while (this_len > 0)
1365               {
1366                 int pat_ch = *p++;
1367                 int buf_ch = FETCH_BYTE (this_pos);
1368                 TRANSLATE (buf_ch, trt, buf_ch);
1369
1370                 if (buf_ch != pat_ch)
1371                   break;
1372
1373                 this_len--;
1374                 this_pos++;
1375               }
1376
1377             if (this_len == 0)
1378               {
1379                 pos += len;
1380                 break;
1381               }
1382
1383             pos++;
1384           }
1385
1386         n--;
1387       }
1388   /* Backwards search.  */
1389   else if (lim < pos && multibyte)
1390     while (n < 0)
1391       {
1392         while (1)
1393           {
1394             /* Try matching at position POS.  */
1395             int this_pos = pos - len;
1396             int this_pos_byte = pos_byte - len_byte;
1397             int this_len = len;
1398             int this_len_byte = len_byte;
1399             unsigned char *p = pat;
1400
1401             if (pos - len < lim)
1402               goto stop;
1403
1404             while (this_len > 0)
1405               {
1406                 int charlen, buf_charlen;
1407                 int pat_ch, buf_ch;
1408
1409                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1410                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1411                                                  ZV_BYTE - this_pos_byte,
1412                                                  buf_charlen);
1413                 TRANSLATE (buf_ch, trt, buf_ch);
1414
1415                 if (buf_ch != pat_ch)
1416                   break;
1417
1418                 this_len_byte -= charlen;
1419                 this_len--;
1420                 p += charlen;
1421                 this_pos_byte += buf_charlen;
1422                 this_pos++;
1423               }
1424
1425             if (this_len == 0)
1426               {
1427                 pos -= len;
1428                 pos_byte -= len_byte;
1429                 break;
1430               }
1431
1432             DEC_BOTH (pos, pos_byte);
1433           }
1434
1435         n++;
1436       }
1437   else if (lim < pos)
1438     while (n < 0)
1439       {
1440         while (1)
1441           {
1442             /* Try matching at position POS.  */
1443             int this_pos = pos - len;
1444             int this_len = len;
1445             unsigned char *p = pat;
1446
1447             if (pos - len < lim)
1448               goto stop;
1449
1450             while (this_len > 0)
1451               {
1452                 int pat_ch = *p++;
1453                 int buf_ch = FETCH_BYTE (this_pos);
1454                 TRANSLATE (buf_ch, trt, buf_ch);
1455
1456                 if (buf_ch != pat_ch)
1457                   break;
1458                 this_len--;
1459                 this_pos++;
1460               }
1461
1462             if (this_len == 0)
1463               {
1464                 pos -= len;
1465                 break;
1466               }
1467
1468             pos--;
1469           }
1470
1471         n++;
1472       }
1473
1474  stop:
1475   if (n == 0)
1476     {
1477       if (forward)
1478         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1479       else
1480         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1481
1482       return pos;
1483     }
1484   else if (n > 0)
1485     return -n;
1486   else
1487     return n;
1488 }
1489 \f
1490 /* Do Boyer-Moore search N times for the string PAT,
1491    whose length is LEN/LEN_BYTE,
1492    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1493    DIRECTION says which direction we search in.
1494    TRT and INVERSE_TRT are translation tables.
1495
1496    This kind of search works if all the characters in PAT that have
1497    nontrivial translation are the same aside from the last byte.  This
1498    makes it possible to translate just the last byte of a character,
1499    and do so after just a simple test of the context.
1500
1501    If that criterion is not satisfied, do not call this function.  */
1502
1503 static int
1504 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1505              pos, pos_byte, lim, lim_byte, charset_base)
1506      int n;
1507      unsigned char *base_pat;
1508      int len, len_byte;
1509      Lisp_Object trt;
1510      Lisp_Object inverse_trt;
1511      int pos, pos_byte;
1512      int lim, lim_byte;
1513      int charset_base;
1514 {
1515   int direction = ((n > 0) ? 1 : -1);
1516   register int dirlen;
1517   int infinity, limit, k, stride_for_teases;
1518   register int *BM_tab;
1519   int *BM_tab_base;
1520   register unsigned char *cursor, *p_limit;
1521   register int i, j;
1522   unsigned char *pat;
1523   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1524
1525   unsigned char simple_translate[0400];
1526   int translate_prev_byte;
1527   int translate_anteprev_byte;
1528
1529 #ifdef C_ALLOCA
1530   int BM_tab_space[0400];
1531   BM_tab = &BM_tab_space[0];
1532 #else
1533   BM_tab = (int *) alloca (0400 * sizeof (int));
1534 #endif
1535   /* The general approach is that we are going to maintain that we know */
1536   /* the first (closest to the present position, in whatever direction */
1537   /* we're searching) character that could possibly be the last */
1538   /* (furthest from present position) character of a valid match.  We */
1539   /* advance the state of our knowledge by looking at that character */
1540   /* and seeing whether it indeed matches the last character of the */
1541   /* pattern.  If it does, we take a closer look.  If it does not, we */
1542   /* move our pointer (to putative last characters) as far as is */
1543   /* logically possible.  This amount of movement, which I call a */
1544   /* stride, will be the length of the pattern if the actual character */
1545   /* appears nowhere in the pattern, otherwise it will be the distance */
1546   /* from the last occurrence of that character to the end of the */
1547   /* pattern. */
1548   /* As a coding trick, an enormous stride is coded into the table for */
1549   /* characters that match the last character.  This allows use of only */
1550   /* a single test, a test for having gone past the end of the */
1551   /* permissible match region, to test for both possible matches (when */
1552   /* the stride goes past the end immediately) and failure to */
1553   /* match (where you get nudged past the end one stride at a time). */
1554
1555   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1556   /* is determined only by the last character of the putative match. */
1557   /* If that character does not match, we will stride the proper */
1558   /* distance to propose a match that superimposes it on the last */
1559   /* instance of a character that matches it (per trt), or misses */
1560   /* it entirely if there is none. */
1561
1562   dirlen = len_byte * direction;
1563   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1564   if (direction < 0)
1565     pat = (base_pat += len_byte - 1);
1566   else
1567     pat = base_pat;
1568   BM_tab_base = BM_tab;
1569   BM_tab += 0400;
1570   j = dirlen;           /* to get it in a register */
1571   /* A character that does not appear in the pattern induces a */
1572   /* stride equal to the pattern length. */
1573   while (BM_tab_base != BM_tab)
1574     {
1575       *--BM_tab = j;
1576       *--BM_tab = j;
1577       *--BM_tab = j;
1578       *--BM_tab = j;
1579     }
1580
1581   /* We use this for translation, instead of TRT itself.
1582      We fill this in to handle the characters that actually
1583      occur in the pattern.  Others don't matter anyway!  */
1584   bzero (simple_translate, sizeof simple_translate);
1585   for (i = 0; i < 0400; i++)
1586     simple_translate[i] = i;
1587
1588   i = 0;
1589   while (i != infinity)
1590     {
1591       unsigned char *ptr = pat + i;
1592       i += direction;
1593       if (i == dirlen)
1594         i = infinity;
1595       if (! NILP (trt))
1596         {
1597           int ch;
1598           int untranslated;
1599           int this_translated = 1;
1600
1601           if (multibyte
1602               && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
1603             {
1604               unsigned char *charstart = ptr;
1605               while (! CHAR_HEAD_P (*charstart))
1606                 charstart--;
1607               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1608               if (charset_base == (untranslated & ~0xff))
1609                 {
1610                   TRANSLATE (ch, trt, untranslated);
1611                   if (! CHAR_HEAD_P (*ptr))
1612                     {
1613                       translate_prev_byte = ptr[-1];
1614                       if (! CHAR_HEAD_P (translate_prev_byte))
1615                         translate_anteprev_byte = ptr[-2];
1616                     }
1617                 }
1618               else
1619                 {
1620                   this_translated = 0;
1621                   ch = *ptr;
1622                 }
1623             }
1624           else if (!multibyte)
1625             TRANSLATE (ch, trt, *ptr);
1626           else
1627             {
1628               ch = *ptr;
1629               this_translated = 0;
1630             }
1631
1632           if (ch > 0400)
1633             j = ((unsigned char) ch) | 0200;
1634           else
1635             j = (unsigned char) ch;
1636
1637           if (i == infinity)
1638             stride_for_teases = BM_tab[j];
1639
1640           BM_tab[j] = dirlen - i;
1641           /* A translation table is accompanied by its inverse -- see */
1642           /* comment following downcase_table for details */
1643           if (this_translated)
1644             {
1645               int starting_ch = ch;
1646               int starting_j = j;
1647               while (1)
1648                 {
1649                   TRANSLATE (ch, inverse_trt, ch);
1650                   if (ch > 0400)
1651                     j = ((unsigned char) ch) | 0200;
1652                   else
1653                     j = (unsigned char) ch;
1654
1655                   /* For all the characters that map into CH,
1656                      set up simple_translate to map the last byte
1657                      into STARTING_J.  */
1658                   simple_translate[j] = starting_j;
1659                   if (ch == starting_ch)
1660                     break;
1661                   BM_tab[j] = dirlen - i;
1662                 }
1663             }
1664         }
1665       else
1666         {
1667           j = *ptr;
1668
1669           if (i == infinity)
1670             stride_for_teases = BM_tab[j];
1671           BM_tab[j] = dirlen - i;
1672         }
1673       /* stride_for_teases tells how much to stride if we get a */
1674       /* match on the far character but are subsequently */
1675       /* disappointed, by recording what the stride would have been */
1676       /* for that character if the last character had been */
1677       /* different. */
1678     }
1679   infinity = dirlen - infinity;
1680   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1681   /* loop invariant - POS_BYTE points at where last char (first
1682      char if reverse) of pattern would align in a possible match.  */
1683   while (n != 0)
1684     {
1685       int tail_end;
1686       unsigned char *tail_end_ptr;
1687
1688       /* It's been reported that some (broken) compiler thinks that
1689          Boolean expressions in an arithmetic context are unsigned.
1690          Using an explicit ?1:0 prevents this.  */
1691       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1692           < 0)
1693         return (n * (0 - direction));
1694       /* First we do the part we can by pointers (maybe nothing) */
1695       QUIT;
1696       pat = base_pat;
1697       limit = pos_byte - dirlen + direction;
1698       if (direction > 0)
1699         {
1700           limit = BUFFER_CEILING_OF (limit);
1701           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1702              can take on without hitting edge of buffer or the gap.  */
1703           limit = min (limit, pos_byte + 20000);
1704           limit = min (limit, lim_byte - 1);
1705         }
1706       else
1707         {
1708           limit = BUFFER_FLOOR_OF (limit);
1709           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1710              can take on without hitting edge of buffer or the gap.  */
1711           limit = max (limit, pos_byte - 20000);
1712           limit = max (limit, lim_byte);
1713         }
1714       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1715       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1716
1717       if ((limit - pos_byte) * direction > 20)
1718         {
1719           unsigned char *p2;
1720
1721           p_limit = BYTE_POS_ADDR (limit);
1722           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1723           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1724           while (1)             /* use one cursor setting as long as i can */
1725             {
1726               if (direction > 0) /* worth duplicating */
1727                 {
1728                   /* Use signed comparison if appropriate
1729                      to make cursor+infinity sure to be > p_limit.
1730                      Assuming that the buffer lies in a range of addresses
1731                      that are all "positive" (as ints) or all "negative",
1732                      either kind of comparison will work as long
1733                      as we don't step by infinity.  So pick the kind
1734                      that works when we do step by infinity.  */
1735                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1736                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1737                       cursor += BM_tab[*cursor];
1738                   else
1739                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1740                       cursor += BM_tab[*cursor];
1741                 }
1742               else
1743                 {
1744                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1745                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1746                       cursor += BM_tab[*cursor];
1747                   else
1748                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1749                       cursor += BM_tab[*cursor];
1750                 }
1751 /* If you are here, cursor is beyond the end of the searched region. */
1752 /* This can happen if you match on the far character of the pattern, */
1753 /* because the "stride" of that character is infinity, a number able */
1754 /* to throw you well beyond the end of the search.  It can also */
1755 /* happen if you fail to match within the permitted region and would */
1756 /* otherwise try a character beyond that region */
1757               if ((cursor - p_limit) * direction <= len_byte)
1758                 break;  /* a small overrun is genuine */
1759               cursor -= infinity; /* large overrun = hit */
1760               i = dirlen - direction;
1761               if (! NILP (trt))
1762                 {
1763                   while ((i -= direction) + direction != 0)
1764                     {
1765                       int ch;
1766                       cursor -= direction;
1767                       /* Translate only the last byte of a character.  */
1768                       if (! multibyte
1769                           || ((cursor == tail_end_ptr
1770                                || CHAR_HEAD_P (cursor[1]))
1771                               && (CHAR_HEAD_P (cursor[0])
1772                                   || (translate_prev_byte == cursor[-1]
1773                                       && (CHAR_HEAD_P (translate_prev_byte)
1774                                           || translate_anteprev_byte == cursor[-2])))))
1775                         ch = simple_translate[*cursor];
1776                       else
1777                         ch = *cursor;
1778                       if (pat[i] != ch)
1779                         break;
1780                     }
1781                 }
1782               else
1783                 {
1784                   while ((i -= direction) + direction != 0)
1785                     {
1786                       cursor -= direction;
1787                       if (pat[i] != *cursor)
1788                         break;
1789                     }
1790                 }
1791               cursor += dirlen - i - direction; /* fix cursor */
1792               if (i + direction == 0)
1793                 {
1794                   int position;
1795
1796                   cursor -= direction;
1797
1798                   position = pos_byte + cursor - p2 + ((direction > 0)
1799                                                        ? 1 - len_byte : 0);
1800                   set_search_regs (position, len_byte);
1801
1802                   if ((n -= direction) != 0)
1803                     cursor += dirlen; /* to resume search */
1804                   else
1805                     return ((direction > 0)
1806                             ? search_regs.end[0] : search_regs.start[0]);
1807                 }
1808               else
1809                 cursor += stride_for_teases; /* <sigh> we lose -  */
1810             }
1811           pos_byte += cursor - p2;
1812         }
1813       else
1814         /* Now we'll pick up a clump that has to be done the hard */
1815         /* way because it covers a discontinuity */
1816         {
1817           limit = ((direction > 0)
1818                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1819                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1820           limit = ((direction > 0)
1821                    ? min (limit + len_byte, lim_byte - 1)
1822                    : max (limit - len_byte, lim_byte));
1823           /* LIMIT is now the last value POS_BYTE can have
1824              and still be valid for a possible match.  */
1825           while (1)
1826             {
1827               /* This loop can be coded for space rather than */
1828               /* speed because it will usually run only once. */
1829               /* (the reach is at most len + 21, and typically */
1830               /* does not exceed len) */
1831               while ((limit - pos_byte) * direction >= 0)
1832                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1833               /* now run the same tests to distinguish going off the */
1834               /* end, a match or a phony match. */
1835               if ((pos_byte - limit) * direction <= len_byte)
1836                 break;  /* ran off the end */
1837               /* Found what might be a match.
1838                  Set POS_BYTE back to last (first if reverse) pos.  */
1839               pos_byte -= infinity;
1840               i = dirlen - direction;
1841               while ((i -= direction) + direction != 0)
1842                 {
1843                   int ch;
1844                   unsigned char *ptr;
1845                   pos_byte -= direction;
1846                   ptr = BYTE_POS_ADDR (pos_byte);
1847                   /* Translate only the last byte of a character.  */
1848                   if (! multibyte
1849                       || ((ptr == tail_end_ptr
1850                            || CHAR_HEAD_P (ptr[1]))
1851                           && (CHAR_HEAD_P (ptr[0])
1852                               || (translate_prev_byte == ptr[-1]
1853                                   && (CHAR_HEAD_P (translate_prev_byte)
1854                                       || translate_anteprev_byte == ptr[-2])))))
1855                     ch = simple_translate[*ptr];
1856                   else
1857                     ch = *ptr;
1858                   if (pat[i] != ch)
1859                     break;
1860                 }
1861               /* Above loop has moved POS_BYTE part or all the way
1862                  back to the first pos (last pos if reverse).
1863                  Set it once again at the last (first if reverse) char.  */
1864               pos_byte += dirlen - i- direction;
1865               if (i + direction == 0)
1866                 {
1867                   int position;
1868                   pos_byte -= direction;
1869
1870                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1871
1872                   set_search_regs (position, len_byte);
1873
1874                   if ((n -= direction) != 0)
1875                     pos_byte += dirlen; /* to resume search */
1876                   else
1877                     return ((direction > 0)
1878                             ? search_regs.end[0] : search_regs.start[0]);
1879                 }
1880               else
1881                 pos_byte += stride_for_teases;
1882             }
1883           }
1884       /* We have done one clump.  Can we continue? */
1885       if ((lim_byte - pos_byte) * direction < 0)
1886         return ((0 - n) * direction);
1887     }
1888   return BYTE_TO_CHAR (pos_byte);
1889 }
1890
1891 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1892    for a match just found in the current buffer.  */
1893
1894 static void
1895 set_search_regs (beg_byte, nbytes)
1896      int beg_byte, nbytes;
1897 {
1898   /* Make sure we have registers in which to store
1899      the match position.  */
1900   if (search_regs.num_regs == 0)
1901     {
1902       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1903       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1904       search_regs.num_regs = 2;
1905     }
1906
1907   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1908   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1909   XSETBUFFER (last_thing_searched, current_buffer);
1910 }
1911 \f
1912 /* Given a string of words separated by word delimiters,
1913   compute a regexp that matches those exact words
1914   separated by arbitrary punctuation.  */
1915
1916 static Lisp_Object
1917 wordify (string)
1918      Lisp_Object string;
1919 {
1920   register unsigned char *p, *o;
1921   register int i, i_byte, len, punct_count = 0, word_count = 0;
1922   Lisp_Object val;
1923   int prev_c = 0;
1924   int adjust;
1925
1926   CHECK_STRING (string, 0);
1927   p = XSTRING (string)->data;
1928   len = XSTRING (string)->size;
1929
1930   for (i = 0, i_byte = 0; i < len; )
1931     {
1932       int c;
1933
1934       if (STRING_MULTIBYTE (string))
1935         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1936       else
1937         c = XSTRING (string)->data[i++];
1938
1939       if (SYNTAX (c) != Sword)
1940         {
1941           punct_count++;
1942           if (i > 0 && SYNTAX (prev_c) == Sword)
1943             word_count++;
1944         }
1945
1946       prev_c = c;
1947     }
1948
1949   if (SYNTAX (prev_c) == Sword)
1950     word_count++;
1951   if (!word_count)
1952     return build_string ("");
1953
1954   adjust = - punct_count + 5 * (word_count - 1) + 4;
1955   val = make_uninit_multibyte_string (len + adjust,
1956                                       STRING_BYTES (XSTRING (string)) + adjust);
1957
1958   o = XSTRING (val)->data;
1959   *o++ = '\\';
1960   *o++ = 'b';
1961
1962   for (i = 0; i < STRING_BYTES (XSTRING (val)); i++)
1963     if (SYNTAX (p[i]) == Sword)
1964       *o++ = p[i];
1965     else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1966       {
1967         *o++ = '\\';
1968         *o++ = 'W';
1969         *o++ = '\\';
1970         *o++ = 'W';
1971         *o++ = '*';
1972       }
1973
1974   *o++ = '\\';
1975   *o++ = 'b';
1976
1977   return val;
1978 }
1979 \f
1980 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1981   "MSearch backward: ",
1982   "Search backward from point for STRING.\n\
1983 Set point to the beginning of the occurrence found, and return point.\n\
1984 An optional second argument bounds the search; it is a buffer position.\n\
1985 The match found must not extend before that position.\n\
1986 Optional third argument, if t, means if fail just return nil (no error).\n\
1987  If not nil and not t, position at limit of search and return nil.\n\
1988 Optional fourth argument is repeat count--search for successive occurrences.\n\
1989 See also the functions `match-beginning', `match-end' and `replace-match'.")
1990   (string, bound, noerror, count)
1991      Lisp_Object string, bound, noerror, count;
1992 {
1993   return search_command (string, bound, noerror, count, -1, 0, 0);
1994 }
1995
1996 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1997   "Search forward from point for STRING.\n\
1998 Set point to the end of the occurrence found, and return point.\n\
1999 An optional second argument bounds the search; it is a buffer position.\n\
2000 The match found must not extend after that position.  nil is equivalent\n\
2001   to (point-max).\n\
2002 Optional third argument, if t, means if fail just return nil (no error).\n\
2003   If not nil and not t, move to limit of search and return nil.\n\
2004 Optional fourth argument is repeat count--search for successive occurrences.\n\
2005 See also the functions `match-beginning', `match-end' and `replace-match'.")
2006   (string, bound, noerror, count)
2007      Lisp_Object string, bound, noerror, count;
2008 {
2009   return search_command (string, bound, noerror, count, 1, 0, 0);
2010 }
2011
2012 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2013   "sWord search backward: ",
2014   "Search backward from point for STRING, ignoring differences in punctuation.\n\
2015 Set point to the beginning of the occurrence found, and return point.\n\
2016 An optional second argument bounds the search; it is a buffer position.\n\
2017 The match found must not extend before that position.\n\
2018 Optional third argument, if t, means if fail just return nil (no error).\n\
2019   If not nil and not t, move to limit of search and return nil.\n\
2020 Optional fourth argument is repeat count--search for successive occurrences.")
2021   (string, bound, noerror, count)
2022      Lisp_Object string, bound, noerror, count;
2023 {
2024   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2025 }
2026
2027 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2028   "sWord search: ",
2029   "Search forward from point for STRING, ignoring differences in punctuation.\n\
2030 Set point to the end of the occurrence found, and return point.\n\
2031 An optional second argument bounds the search; it is a buffer position.\n\
2032 The match found must not extend after that position.\n\
2033 Optional third argument, if t, means if fail just return nil (no error).\n\
2034   If not nil and not t, move to limit of search and return nil.\n\
2035 Optional fourth argument is repeat count--search for successive occurrences.")
2036   (string, bound, noerror, count)
2037      Lisp_Object string, bound, noerror, count;
2038 {
2039   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2040 }
2041
2042 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2043   "sRE search backward: ",
2044   "Search backward from point for match for regular expression REGEXP.\n\
2045 Set point to the beginning of the match, and return point.\n\
2046 The match found is the one starting last in the buffer\n\
2047 and yet ending before the origin of the search.\n\
2048 An optional second argument bounds the search; it is a buffer position.\n\
2049 The match found must start at or after that position.\n\
2050 Optional third argument, if t, means if fail just return nil (no error).\n\
2051   If not nil and not t, move to limit of search and return nil.\n\
2052 Optional fourth argument is repeat count--search for successive occurrences.\n\
2053 See also the functions `match-beginning', `match-end' and `replace-match'.")
2054   (regexp, bound, noerror, count)
2055      Lisp_Object regexp, bound, noerror, count;
2056 {
2057   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2058 }
2059
2060 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2061   "sRE search: ",
2062   "Search forward from point for regular expression REGEXP.\n\
2063 Set point to the end of the occurrence found, and return point.\n\
2064 An optional second argument bounds the search; it is a buffer position.\n\
2065 The match found must not extend after that position.\n\
2066 Optional third argument, if t, means if fail just return nil (no error).\n\
2067   If not nil and not t, move to limit of search and return nil.\n\
2068 Optional fourth argument is repeat count--search for successive occurrences.\n\
2069 See also the functions `match-beginning', `match-end' and `replace-match'.")
2070   (regexp, bound, noerror, count)
2071      Lisp_Object regexp, bound, noerror, count;
2072 {
2073   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2074 }
2075
2076 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2077   "sPosix search backward: ",
2078   "Search backward from point for match for regular expression REGEXP.\n\
2079 Find the longest match in accord with Posix regular expression rules.\n\
2080 Set point to the beginning of the match, and return point.\n\
2081 The match found is the one starting last in the buffer\n\
2082 and yet ending before the origin of the search.\n\
2083 An optional second argument bounds the search; it is a buffer position.\n\
2084 The match found must start at or after that position.\n\
2085 Optional third argument, if t, means if fail just return nil (no error).\n\
2086   If not nil and not t, move to limit of search and return nil.\n\
2087 Optional fourth argument is repeat count--search for successive occurrences.\n\
2088 See also the functions `match-beginning', `match-end' and `replace-match'.")
2089   (regexp, bound, noerror, count)
2090      Lisp_Object regexp, bound, noerror, count;
2091 {
2092   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2093 }
2094
2095 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2096   "sPosix search: ",
2097   "Search forward from point for regular expression REGEXP.\n\
2098 Find the longest match in accord with Posix regular expression rules.\n\
2099 Set point to the end of the occurrence found, and return point.\n\
2100 An optional second argument bounds the search; it is a buffer position.\n\
2101 The match found must not extend after that position.\n\
2102 Optional third argument, if t, means if fail just return nil (no error).\n\
2103   If not nil and not t, move to limit of search and return nil.\n\
2104 Optional fourth argument is repeat count--search for successive occurrences.\n\
2105 See also the functions `match-beginning', `match-end' and `replace-match'.")
2106   (regexp, bound, noerror, count)
2107      Lisp_Object regexp, bound, noerror, count;
2108 {
2109   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2110 }
2111 \f
2112 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2113   "Replace text matched by last search with NEWTEXT.\n\
2114 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2115 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2116 based on the replaced text.\n\
2117 If the replaced text has only capital letters\n\
2118 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2119 If the replaced text has at least one word starting with a capital letter,\n\
2120 then capitalize each word in NEWTEXT.\n\n\
2121 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2122 Otherwise treat `\\' as special:\n\
2123   `\\&' in NEWTEXT means substitute original matched text.\n\
2124   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2125        If Nth parens didn't match, substitute nothing.\n\
2126   `\\\\' means insert one `\\'.\n\
2127 FIXEDCASE and LITERAL are optional arguments.\n\
2128 Leaves point at end of replacement text.\n\
2129 \n\
2130 The optional fourth argument STRING can be a string to modify.\n\
2131 In that case, this function creates and returns a new string\n\
2132 which is made by replacing the part of STRING that was matched.\n\
2133 \n\
2134 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2135 It says to replace just that subexpression instead of the whole match.\n\
2136 This is useful only after a regular expression search or match\n\
2137 since only regular expressions have distinguished subexpressions.")
2138   (newtext, fixedcase, literal, string, subexp)
2139      Lisp_Object newtext, fixedcase, literal, string, subexp;
2140 {
2141   enum { nochange, all_caps, cap_initial } case_action;
2142   register int pos, last;
2143   int some_multiletter_word;
2144   int some_lowercase;
2145   int some_uppercase;
2146   int some_nonuppercase_initial;
2147   register int c, prevc;
2148   int inslen;
2149   int sub;
2150   int opoint, newpoint;
2151
2152   CHECK_STRING (newtext, 0);
2153
2154   if (! NILP (string))
2155     CHECK_STRING (string, 4);
2156
2157   case_action = nochange;       /* We tried an initialization */
2158                                 /* but some C compilers blew it */
2159
2160   if (search_regs.num_regs <= 0)
2161     error ("replace-match called before any match found");
2162
2163   if (NILP (subexp))
2164     sub = 0;
2165   else
2166     {
2167       CHECK_NUMBER (subexp, 3);
2168       sub = XINT (subexp);
2169       if (sub < 0 || sub >= search_regs.num_regs)
2170         args_out_of_range (subexp, make_number (search_regs.num_regs));
2171     }
2172
2173   if (NILP (string))
2174     {
2175       if (search_regs.start[sub] < BEGV
2176           || search_regs.start[sub] > search_regs.end[sub]
2177           || search_regs.end[sub] > ZV)
2178         args_out_of_range (make_number (search_regs.start[sub]),
2179                            make_number (search_regs.end[sub]));
2180     }
2181   else
2182     {
2183       if (search_regs.start[sub] < 0
2184           || search_regs.start[sub] > search_regs.end[sub]
2185           || search_regs.end[sub] > XSTRING (string)->size)
2186         args_out_of_range (make_number (search_regs.start[sub]),
2187                            make_number (search_regs.end[sub]));
2188     }
2189
2190   if (NILP (fixedcase))
2191     {
2192       int beg;
2193       /* Decide how to casify by examining the matched text. */
2194
2195       if (NILP (string))
2196         last = CHAR_TO_BYTE (search_regs.end[sub]);
2197       else
2198         last = search_regs.end[sub];
2199
2200       if (NILP (string))
2201         beg = CHAR_TO_BYTE (search_regs.start[sub]);
2202       else
2203         beg = search_regs.start[sub];
2204
2205       prevc = '\n';
2206       case_action = all_caps;
2207
2208       /* some_multiletter_word is set nonzero if any original word
2209          is more than one letter long. */
2210       some_multiletter_word = 0;
2211       some_lowercase = 0;
2212       some_nonuppercase_initial = 0;
2213       some_uppercase = 0;
2214
2215       for (pos = beg; pos < last; pos++)
2216         {
2217           if (NILP (string))
2218             c = FETCH_BYTE (pos);
2219           else
2220             c = XSTRING (string)->data[pos];
2221
2222           if (LOWERCASEP (c))
2223             {
2224               /* Cannot be all caps if any original char is lower case */
2225
2226               some_lowercase = 1;
2227               if (SYNTAX (prevc) != Sword)
2228                 some_nonuppercase_initial = 1;
2229               else
2230                 some_multiletter_word = 1;
2231             }
2232           else if (!NOCASEP (c))
2233             {
2234               some_uppercase = 1;
2235               if (SYNTAX (prevc) != Sword)
2236                 ;
2237               else
2238                 some_multiletter_word = 1;
2239             }
2240           else
2241             {
2242               /* If the initial is a caseless word constituent,
2243                  treat that like a lowercase initial.  */
2244               if (SYNTAX (prevc) != Sword)
2245                 some_nonuppercase_initial = 1;
2246             }
2247
2248           prevc = c;
2249         }
2250
2251       /* Convert to all caps if the old text is all caps
2252          and has at least one multiletter word.  */
2253       if (! some_lowercase && some_multiletter_word)
2254         case_action = all_caps;
2255       /* Capitalize each word, if the old text has all capitalized words.  */
2256       else if (!some_nonuppercase_initial && some_multiletter_word)
2257         case_action = cap_initial;
2258       else if (!some_nonuppercase_initial && some_uppercase)
2259         /* Should x -> yz, operating on X, give Yz or YZ?
2260            We'll assume the latter.  */
2261         case_action = all_caps;
2262       else
2263         case_action = nochange;
2264     }
2265
2266   /* Do replacement in a string.  */
2267   if (!NILP (string))
2268     {
2269       Lisp_Object before, after;
2270
2271       before = Fsubstring (string, make_number (0),
2272                            make_number (search_regs.start[sub]));
2273       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2274
2275       /* Substitute parts of the match into NEWTEXT
2276          if desired.  */
2277       if (NILP (literal))
2278         {
2279           int lastpos = -1;
2280           int lastpos_byte = -1;
2281           /* We build up the substituted string in ACCUM.  */
2282           Lisp_Object accum;
2283           Lisp_Object middle;
2284           int pos_byte;
2285
2286           accum = Qnil;
2287
2288           for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2289             {
2290               int substart = -1;
2291               int subend;
2292               int delbackslash = 0;
2293
2294               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2295
2296               if (c == '\\')
2297                 {
2298                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2299                   if (c == '&')
2300                     {
2301                       substart = search_regs.start[sub];
2302                       subend = search_regs.end[sub];
2303                     }
2304                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2305                     {
2306                       if (search_regs.start[c - '0'] >= 0)
2307                         {
2308                           substart = search_regs.start[c - '0'];
2309                           subend = search_regs.end[c - '0'];
2310                         }
2311                     }
2312                   else if (c == '\\')
2313                     delbackslash = 1;
2314                   else
2315                     error ("Invalid use of `\\' in replacement text");
2316                 }
2317               if (substart >= 0)
2318                 {
2319                   if (pos - 1 != lastpos + 1)
2320                     middle = substring_both (newtext, lastpos + 1,
2321                                              lastpos_byte + 1,
2322                                              pos - 1, pos_byte - 1);
2323                   else
2324                     middle = Qnil;
2325                   accum = concat3 (accum, middle,
2326                                    Fsubstring (string,
2327                                                make_number (substart),
2328                                                make_number (subend)));
2329                   lastpos = pos;
2330                   lastpos_byte = pos_byte;
2331                 }
2332               else if (delbackslash)
2333                 {
2334                   middle = substring_both (newtext, lastpos + 1,
2335                                            lastpos_byte + 1,
2336                                            pos, pos_byte);
2337
2338                   accum = concat2 (accum, middle);
2339                   lastpos = pos;
2340                   lastpos_byte = pos_byte;
2341                 }
2342             }
2343
2344           if (pos != lastpos + 1)
2345             middle = substring_both (newtext, lastpos + 1,
2346                                      lastpos_byte + 1,
2347                                      pos, pos_byte);
2348           else
2349             middle = Qnil;
2350
2351           newtext = concat2 (accum, middle);
2352         }
2353
2354       /* Do case substitution in NEWTEXT if desired.  */
2355       if (case_action == all_caps)
2356         newtext = Fupcase (newtext);
2357       else if (case_action == cap_initial)
2358         newtext = Fupcase_initials (newtext);
2359
2360       return concat3 (before, newtext, after);
2361     }
2362
2363   /* Record point, the move (quietly) to the start of the match.  */
2364   if (PT > search_regs.start[sub])
2365     opoint = PT - ZV;
2366   else
2367     opoint = PT;
2368
2369   TEMP_SET_PT (search_regs.start[sub]);
2370
2371   /* We insert the replacement text before the old text, and then
2372      delete the original text.  This means that markers at the
2373      beginning or end of the original will float to the corresponding
2374      position in the replacement.  */
2375   if (!NILP (literal))
2376     Finsert_and_inherit (1, &newtext);
2377   else
2378     {
2379       struct gcpro gcpro1;
2380       GCPRO1 (newtext);
2381
2382       for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2383         {
2384           int offset = PT - search_regs.start[sub];
2385
2386           c = XSTRING (newtext)->data[pos];
2387           if (c == '\\')
2388             {
2389               c = XSTRING (newtext)->data[++pos];
2390               if (c == '&')
2391                 Finsert_buffer_substring
2392                   (Fcurrent_buffer (),
2393                    make_number (search_regs.start[sub] + offset),
2394                    make_number (search_regs.end[sub] + offset));
2395               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2396                 {
2397                   if (search_regs.start[c - '0'] >= 1)
2398                     Finsert_buffer_substring
2399                       (Fcurrent_buffer (),
2400                        make_number (search_regs.start[c - '0'] + offset),
2401                        make_number (search_regs.end[c - '0'] + offset));
2402                 }
2403               else if (c == '\\')
2404                 insert_char (c);
2405               else
2406                 error ("Invalid use of `\\' in replacement text");
2407             }
2408           else
2409             insert_char (c);
2410         }
2411       UNGCPRO;
2412     }
2413
2414   inslen = PT - (search_regs.start[sub]);
2415   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2416
2417   if (case_action == all_caps)
2418     Fupcase_region (make_number (PT - inslen), make_number (PT));
2419   else if (case_action == cap_initial)
2420     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2421
2422   newpoint = PT;
2423
2424   /* Put point back where it was in the text.  */
2425   if (opoint <= 0)
2426     TEMP_SET_PT (opoint + ZV);
2427   else
2428     TEMP_SET_PT (opoint);
2429
2430   /* Now move point "officially" to the start of the inserted replacement.  */
2431   move_if_not_intangible (newpoint);
2432
2433   return Qnil;
2434 }
2435 \f
2436 static Lisp_Object
2437 match_limit (num, beginningp)
2438      Lisp_Object num;
2439      int beginningp;
2440 {
2441   register int n;
2442
2443   CHECK_NUMBER (num, 0);
2444   n = XINT (num);
2445   if (n < 0 || n >= search_regs.num_regs)
2446     args_out_of_range (num, make_number (search_regs.num_regs));
2447   if (search_regs.num_regs <= 0
2448       || search_regs.start[n] < 0)
2449     return Qnil;
2450   return (make_number ((beginningp) ? search_regs.start[n]
2451                                     : search_regs.end[n]));
2452 }
2453
2454 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2455   "Return position of start of text matched by last search.\n\
2456 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2457   regexp.\n\
2458 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2459   SUBEXP pairs.\n\
2460 Zero means the entire text matched by the whole regexp or whole string.")
2461   (subexp)
2462      Lisp_Object subexp;
2463 {
2464   return match_limit (subexp, 1);
2465 }
2466
2467 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2468   "Return position of end of text matched by last search.\n\
2469 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2470   regexp.\n\
2471 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2472   SUBEXP pairs.\n\
2473 Zero means the entire text matched by the whole regexp or whole string.")
2474   (subexp)
2475      Lisp_Object subexp;
2476 {
2477   return match_limit (subexp, 0);
2478 }
2479
2480 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2481   "Return a list containing all info on what the last search matched.\n\
2482 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2483 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2484 if the last match was on a buffer; integers or nil if a string was matched.\n\
2485 Use `store-match-data' to reinstate the data in this list.\n\
2486 \n\
2487 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2488 \(rather than markers) to represent buffer positions.\n\
2489 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2490 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2491   (integers, reuse)
2492      Lisp_Object integers, reuse;
2493 {
2494   Lisp_Object tail, prev;
2495   Lisp_Object *data;
2496   int i, len;
2497
2498   if (NILP (last_thing_searched))
2499     return Qnil;
2500
2501   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2502                                  * sizeof (Lisp_Object));
2503
2504   len = -1;
2505   for (i = 0; i < search_regs.num_regs; i++)
2506     {
2507       int start = search_regs.start[i];
2508       if (start >= 0)
2509         {
2510           if (EQ (last_thing_searched, Qt)
2511               || ! NILP (integers))
2512             {
2513               XSETFASTINT (data[2 * i], start);
2514               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2515             }
2516           else if (BUFFERP (last_thing_searched))
2517             {
2518               data[2 * i] = Fmake_marker ();
2519               Fset_marker (data[2 * i],
2520                            make_number (start),
2521                            last_thing_searched);
2522               data[2 * i + 1] = Fmake_marker ();
2523               Fset_marker (data[2 * i + 1],
2524                            make_number (search_regs.end[i]),
2525                            last_thing_searched);
2526             }
2527           else
2528             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2529             abort ();
2530
2531           len = i;
2532         }
2533       else
2534         data[2 * i] = data [2 * i + 1] = Qnil;
2535     }
2536
2537   /* If REUSE is not usable, cons up the values and return them.  */
2538   if (! CONSP (reuse))
2539     return Flist (2 * len + 2, data);
2540
2541   /* If REUSE is a list, store as many value elements as will fit
2542      into the elements of REUSE.  */
2543   for (i = 0, tail = reuse; CONSP (tail);
2544        i++, tail = XCONS (tail)->cdr)
2545     {
2546       if (i < 2 * len + 2)
2547         XCONS (tail)->car = data[i];
2548       else
2549         XCONS (tail)->car = Qnil;
2550       prev = tail;
2551     }
2552
2553   /* If we couldn't fit all value elements into REUSE,
2554      cons up the rest of them and add them to the end of REUSE.  */
2555   if (i < 2 * len + 2)
2556     XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2557
2558   return reuse;
2559 }
2560
2561
2562 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2563   "Set internal data on last search match from elements of LIST.\n\
2564 LIST should have been created by calling `match-data' previously.")
2565   (list)
2566      register Lisp_Object list;
2567 {
2568   register int i;
2569   register Lisp_Object marker;
2570
2571   if (running_asynch_code)
2572     save_search_regs ();
2573
2574   if (!CONSP (list) && !NILP (list))
2575     list = wrong_type_argument (Qconsp, list);
2576
2577   /* Unless we find a marker with a buffer in LIST, assume that this
2578      match data came from a string.  */
2579   last_thing_searched = Qt;
2580
2581   /* Allocate registers if they don't already exist.  */
2582   {
2583     int length = XFASTINT (Flength (list)) / 2;
2584
2585     if (length > search_regs.num_regs)
2586       {
2587         if (search_regs.num_regs == 0)
2588           {
2589             search_regs.start
2590               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2591             search_regs.end
2592               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2593           }
2594         else
2595           {
2596             search_regs.start
2597               = (regoff_t *) xrealloc (search_regs.start,
2598                                        length * sizeof (regoff_t));
2599             search_regs.end
2600               = (regoff_t *) xrealloc (search_regs.end,
2601                                        length * sizeof (regoff_t));
2602           }
2603
2604         search_regs.num_regs = length;
2605       }
2606   }
2607
2608   for (i = 0; i < search_regs.num_regs; i++)
2609     {
2610       marker = Fcar (list);
2611       if (NILP (marker))
2612         {
2613           search_regs.start[i] = -1;
2614           list = Fcdr (list);
2615         }
2616       else
2617         {
2618           if (MARKERP (marker))
2619             {
2620               if (XMARKER (marker)->buffer == 0)
2621                 XSETFASTINT (marker, 0);
2622               else
2623                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2624             }
2625
2626           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2627           search_regs.start[i] = XINT (marker);
2628           list = Fcdr (list);
2629
2630           marker = Fcar (list);
2631           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2632             XSETFASTINT (marker, 0);
2633
2634           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2635           search_regs.end[i] = XINT (marker);
2636         }
2637       list = Fcdr (list);
2638     }
2639
2640   return Qnil;
2641 }
2642
2643 /* If non-zero the match data have been saved in saved_search_regs
2644    during the execution of a sentinel or filter. */
2645 static int search_regs_saved;
2646 static struct re_registers saved_search_regs;
2647
2648 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2649    if asynchronous code (filter or sentinel) is running. */
2650 static void
2651 save_search_regs ()
2652 {
2653   if (!search_regs_saved)
2654     {
2655       saved_search_regs.num_regs = search_regs.num_regs;
2656       saved_search_regs.start = search_regs.start;
2657       saved_search_regs.end = search_regs.end;
2658       search_regs.num_regs = 0;
2659       search_regs.start = 0;
2660       search_regs.end = 0;
2661
2662       search_regs_saved = 1;
2663     }
2664 }
2665
2666 /* Called upon exit from filters and sentinels. */
2667 void
2668 restore_match_data ()
2669 {
2670   if (search_regs_saved)
2671     {
2672       if (search_regs.num_regs > 0)
2673         {
2674           xfree (search_regs.start);
2675           xfree (search_regs.end);
2676         }
2677       search_regs.num_regs = saved_search_regs.num_regs;
2678       search_regs.start = saved_search_regs.start;
2679       search_regs.end = saved_search_regs.end;
2680
2681       search_regs_saved = 0;
2682     }
2683 }
2684
2685 /* Quote a string to inactivate reg-expr chars */
2686
2687 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2688   "Return a regexp string which matches exactly STRING and nothing else.")
2689   (string)
2690      Lisp_Object string;
2691 {
2692   register unsigned char *in, *out, *end;
2693   register unsigned char *temp;
2694   int backslashes_added = 0;
2695
2696   CHECK_STRING (string, 0);
2697
2698   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2699
2700   /* Now copy the data into the new string, inserting escapes. */
2701
2702   in = XSTRING (string)->data;
2703   end = in + STRING_BYTES (XSTRING (string));
2704   out = temp;
2705
2706   for (; in != end; in++)
2707     {
2708       if (*in == '[' || *in == ']'
2709           || *in == '*' || *in == '.' || *in == '\\'
2710           || *in == '?' || *in == '+'
2711           || *in == '^' || *in == '$')
2712         *out++ = '\\', backslashes_added++;
2713       *out++ = *in;
2714     }
2715
2716   return make_specified_string (temp,
2717                                 XSTRING (string)->size + backslashes_added,
2718                                 out - temp,
2719                                 STRING_MULTIBYTE (string));
2720 }
2721 \f
2722 void
2723 syms_of_search ()
2724 {
2725   register int i;
2726
2727   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2728     {
2729       searchbufs[i].buf.allocated = 100;
2730       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2731       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2732       searchbufs[i].regexp = Qnil;
2733       staticpro (&searchbufs[i].regexp);
2734       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2735     }
2736   searchbuf_head = &searchbufs[0];
2737
2738   Qsearch_failed = intern ("search-failed");
2739   staticpro (&Qsearch_failed);
2740   Qinvalid_regexp = intern ("invalid-regexp");
2741   staticpro (&Qinvalid_regexp);
2742
2743   Fput (Qsearch_failed, Qerror_conditions,
2744         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2745   Fput (Qsearch_failed, Qerror_message,
2746         build_string ("Search failed"));
2747
2748   Fput (Qinvalid_regexp, Qerror_conditions,
2749         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2750   Fput (Qinvalid_regexp, Qerror_message,
2751         build_string ("Invalid regexp"));
2752
2753   last_thing_searched = Qnil;
2754   staticpro (&last_thing_searched);
2755
2756   defsubr (&Slooking_at);
2757   defsubr (&Sposix_looking_at);
2758   defsubr (&Sstring_match);
2759   defsubr (&Sposix_string_match);
2760   defsubr (&Ssearch_forward);
2761   defsubr (&Ssearch_backward);
2762   defsubr (&Sword_search_forward);
2763   defsubr (&Sword_search_backward);
2764   defsubr (&Sre_search_forward);
2765   defsubr (&Sre_search_backward);
2766   defsubr (&Sposix_search_forward);
2767   defsubr (&Sposix_search_backward);
2768   defsubr (&Sreplace_match);
2769   defsubr (&Smatch_beginning);
2770   defsubr (&Smatch_end);
2771   defsubr (&Smatch_data);
2772   defsubr (&Sset_match_data);
2773   defsubr (&Sregexp_quote);
2774 }