src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
   3              Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20  /* Number of entries in searchbufs.  */
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
       /* Next entry in the list headed by searchbuf_head, or 0 at the
          end.  compile_pattern moves the entry it returns to the front,
          so the list runs from most to least recently used.  */
  43   struct regexp_cache *next;
       /* REGEXP is the copy of the pattern string stored by
          compile_pattern_1 once compilation succeeds; it is nil while
          the entry holds no valid compiled pattern.  WHITESPACE_REGEXP
          is the value Vsearch_spaces_regexp had when the entry was
          compiled.  */
  44   Lisp_Object regexp, whitespace_regexp;
       /* The compiled pattern; compile_pattern returns its address.  */
  45   struct re_pattern_buffer buf;
       /* 0400 is octal for 256.  Presumably the fastmap storage for BUF,
          attached by initialization code elsewhere -- confirm.  */
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* error condition signaled when regexp compile_pattern fails */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
     /* Read by compile_pattern_1: when non-nil, its string data is passed
        to re_set_whitespace_regexp while the pattern is compiled.
        compile_pattern also compares it (with Fequal) against the value
        saved in each cache entry, so changing it forces recompilation.  */
  86 Lisp_Object Vsearch_spaces_regexp;
  87
     /* Forward declarations (old style, without prototypes) of static
        helpers; being static, they must be defined elsewhere in this
        file.  */
  88 static void set_search_regs ();
  89 static void save_search_regs ();
  90 static int simple_search ();
  91 static int boyer_moore ();
  92 static int search_buffer ();
  93
  94 static void
  95 matcher_overflow ()
  96 {
       /* Report a matcher stack overflow as a Lisp error.  The callers in
          this file invoke this when re_match_2 or re_search returns -2.  */
  97   error ("Stack overflow in regexp matcher");
  98 }
  99
 100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 101    PATTERN is the pattern to compile.
 102    CP is the place to put the result.
 103    TRANSLATE is a translation table for ignoring case, or nil for none.
 104    REGP is the structure that says where to store the "register"
 105    values that will result from matching this pattern.
 106    If it is 0, we should compile the pattern not to record any
 107    subexpression bounds.
 108    POSIX is nonzero if we want full backtracking (POSIX style)
 109    for this pattern.  0 means backtrack only enough to get a valid match.
 110    MULTIBYTE is nonzero iff a target of match is a multibyte buffer or
 111    string.
 112
 113    The behavior also depends on Vsearch_spaces_regexp.  */
 114
 115 static void
 116 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 117      struct regexp_cache *cp;
 118      Lisp_Object pattern;
 119      Lisp_Object translate;
 120      struct re_registers *regp;
 121      int posix;
 122      int multibyte;
 123 {
 124   char *val;
 125   reg_syntax_t old;
 126
       /* REGP is accepted but not used in this function; compile_pattern
          itself hands the registers to the regex code with
          re_set_registers.  */

       /* Mark the entry as holding no valid pattern.  cp->regexp only
          becomes non-nil again at the very end, after a successful
          compilation, so an invalid pattern leaves nil here.  */
 127   cp->regexp = Qnil;
       /* A nil TRANSLATE is stored as the fixnum 0; compile_pattern makes
          the same substitution before comparing with EQ.  */
 128   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 129   cp->posix = posix;
       /* buf.multibyte describes the pattern string itself;
          buf.target_multibyte describes the text to be matched.  */
 130   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 131   cp->buf.target_multibyte = multibyte;
       /* Remember which whitespace regexp this entry was compiled with;
          it is part of the cache key checked in compile_pattern.  */
 132   cp->whitespace_regexp = Vsearch_spaces_regexp;
       /* The syntax bits and the whitespace regexp are changed only for
          this one compilation: before UNBLOCK_INPUT the syntax is set
          back to OLD and the whitespace regexp is reset to NULL.  */
 133   BLOCK_INPUT;
 134   old = re_set_syntax (RE_SYNTAX_EMACS
 135                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 136   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 137                             : SDATA (Vsearch_spaces_regexp));
 138
 139   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 140                                      SBYTES (pattern), &cp->buf);
 141
 142   re_set_whitespace_regexp (NULL);
 143
 144   re_set_syntax (old);
 145   UNBLOCK_INPUT;
       /* A non-null VAL is used as the message text of the
          Qinvalid_regexp error.  */
 146   if (val)
 147     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 148
       /* Keep a copy rather than PATTERN itself as the cache key.
          Presumably this protects the key from later modification of
          the caller's string -- confirm.  */
 149   cp->regexp = Fcopy_sequence (pattern);
 150 }
 151
 152 /* Shrink each compiled regexp buffer in the cache
 153    to the size actually used right now.
 154    This is called from garbage collection.  */
 155
 156 void
 157 shrink_regexp_cache ()
 158 {
 159   struct regexp_cache *cp;
 160
       /* Visit every entry reachable from searchbuf_head, stopping at the
          null NEXT link.  */
 161   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 162     {
           /* Record the reduced capacity, then reallocate the buffer
              down to exactly USED.  */
 163       cp->buf.allocated = cp->buf.used;
 164       cp->buf.buffer
 165         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 166     }
 167 }
 168
 169 /* Compile a regexp if necessary, but first check to see if there's one in
 170    the cache.
 171    PATTERN is the pattern to compile.
 172    TRANSLATE is a translation table for ignoring case, or nil for none.
 173    REGP is the structure that says where to store the "register"
 174    values that will result from matching this pattern.
 175    If it is 0, we should compile the pattern not to record any
 176    subexpression bounds.
 177    POSIX is nonzero if we want full backtracking (POSIX style)
 178    for this pattern.  0 means backtrack only enough to get a valid match.
        MULTIBYTE is passed through to compile_pattern_1 and is part of
        the cache key (it is compared with buf.target_multibyte).

        The value is a pointer to the re_pattern_buffer inside the cache
        entry that was found or recompiled; that entry is moved to the
        front of the list before returning.  */
 179
 180 struct re_pattern_buffer *
 181 compile_pattern (pattern, regp, translate, posix, multibyte)
 182      Lisp_Object pattern;
 183      struct re_registers *regp;
 184      Lisp_Object translate;
 185      int posix, multibyte;
 186 {
 187   struct regexp_cache *cp, **cpp;
 188
       /* CPP always points at the link through which CP was reached
          (searchbuf_head itself, or the previous entry's NEXT), so that
          CP can be unlinked after the loop.  The loop has no condition;
          it is left only through the two `break' statements.  */
 189   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 190     {
 191       cp = *cpp;
 192       /* Entries are initialized to nil, and may be set to nil by
 193          compile_pattern_1 if the pattern isn't valid.  Don't apply
 194          string accessors in those cases.  However, compile_pattern_1
 195          is only applied to the cache entry we pick here to reuse.  So
 196          nil should never appear before a non-nil entry.  */
 197       if (NILP (cp->regexp))
 198         goto compile_it;
           /* Cache hit only if everything that influenced compilation
              agrees: pattern text and multibyteness, translation table,
              POSIX flag, target multibyteness and whitespace regexp.  The
              length and multibyte tests come before Fstring_equal.  */
 199       if (SCHARS (cp->regexp) == SCHARS (pattern)
 200           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 201           && !NILP (Fstring_equal (cp->regexp, pattern))
 202           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 203           && cp->posix == posix
 204           && cp->buf.target_multibyte == multibyte
 205           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 206         break;
 207
 208       /* If we're at the end of the cache, compile into the nil cell
 209          we found, or the last (least recently used) cell with a
 210          string value.  */
 211       if (cp->next == 0)
 212         {
 213         compile_it:
 214           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 215           break;
 216         }
 217     }
 218
 219   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 220      either because we found it in the cache or because we just compiled it.
 221      Move it to the front of the queue to mark it as most recently used.  */
       /* Unlink CP first.  This order also works when CP already is the
          head, because then CPP is &searchbuf_head.  */
 222   *cpp = cp->next;
 223   cp->next = searchbuf_head;
 224   searchbuf_head = cp;
 225
 226   /* Advise the searching functions about the space we have allocated
 227      for register data.  */
 228   if (regp)
 229     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 230
 231   return &cp->buf;
 232 }
 233
 234 /* Error condition used for failing searches */
 235 Lisp_Object Qsearch_failed;
 236
     /* Signal Qsearch_failed with a one-element data list holding ARG.
        The function has a Lisp_Object value (Qnil, reached only if
        Fsignal returns) so that a caller can write
        `return signal_failure (string);', as search_command does.  */
 237 Lisp_Object
 238 signal_failure (arg)
 239      Lisp_Object arg;
 240 {
 241   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 242   return Qnil;
 243 }
 244 \f
 245 static Lisp_Object
 246 looking_at_1 (string, posix)
 247      Lisp_Object string;
 248      int posix;
 249 {
       /* Common body of Flooking_at (POSIX == 0) and Fposix_looking_at
          (POSIX == 1).  Match the regexp STRING against the current
          buffer's text starting at point.  The value is Qt if it
          matches and Qnil otherwise.  On a match, search_regs is
          rewritten from byte offsets relative to BEGV_BYTE into
          character positions.  */
 250   Lisp_Object val;
 251   unsigned char *p1, *p2;
 252   int s1, s2;
 253   register int i;
 254   struct re_pattern_buffer *bufp;
 255
 256   if (running_asynch_code)
 257     save_search_regs ();
 258
 259   CHECK_STRING (string);
       /* Case folding uses the buffer's canon table only when the
          buffer's case_fold_search is non-nil.  */
 260   bufp = compile_pattern (string, &search_regs,
 261                           (!NILP (current_buffer->case_fold_search)
 262                            ? current_buffer->case_canon_table : Qnil),
 263                           posix,
 264                           !NILP (current_buffer->enable_multibyte_characters));
 265
 266   immediate_quit = 1;
 267   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 268
 269   /* Get pointers and sizes of the two strings
 270      that make up the visible portion of the buffer. */
 271
       /* P1/S1 describe the text from BEGV up to the gap, P2/S2 the text
          from the end of the gap up to ZV.  */
 272   p1 = BEGV_ADDR;
 273   s1 = GPT_BYTE - BEGV_BYTE;
 274   p2 = GAP_END_ADDR;
 275   s2 = ZV_BYTE - GPT_BYTE;
       /* S1 < 0: the gap lies before BEGV, so the whole visible text is
          one piece; present it as the second string.  */
 276   if (s1 < 0)
 277     {
 278       p2 = p1;
 279       s2 = ZV_BYTE - BEGV_BYTE;
 280       s1 = 0;
 281     }
       /* S2 < 0: the gap lies after ZV, so the whole visible text is in
          the first string.  */
 282   if (s2 < 0)
 283     {
 284       s1 = ZV_BYTE - BEGV_BYTE;
 285       s2 = 0;
 286     }
 287
       /* Qnil here, whereas string_match_1 stores the string being
          searched; presumably this tells the regex code that the target
          is the current buffer -- confirm.  */
 288   re_match_object = Qnil;
 289
       /* Start matching at point and allow the match to extend to ZV;
          both are given as byte offsets from BEGV_BYTE.  */
 290   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 291                   PT_BYTE - BEGV_BYTE, &search_regs,
 292                   ZV_BYTE - BEGV_BYTE);
 293   immediate_quit = 0;
 294
 295   if (i == -2)
 296     matcher_overflow ();
 297
 298   val = (0 <= i ? Qt : Qnil);
       /* I is reused as the loop index once the result is saved in VAL.
          Only registers that were actually set (start >= 0) are
          converted.  */
 299   if (i >= 0)
 300     for (i = 0; i < search_regs.num_regs; i++)
 301       if (search_regs.start[i] >= 0)
 302         {
 303           search_regs.start[i]
 304             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 305           search_regs.end[i]
 306             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 307         }
       /* Recorded whether or not the match succeeded.  */
 308   XSETBUFFER (last_thing_searched, current_buffer);
 309   return val;
 310 }
 311
 312 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 313        doc: /* Return t if text after point matches regular expression REGEXP.
 314 This function modifies the match data that `match-beginning',
 315 `match-end' and `match-data' access; save and restore the match
 316 data if you want to preserve them.  */)
 317      (regexp)
 318      Lisp_Object regexp;
 319 {
       /* The second argument 0 selects the non-POSIX matching mode.  */
 320   return looking_at_1 (regexp, 0);
 321 }
 322
 323 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 324        doc: /* Return t if text after point matches regular expression REGEXP.
 325 Find the longest match, in accord with Posix regular expression rules.
 326 This function modifies the match data that `match-beginning',
 327 `match-end' and `match-data' access; save and restore the match
 328 data if you want to preserve them.  */)
 329      (regexp)
 330      Lisp_Object regexp;
 331 {
       /* The second argument 1 requests full POSIX backtracking.  */
 332   return looking_at_1 (regexp, 1);
 333 }
 334 \f
 335 static Lisp_Object
 336 string_match_1 (regexp, string, start, posix)
 337      Lisp_Object regexp, string, start;
 338      int posix;
 339 {
       /* Common body of Fstring_match (POSIX == 0) and
          Fposix_string_match (POSIX == 1).  Search STRING for REGEXP
          beginning at character index START (nil means 0).  The value is
          the character index of the start of the match as a Lisp
          integer, or Qnil if nothing matches.  On success search_regs
          holds character indices into STRING.  */
 340   int val;
 341   struct re_pattern_buffer *bufp;
 342   int pos, pos_byte;
 343   int i;
 344
 345   if (running_asynch_code)
 346     save_search_regs ();
 347
 348   CHECK_STRING (regexp);
 349   CHECK_STRING (string);
 350
 351   if (NILP (start))
 352     pos = 0, pos_byte = 0;
 353   else
 354     {
 355       int len = SCHARS (string);
 356
 357       CHECK_NUMBER (start);
 358       pos = XINT (start);
           /* A negative START in the range -LEN .. -1 counts back from
              the end of STRING.  Anything else outside 0 .. LEN is
              rejected.  */
 359       if (pos < 0 && -pos <= len)
 360         pos = len + pos;
 361       else if (0 > pos || pos > len)
 362         args_out_of_range (string, start);
 363       pos_byte = string_char_to_byte (string, pos);
 364     }
 365
       /* The case-folding setting comes from the current buffer, not
          from STRING.  */
 366   bufp = compile_pattern (regexp, &search_regs,
 367                           (!NILP (current_buffer->case_fold_search)
 368                            ? current_buffer->case_canon_table : Qnil),
 369                           posix,
 370                           STRING_MULTIBYTE (string));
 371   immediate_quit = 1;
 372   re_match_object = string;
 373
       /* Start at byte POS_BYTE and allow the search to range over the
          remaining bytes of STRING.  */
 374   val = re_search (bufp, (char *) SDATA (string),
 375                    SBYTES (string), pos_byte,
 376                    SBYTES (string) - pos_byte,
 377                    &search_regs);
 378   immediate_quit = 0;
       /* Recorded before the result is examined, so it is updated even
          when the search fails.  */
 379   last_thing_searched = Qt;
 380   if (val == -2)
 381     matcher_overflow ();
 382   if (val < 0) return Qnil;
 383
       /* Convert every register that was set from a byte index into a
          character index.  */
 384   for (i = 0; i < search_regs.num_regs; i++)
 385     if (search_regs.start[i] >= 0)
 386       {
 387         search_regs.start[i]
 388           = string_byte_to_char (string, search_regs.start[i]);
 389         search_regs.end[i]
 390           = string_byte_to_char (string, search_regs.end[i]);
 391       }
 392
 393   return make_number (string_byte_to_char (string, val));
 394 }
 395
 396 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 397        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 398 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 399 If third arg START is non-nil, start search at that index in STRING.
 400 For index of first char beyond the match, do (match-end 0).
 401 `match-end' and `match-beginning' also give indices of substrings
 402 matched by parenthesis constructs in the pattern.
 403
 404 You can use the function `match-string' to extract the substrings
 405 matched by the parenthesis constructions in REGEXP. */)
 406      (regexp, string, start)
 407      Lisp_Object regexp, string, start;
 408 {
       /* The last argument 0 selects the non-POSIX matching mode.  */
 409   return string_match_1 (regexp, string, start, 0);
 410 }
 411
 412 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 413        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 414 Find the longest match, in accord with Posix regular expression rules.
 415 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 416 If third arg START is non-nil, start search at that index in STRING.
 417 For index of first char beyond the match, do (match-end 0).
 418 `match-end' and `match-beginning' also give indices of substrings
 419 matched by parenthesis constructs in the pattern.  */)
 420      (regexp, string, start)
 421      Lisp_Object regexp, string, start;
 422 {
       /* The last argument 1 requests full POSIX backtracking.  */
 423   return string_match_1 (regexp, string, start, 1);
 424 }
 425
 426 /* Match REGEXP against STRING, searching all of STRING,
 427    and return the index of the match, or negative on failure.
 428    This does not clobber the match data.  */
 429
 430 int
 431 fast_string_match (regexp, string)
 432      Lisp_Object regexp, string;
 433 {
 434   int val;
 435   struct re_pattern_buffer *bufp;
 436
       /* Neither argument is checked with CHECK_STRING here; callers must
          pass strings.  A null REGP and a null register argument to
          re_search below are what keep search_regs untouched; a nil
          translate table means no case folding.  */
 437   bufp = compile_pattern (regexp, 0, Qnil,
 438                           0, STRING_MULTIBYTE (string));
 439   immediate_quit = 1;
 440   re_match_object = string;
 441
 442   val = re_search (bufp, (char *) SDATA (string),
 443                    SBYTES (string), 0,
 444                    SBYTES (string), 0);
 445   immediate_quit = 0;
       /* The re_search result is returned unchanged, so a -2 (matcher
          overflow) is reported like any other negative value.
          NOTE(review): string_match_1 passes the corresponding value
          through string_byte_to_char, which suggests this is a byte
          index rather than a character index -- confirm what callers
          expect for multibyte STRING.  */
 446   return val;
 447 }
 448
 449 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 450    and return the index of the match, or negative on failure.
 451    This does not clobber the match data.
 452    We assume that STRING contains single-byte characters.  */
 453
     /* NOTE(review): the function below uses Vascii_canon_table, not this
        variable; Vascii_canon_table is presumably declared in one of the
        included headers.  Confirm whether this declaration is still
        needed.  */
 454 extern Lisp_Object Vascii_downcase_table;
 455
 456 int
 457 fast_c_string_match_ignore_case (regexp, string)
 458      Lisp_Object regexp;
 459      const char *string;
 460 {
 461   int val;
 462   struct re_pattern_buffer *bufp;
 463   int len = strlen (string);
 464
       /* STRING is a plain C string, so the pattern is converted to
          unibyte and the target is declared unibyte (last argument 0).  */
 465   regexp = string_make_unibyte (regexp);
       /* There is no Lisp string to record here; Qt is stored instead.  */
 466   re_match_object = Qt;
 467   bufp = compile_pattern (regexp, 0,
 468                           Vascii_canon_table, 0,
 469                           0);
 470   immediate_quit = 1;
 471   val = re_search (bufp, string, len, 0, len, 0);
 472   immediate_quit = 0;
 473   return val;
 474 }
 475
 476 /* Like fast_string_match but ignore case.  */
 477
 478 int
 479 fast_string_match_ignore_case (regexp, string)
 480      Lisp_Object regexp, string;
 481 {
 482   int val;
 483   struct re_pattern_buffer *bufp;
 484
       /* The only difference from fast_string_match is the translate
          argument: Vascii_canon_table instead of Qnil.  */
 485   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 486                           0, STRING_MULTIBYTE (string));
 487   immediate_quit = 1;
 488   re_match_object = string;
 489
       /* Search the whole of STRING from byte 0; no registers are
          recorded (last argument 0).  */
 490   val = re_search (bufp, (char *) SDATA (string),
 491                    SBYTES (string), 0,
 492                    SBYTES (string), 0);
 493   immediate_quit = 0;
 494   return val;
 495 }
 496 \f
 497 /* The newline cache: remembering which sections of text have no newlines.  */
 498
 499 /* If the user has requested newline caching, make sure it's on.
 500    Otherwise, make sure it's off.
 501    This is our cheesy way of associating an action with the change of
 502    state of a buffer-local variable.  */
 503 static void
 504 newline_cache_on_off (buf)
 505      struct buffer *buf;
 506 {
       /* The request is BUF's cache_long_line_scans slot: nil means no
          caching.  The cache is freed or created only when its current
          state disagrees with that setting.  */
 507   if (NILP (buf->cache_long_line_scans))
 508     {
 509       /* It should be off.  */
 510       if (buf->newline_cache)
 511         {
 512           free_region_cache (buf->newline_cache);
 513           buf->newline_cache = 0;
 514         }
 515     }
 516   else
 517     {
 518       /* It should be on.  */
 519       if (buf->newline_cache == 0)
 520         buf->newline_cache = new_region_cache ();
 521     }
 522 }
 523
 524 \f
 525 /* Search for COUNT instances of the character TARGET between START and END.
 526
 527    If COUNT is positive, search forwards; END must be >= START.
 528    If COUNT is negative, search backwards for the -COUNTth instance;
 529       END must be <= START.
 530    If COUNT is zero, do anything you please; run rogue, for all I care.
 531
 532    If END is zero, use BEGV or ZV instead, as appropriate for the
 533    direction indicated by COUNT.
 534
 535    If we find COUNT instances, set *SHORTAGE to zero, and return the
 536    position past the COUNTth match.  Note that for reverse motion
 537    this is not the same as the usual convention for Emacs motion commands.
 538
 539    If we don't find COUNT instances before reaching END, set *SHORTAGE
 540    to the number of TARGETs left unfound, and return END.
 541
 542    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 543    except when inside redisplay.  */
 544
 545 int
 546 scan_buffer (target, start, end, count, shortage, allow_quit)
 547      register int target;
 548      int start, end;
 549      int count;
 550      int *shortage;
 551      int allow_quit;
 552 {
 553   struct region_cache *newline_cache;
 554   int direction;
 555
       /* A COUNT of zero takes the else branch here and the backward
          loop further down.  */
 556   if (count > 0)
 557     {
 558       direction = 1;
 559       if (! end) end = ZV;
 560     }
 561   else
 562     {
 563       direction = -1;
 564       if (! end) end = BEGV;
 565     }
 566
       /* Create or free the current buffer's newline cache according to
          its cache_long_line_scans setting before reading the pointer.  */
 567   newline_cache_on_off (current_buffer);
 568   newline_cache = current_buffer->newline_cache;
 569
 570   if (shortage != 0)
 571     *shortage = 0;
 572
 573   immediate_quit = allow_quit;
 574
       /* Forward scan.  Each pass of the outer loop handles one stretch
          of contiguous bytes and then advances START past it.  */
 575   if (count > 0)
 576     while (start != end)
 577       {
 578         /* Our innermost scanning loop is very simple; it doesn't know
 579            about gaps, buffer ends, or the newline cache.  ceiling is
 580            the position of the last character before the next such
 581            obstacle --- the last character the dumb search loop should
 582            examine.  */
 583         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 584         int start_byte = CHAR_TO_BYTE (start);
 585         int tem;
 586
 587         /* If we're looking for a newline, consult the newline cache
 588            to see where we can avoid some scanning.  */
 589         if (target == '\n' && newline_cache)
 590           {
 591             int next_change;
                 /* immediate_quit is cleared while the cache is being
                    consulted and put back afterwards.  */
 592             immediate_quit = 0;
 593             while (region_cache_forward
 594                    (current_buffer, newline_cache, start_byte, &next_change))
 595               start_byte = next_change;
 596             immediate_quit = allow_quit;
 597
 598             /* START should never be after END.  */
 599             if (start_byte > ceiling_byte)
 600               start_byte = ceiling_byte;
 601
 602             /* Now the text after start is an unknown region, and
 603                next_change is the position of the next known region. */
 604             ceiling_byte = min (next_change - 1, ceiling_byte);
 605           }
 606
 607         /* The dumb loop can only scan text stored in contiguous
 608            bytes. BUFFER_CEILING_OF returns the last character
 609            position that is contiguous, so the ceiling is the
 610            position after that.  */
 611         tem = BUFFER_CEILING_OF (start_byte);
 612         ceiling_byte = min (tem, ceiling_byte);
 613
 614         {
 615           /* The termination address of the dumb loop.  */
 616           register unsigned char *ceiling_addr
 617             = BYTE_POS_ADDR (ceiling_byte) + 1;
 618           register unsigned char *cursor
 619             = BYTE_POS_ADDR (start_byte);
               /* BASE is the address of byte START_BYTE, so
                  START_BYTE + p - base is the byte position of any
                  address P within this stretch.  */
 620           unsigned char *base = cursor;
 621
 622           while (cursor < ceiling_addr)
 623             {
 624               unsigned char *scan_start = cursor;
 625
 626               /* The dumb loop.  */
 627               while (*cursor != target && ++cursor < ceiling_addr)
 628                 ;
 629
 630               /* If we're looking for newlines, cache the fact that
 631                  the region from start to cursor is free of them. */
 632               if (target == '\n' && newline_cache)
 633                 know_region_cache (current_buffer, newline_cache,
 634                                    start_byte + scan_start - base,
 635                                    start_byte + cursor - base);
 636
 637               /* Did we find the target character?  */
 638               if (cursor < ceiling_addr)
 639                 {
 640                   if (--count == 0)
 641                     {
 642                       immediate_quit = 0;
                           /* CURSOR is on the match; the + 1 gives the
                              position just past it.  */
 643                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 644                     }
 645                   cursor++;
 646                 }
 647             }
 648
 649           start = BYTE_TO_CHAR (start_byte + cursor - base);
 650         }
 651       }
       /* Backward scan.  Unlike the forward loop (start != end), this
          one also stops if START has moved below END.  */
 652   else
 653     while (start > end)
 654       {
 655         /* The last character to check before the next obstacle.  */
 656         int ceiling_byte = CHAR_TO_BYTE (end);
 657         int start_byte = CHAR_TO_BYTE (start);
 658         int tem;
 659
 660         /* Consult the newline cache, if appropriate.  */
 661         if (target == '\n' && newline_cache)
 662           {
 663             int next_change;
 664             immediate_quit = 0;
 665             while (region_cache_backward
 666                    (current_buffer, newline_cache, start_byte, &next_change))
 667               start_byte = next_change;
 668             immediate_quit = allow_quit;
 669
 670             /* Start should never be at or before end.  */
 671             if (start_byte <= ceiling_byte)
 672               start_byte = ceiling_byte + 1;
 673
 674             /* Now the text before start is an unknown region, and
 675                next_change is the position of the next known region. */
 676             ceiling_byte = max (next_change, ceiling_byte);
 677           }
 678
 679         /* Stop scanning before the gap.  */
 680         tem = BUFFER_FLOOR_OF (start_byte - 1);
 681         ceiling_byte = max (tem, ceiling_byte);
 682
 683         {
 684           /* The termination address of the dumb loop.  */
 685           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 686           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
               /* BASE is the address of byte START_BYTE - 1, so
                  START_BYTE + p - base is the byte position just after
                  the byte at address P.  */
 687           unsigned char *base = cursor;
 688
 689           while (cursor >= ceiling_addr)
 690             {
 691               unsigned char *scan_start = cursor;
 692
 693               while (*cursor != target && --cursor >= ceiling_addr)
 694                 ;
 695
 696               /* If we're looking for newlines, cache the fact that
 697                  the region from after the cursor to start is free of them.  */
 698               if (target == '\n' && newline_cache)
 699                 know_region_cache (current_buffer, newline_cache,
 700                                    start_byte + cursor - base,
 701                                    start_byte + scan_start - base);
 702
 703               /* Did we find the target character?  */
 704               if (cursor >= ceiling_addr)
 705                 {
                       /* COUNT is negative (or zero) here and is counted
                          up towards zero.  */
 706                   if (++count >= 0)
 707                     {
 708                       immediate_quit = 0;
                           /* Given how BASE is defined, this is the
                              position just after the match.  */
 709                       return BYTE_TO_CHAR (start_byte + cursor - base);
 710                     }
 711                   cursor--;
 712                 }
 713             }
 714
 715           start = BYTE_TO_CHAR (start_byte + cursor - base);
 716         }
 717       }
 718
       /* END was reached first.  COUNT still has its original sign, so
          multiplying by DIRECTION reports the number of matches left
          unfound as a non-negative value.  */
 719   immediate_quit = 0;
 720   if (shortage != 0)
 721     *shortage = count * direction;
 722   return start;
 723 }
 724 \f
 725 /* Search for COUNT instances of a line boundary, which means either a
 726    newline or (if selective display enabled) a carriage return.
 727    Start at START.  If COUNT is negative, search backwards.
 728
 729    We report the resulting position by calling TEMP_SET_PT_BOTH.
 730
 731    If we find COUNT instances, we position after (always after,
 732    even if scanning backwards) the COUNTth match, and return 0.
 733
 734    If we don't find COUNT instances before reaching the end of the
 735    buffer (or the beginning, if scanning backwards), we return
 736    the number of line boundaries left unfound, and position at
 737    the limit we bumped up against.
 738
 739    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 740    except in special cases.  */
 741
 742 int
 743 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 744      int start, start_byte;
 745      int limit, limit_byte;
 746      register int count;
 747      int allow_quit;
 748 {
       /* +1 for a forward scan, -1 otherwise (including COUNT == 0).  */
 749   int direction = ((count > 0) ? 1 : -1);
 750
 751   register unsigned char *cursor;
 752   unsigned char *base;
 753
 754   register int ceiling;
 755   register unsigned char *ceiling_addr;
 756
       /* Saved so that every exit path can put immediate_quit back.  */
 757   int old_immediate_quit = immediate_quit;
 758
 759   /* The code that follows is like scan_buffer
 760      but checks for either newline or carriage return.  */
       /* NOTE(review): the loops below compare only against '\n'; no
          carriage-return test is present in this function.  */
 761
 762   if (allow_quit)
 763     immediate_quit++;
 764
       /* NOTE(review): this overwrites the START_BYTE argument, so the
          value passed by the caller is never used.  */
 765   start_byte = CHAR_TO_BYTE (start);
 766
 767   if (count > 0)
 768     {
 769       while (start_byte < limit_byte)
 770         {
               /* CEILING is the last byte position to examine in this
                  contiguous stretch; CEILING_ADDR is one past its
                  address.  */
 771           ceiling =  BUFFER_CEILING_OF (start_byte);
 772           ceiling = min (limit_byte - 1, ceiling);
 773           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 774           base = (cursor = BYTE_POS_ADDR (start_byte));
 775           while (1)
 776             {
 777               while (*cursor != '\n' && ++cursor != ceiling_addr)
 778                 ;
 779
 780               if (cursor != ceiling_addr)
 781                 {
 782                   if (--count == 0)
 783                     {
 784                       immediate_quit = old_immediate_quit;
                           /* The + 1 moves past the newline just found.  */
 785                       start_byte = start_byte + cursor - base + 1;
 786                       start = BYTE_TO_CHAR (start_byte);
 787                       TEMP_SET_PT_BOTH (start, start_byte);
 788                       return 0;
 789                     }
 790                   else
 791                     if (++cursor == ceiling_addr)
 792                       break;
 793                 }
 794               else
 795                 break;
 796             }
               /* Advance past the stretch just scanned.  */
 797           start_byte += cursor - base;
 798         }
 799     }
 800   else
 801     {
 802       while (start_byte > limit_byte)
 803         {
               /* Here CEILING is the lowest byte position to examine and
                  CEILING_ADDR is one *before* its address.  CURSOR starts
                  one past the first byte to test because the inner loop
                  decrements before testing.  */
 804           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 805           ceiling = max (limit_byte, ceiling);
 806           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 807           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 808           while (1)
 809             {
 810               while (--cursor != ceiling_addr && *cursor != '\n')
 811                 ;
 812
 813               if (cursor != ceiling_addr)
 814                 {
 815                   if (++count == 0)
 816                     {
 817                       immediate_quit = old_immediate_quit;
 818                       /* Return the position AFTER the match we found.  */
 819                       start_byte = start_byte + cursor - base + 1;
 820                       start = BYTE_TO_CHAR (start_byte);
 821                       TEMP_SET_PT_BOTH (start, start_byte);
 822                       return 0;
 823                     }
 824                 }
 825               else
 826                 break;
 827             }
 828           /* Here we add 1 to compensate for the last decrement
 829              of CURSOR, which took it past the valid range.  */
 830           start_byte += cursor - base + 1;
 831         }
 832     }
 833
       /* The limit was reached before COUNT matches were found: leave
          point at the limit and report how many are still missing, as a
          non-negative number.  */
 834   TEMP_SET_PT_BOTH (limit, limit_byte);
 835   immediate_quit = old_immediate_quit;
 836
 837   return count * direction;
 838 }
 839
 840 int
 841 find_next_newline_no_quit (from, cnt)
 842      register int from, cnt;
 843 {
       /* Scan for CNT newlines starting at FROM.  END is 0, so
          scan_buffer uses ZV or BEGV as the limit.  No shortage is
          requested, and ALLOW_QUIT is 0 so immediate_quit is not set.  */
 844   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 845 }
 846
 847 /* Like find_next_newline, but returns position before the newline,
 848    not after, and only search up to TO.  This isn't just
 849    find_next_newline (...)-1, because you might hit TO.  */
 850
 851 int
 852 find_before_next_newline (from, to, cnt)
 853      int from, to, cnt;
 854 {
 855   int shortage;
       /* The final argument 1 is ALLOW_QUIT, so scan_buffer sets
          immediate_quit during the scan.  */
 856   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 857
       /* A zero shortage means all CNT newlines were found, so POS is
          just past a newline; step back over it.  Otherwise POS is the
          limit that was reached and is returned unchanged.  */
 858   if (shortage == 0)
 859     pos--;
 860
 861   return pos;
 862 }
 863 \f
 864 /* Subroutines of Lisp buffer search functions. */
 865
 866 static Lisp_Object
 867 search_command (string, bound, noerror, count, direction, RE, posix)
 868      Lisp_Object string, bound, noerror, count;
 869      int direction;
 870      int RE;
 871      int posix;
 872 {
       /* Validate the Lisp arguments, run search_buffer from point, and
          move point according to the result.  STRING is what to search
          for; BOUND (nil, a number or a marker) limits the search; COUNT
          (nil or a number) is multiplied by DIRECTION to give the signed
          repeat count.  RE and POSIX are passed through to
          search_buffer.  On success point moves to the position
          search_buffer returned, and that position is the value.  On
          failure the outcome depends on NOERROR, as noted below.  */
 873   register int np;
 874   int lim, lim_byte;
 875   int n = direction;
 876
 877   if (!NILP (count))
 878     {
 879       CHECK_NUMBER (count);
 880       n *= XINT (count);
 881     }
 882
 883   CHECK_STRING (string);
       /* Without a BOUND, the limit is the end of the accessible region
          in the direction of the search.  */
 884   if (NILP (bound))
 885     {
 886       if (n > 0)
 887         lim = ZV, lim_byte = ZV_BYTE;
 888       else
 889         lim = BEGV, lim_byte = BEGV_BYTE;
 890     }
 891   else
 892     {
 893       CHECK_NUMBER_COERCE_MARKER (bound);
 894       lim = XINT (bound);
           /* The bound must not lie behind point relative to the search
              direction.  It is then clamped to BEGV .. ZV.  */
 895       if (n > 0 ? lim < PT : lim > PT)
 896         error ("Invalid search bound (wrong side of point)");
 897       if (lim > ZV)
 898         lim = ZV, lim_byte = ZV_BYTE;
 899       else if (lim < BEGV)
 900         lim = BEGV, lim_byte = BEGV_BYTE;
 901       else
 902         lim_byte = CHAR_TO_BYTE (lim);
 903     }
 904
       /* Both case tables are passed only when the buffer's
          case_fold_search is non-nil; otherwise both are Qnil.  */
 905   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 906                       (!NILP (current_buffer->case_fold_search)
 907                        ? current_buffer->case_canon_table
 908                        : Qnil),
 909                       (!NILP (current_buffer->case_fold_search)
 910                        ? current_buffer->case_eqv_table
 911                        : Qnil),
 912                       posix);
       /* A result <= 0 is treated as failure.  NOERROR nil: signal
          search-failed.  NOERROR t: return nil and leave point alone.
          Any other NOERROR: move point to the limit and return nil.  */
 913   if (np <= 0)
 914     {
 915       if (NILP (noerror))
 916         return signal_failure (string);
 917       if (!EQ (noerror, Qt))
 918         {
 919           if (lim < BEGV || lim > ZV)
 920             abort ();
 921           SET_PT_BOTH (lim, lim_byte);
 922           return Qnil;
 923 #if 0 /* This would be clean, but maybe programs depend on
 924          a value of nil here.  */
 925           np = lim;
 926 #endif
 927         }
 928       else
 929         return Qnil;
 930     }
 931
       /* Sanity check: a successful result must lie inside the
          accessible region.  */
 932   if (np < BEGV || np > ZV)
 933     abort ();
 934
 935   SET_PT (np);
 936
 937   return make_number (np);
 938 }
 939 \f
 940 /* Return 1 if REGEXP matches just one constant string, that is, if
        it contains none of the special regexp constructs tested for
        below; return 0 otherwise.  REGEXP is scanned byte by byte over
        its SBYTES (regexp) bytes.  */
 941
 942 static int
 943 trivial_regexp_p (regexp)
 944      Lisp_Object regexp;
 945 {
 946   int len = SBYTES (regexp);
 947   unsigned char *s = SDATA (regexp);
 948   while (--len >= 0)
 949     {
 950       switch (*s++)
 951         {
             /* An unquoted special character: not a constant string.  */
 952         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 953           return 0;
 954         case '\\':
               /* A backslash with nothing after it is not trivial.  */
 955           if (--len < 0)
 956             return 0;
               /* A backslash followed by one of the characters below is
                  treated as a regexp construct; followed by any other
                  character it is skipped over as a quoted literal.  */
 957           switch (*s++)
 958             {
 959             case '|': case '(': case ')': case '`': case '\'': case 'b':
 960             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 961             case 'S': case '=': case '{': case '}': case '_':
 962             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 963             case '1': case '2': case '3': case '4': case '5':
 964             case '6': case '7': case '8': case '9':
 965               return 0;
 966             }
 967         }
 968     }
 969   return 1;
 970 }
 971
 972 /* Search for the n'th occurrence of STRING in the current buffer,
 973    starting at position POS and stopping at position LIM,
 974    treating STRING as a literal string if RE is false or as
 975    a regular expression if RE is true.
 976
 977    If N is positive, searching is forward and LIM must be greater than POS.
 978    If N is negative, searching is backward and LIM must be less than POS.
 979
 980    Returns -x if x occurrences remain to be found (x > 0),
 981    or else the position at the beginning of the Nth occurrence
 982    (if searching backward) or the end (if searching forward).
 983
 984    POSIX is nonzero if we want full backtracking (POSIX style)
 985    for this pattern.  0 means backtrack only enough to get a valid match.  */
 986
     /* Set OUT to the translation of character D according to the
        table TRT.  If TRT is nil, or if the entry fetched from TRT with
        Faref is not an integer, OUT is set to D itself.  The arguments
        are not parenthesized in the expansion, and TRT and D can each
        be evaluated more than once, so pass only simple expressions
        without side effects.  */
 987 #define TRANSLATE(out, trt, d)                  \
 988 do                                              \
 989   {                                             \
 990     if (! NILP (trt))                           \
 991       {                                         \
 992         Lisp_Object temp;                       \
 993         temp = Faref (trt, make_number (d));    \
 994         if (INTEGERP (temp))                    \
 995           out = XINT (temp);                    \
 996         else                                    \
 997           out = d;                              \
 998       }                                         \
 999     else                                        \
1000       out = d;                                  \
1001   }                                             \
1002 while (0)
1003
     /* Search the current buffer for STRING, N times, starting at
        POS/POS_BYTE and stopping at LIM/LIM_BYTE.

        A regexp that is not trivial (see trivial_regexp_p), or any
        regexp while Vsearch_spaces_regexp is non-nil, is compiled with
        compile_pattern and run through re_search_2; when such a search
        fails, the value returned is minus the number of matches still
        missing.  Everything else is searched for literally: the pattern
        is converted to the multibyteness of the buffer, translated
        through TRT, and handed to boyer_moore, or to simple_search when
        the translation cannot be handled by boyer_moore.

        POS is returned unchanged if STRING is empty or N is 0.  */
1004 static int
1005 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1006                RE, trt, inverse_trt, posix)
1007      Lisp_Object string;
1008      int pos;
1009      int pos_byte;
1010      int lim;
1011      int lim_byte;
1012      int n;
1013      int RE;
1014      Lisp_Object trt;
1015      Lisp_Object inverse_trt;
1016      int posix;
1017 {
1018   int len = SCHARS (string);
1019   int len_byte = SBYTES (string);
1020   register int i;
1021
1022   if (running_asynch_code)
1023     save_search_regs ();
1024
1025   /* Searching 0 times means don't move.  */
1026   /* Null string is found at starting position.  */
1027   if (len == 0 || n == 0)
1028     {
1029       set_search_regs (pos_byte, 0);
1030       return pos;
1031     }
1032
1033   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1034     {
1035       unsigned char *p1, *p2;
1036       int s1, s2;
1037       struct re_pattern_buffer *bufp;
1038
1039       bufp = compile_pattern (string, &search_regs, trt, posix,
1040                               !NILP (current_buffer->enable_multibyte_characters));
1041
1042       immediate_quit = 1;       /* Quit immediately if user types ^G,
1043                                    because letting this function finish
1044                                    can take too long. */
1045       QUIT;                     /* Do a pending quit right away,
1046                                    to avoid paradoxical behavior */
1047       /* Get pointers and sizes of the two strings
1048          that make up the visible portion of the buffer. */
1049
1050       p1 = BEGV_ADDR;
1051       s1 = GPT_BYTE - BEGV_BYTE;
1052       p2 = GAP_END_ADDR;
1053       s2 = ZV_BYTE - GPT_BYTE;
           /* If the gap lies before BEGV or after ZV, the visible text is
              a single piece; describe it with one string only.  */
1054       if (s1 < 0)
1055         {
1056           p2 = p1;
1057           s2 = ZV_BYTE - BEGV_BYTE;
1058           s1 = 0;
1059         }
1060       if (s2 < 0)
1061         {
1062           s1 = ZV_BYTE - BEGV_BYTE;
1063           s2 = 0;
1064         }
1065       re_match_object = Qnil;
1066
           /* Backward search: one re_search_2 call per requested match.  */
1067       while (n < 0)
1068         {
1069           int val;
1070           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1071                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1072                              &search_regs,
1073                              /* Don't allow match past current point */
1074                              pos_byte - BEGV_BYTE);
               /* A result of -2 is reported as a matcher overflow.  */
1075           if (val == -2)
1076             {
1077               matcher_overflow ();
1078             }
1079           if (val >= 0)
1080             {
1081               pos_byte = search_regs.start[0] + BEGV_BYTE;
                   /* The registers hold byte offsets relative to BEGV_BYTE;
                      convert them to character positions.  */
1082               for (i = 0; i < search_regs.num_regs; i++)
1083                 if (search_regs.start[i] >= 0)
1084                   {
1085                     search_regs.start[i]
1086                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1087                     search_regs.end[i]
1088                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1089                   }
1090               XSETBUFFER (last_thing_searched, current_buffer);
1091               /* Set pos to the new position. */
1092               pos = search_regs.start[0];
1093             }
1094           else
1095             {
                   /* Not found: N is negative here, so this is minus the
                      number of matches still missing.  */
1096               immediate_quit = 0;
1097               return (n);
1098             }
1099           n++;
1100         }
           /* Forward search: one re_search_2 call per requested match.  */
1101       while (n > 0)
1102         {
1103           int val;
1104           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106                              &search_regs,
1107                              lim_byte - BEGV_BYTE);
1108           if (val == -2)
1109             {
1110               matcher_overflow ();
1111             }
1112           if (val >= 0)
1113             {
1114               pos_byte = search_regs.end[0] + BEGV_BYTE;
1115               for (i = 0; i < search_regs.num_regs; i++)
1116                 if (search_regs.start[i] >= 0)
1117                   {
1118                     search_regs.start[i]
1119                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1120                     search_regs.end[i]
1121                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1122                   }
1123               XSETBUFFER (last_thing_searched, current_buffer);
1124               pos = search_regs.end[0];
1125             }
1126           else
1127             {
1128               immediate_quit = 0;
1129               return (0 - n);
1130             }
1131           n--;
1132         }
1133       immediate_quit = 0;
1134       return (pos);
1135     }
1136   else                          /* non-RE case */
1137     {
1138       unsigned char *raw_pattern, *pat;
1139       int raw_pattern_size;
1140       int raw_pattern_size_byte;
1141       unsigned char *patbuf;
1142       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1143       unsigned char *base_pat = SDATA (string);
1144       /* Set to nonzero if we find a non-ASCII char that needs
1145          translation.  */
1146       int char_base = 0;
1147       int boyer_moore_ok = 1;
1148
1149       /* MULTIBYTE says whether the text to be searched is multibyte.
1150          We must convert PATTERN to match that, or we will not really
1151          find things right.  */
1152
1153       if (multibyte == STRING_MULTIBYTE (string))
1154         {
1155           raw_pattern = (unsigned char *) SDATA (string);
1156           raw_pattern_size = SCHARS (string);
1157           raw_pattern_size_byte = SBYTES (string);
1158         }
1159       else if (multibyte)
1160         {
               /* Converting single-byte to multibyte.  */
1161           raw_pattern_size = SCHARS (string);
1162           raw_pattern_size_byte
1163             = count_size_as_multibyte (SDATA (string),
1164                                        raw_pattern_size);
1165           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1166           copy_text (SDATA (string), raw_pattern,
1167                      SCHARS (string), 0, 1);
1168         }
1169       else
1170         {
1171           /* Converting multibyte to single-byte.
1172
1173              ??? Perhaps this conversion should be done in a special way
1174              by subtracting nonascii-insert-offset from each non-ASCII char,
1175              so that only the multibyte chars which really correspond to
1176              the chosen single-byte character set can possibly match.  */
1177           raw_pattern_size = SCHARS (string);
1178           raw_pattern_size_byte = SCHARS (string);
1179           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1180           copy_text (SDATA (string), raw_pattern,
1181                      SBYTES (string), 1, 0);
1182         }
1183
1184       /* Copy and optionally translate the pattern.  */
1185       len = raw_pattern_size;
1186       len_byte = raw_pattern_size_byte;
1187       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1188       pat = patbuf;
1189       base_pat = raw_pattern;
1190       if (multibyte)
1191         {
1192           /* Fill patbuf by translated characters in STRING while
1193              checking if we can use boyer-moore search.  If TRT is
1194              non-nil, we can use boyer-moore search only if TRT can be
1195              represented by the byte array of 256 elements.  For that,
1196              all non-ASCII case-equivalents of all case-sensitive
1197              characters in STRING must belong to the same charset and
1198              row.  */
1199
1200           while (--len >= 0)
1201             {
1202               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1203               int c, translated, inverse;
1204               int in_charlen, charlen;
1205
1206               /* If we got here and the RE flag is set, it's because we're
1207                  dealing with a regexp known to be trivial, so the backslash
1208                  just quotes the next character.  */
1209               if (RE && *base_pat == '\\')
1210                 {
1211                   len--;
                       /* The backslash is dropped from the pattern, so it
                          must not be counted in the character length that
                          is handed to the search routines below (the
                          unibyte loop does the same).  */
                       raw_pattern_size--;
1212                   len_byte--;
1213                   base_pat++;
1214                 }
1215
1216               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1217
1218               if (NILP (trt))
1219                 {
1220                   str = base_pat;
1221                   charlen = in_charlen;
1222                 }
1223               else
1224                 {
1225                   /* Translate the character.  */
1226                   TRANSLATE (translated, trt, c);
1227                   charlen = CHAR_STRING (translated, str_base);
1228                   str = str_base;
1229
1230                   /* Check if C has any other case-equivalents.  */
1231                   TRANSLATE (inverse, inverse_trt, c);
1232                   /* If so, check if we can use boyer-moore.  */
1233                   if (c != inverse && boyer_moore_ok)
1234                     {
1235                       /* Check if all equivalents belong to the same
1236                          group of characters.  Note that the check of C
1237                          itself is done by the last iteration.  Note
1238                          also that we don't have to check ASCII
1239                          characters because boyer-moore search can
1240                          always handle their translation.  */
1241                       while (1)
1242                         {
1243                           if (! ASCII_BYTE_P (inverse))
1244                             {
1245                               if (CHAR_BYTE8_P (inverse))
1246                                 {
1247                                   /* Boyer-moore search can't handle a
1248                                      translation of an eight-bit
1249                                      character.  */
1250                                   boyer_moore_ok = 0;
1251                                   break;
1252                                 }
1253                               else if (char_base == 0)
1254                                 char_base = inverse & ~0x3F;
1255                               else if ((inverse & ~0x3F)
1256                                        != char_base)
1257                                 {
1258                                   boyer_moore_ok = 0;
1259                                   break;
1260                                 }
1261                             }
1262                           if (c == inverse)
1263                             break;
1264                           TRANSLATE (inverse, inverse_trt, inverse);
1265                         }
1266                     }
1267                 }
1268
1269               /* Store this character into the translated pattern.  */
1270               bcopy (str, pat, charlen);
1271               pat += charlen;
1272               base_pat += in_charlen;
1273               len_byte -= in_charlen;
1274             }
1275         }
1276       else
1277         {
1278           /* Unibyte buffer.  */
1279           char_base = 0;
1280           while (--len >= 0)
1281             {
1282               int c, translated;
1283
1284               /* If we got here and the RE flag is set, it's because we're
1285                  dealing with a regexp known to be trivial, so the backslash
1286                  just quotes the next character.  */
1287               if (RE && *base_pat == '\\')
1288                 {
1289                   len--;
1290                   raw_pattern_size--;
1291                   base_pat++;
1292                 }
1293               c = *base_pat++;
1294               TRANSLATE (translated, trt, c);
1295               *pat++ = translated;
1296             }
1297         }
1298
           /* From here on LEN/LEN_BYTE describe the translated pattern in
              PATBUF, with any quoting backslashes removed.  */
1299       len_byte = pat - patbuf;
1300       len = raw_pattern_size;
1301       pat = base_pat = patbuf;
1302
1303       if (boyer_moore_ok)
1304         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1305                             pos, pos_byte, lim, lim_byte,
1306                             char_base);
1307       else
1308         return simple_search (n, pat, len, len_byte, trt,
1309                               pos, pos_byte, lim, lim_byte);
1310     }
1311 }
1312 \f
1313 /* Do a simple string search N times for the string PAT,
1314    whose length is LEN/LEN_BYTE,
1315    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1316    TRT is the translation table.
1317
1318    Return the character position where the match is found.
1319    Otherwise, if M matches remain to be found, return -M.
1320
1321    This kind of search works regardless of what is in PAT and
1322    regardless of what is in TRT.  It is used in cases where
1323    boyer_moore cannot work.

        Buffer characters are compared after translation through TRT,
        so in a multibyte buffer the text that matches may occupy a
        different number of bytes than PAT does.  The byte position and
        the match registers are therefore updated with the number of
        buffer bytes actually matched, not with LEN_BYTE.  */
1324
1325 static int
1326 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1327      int n;
1328      unsigned char *pat;
1329      int len, len_byte;
1330      Lisp_Object trt;
1331      int pos, pos_byte;
1332      int lim, lim_byte;
1333 {
1334   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1335   int forward = n > 0;
       /* Number of buffer bytes covered by the most recent match.  The
          multibyte loops below recompute it; in a unibyte buffer it
          stays equal to LEN_BYTE.  */
       int match_byte = len_byte;
1336
1337   if (lim > pos && multibyte)
1338     while (n > 0)
1339       {
1340         while (1)
1341           {
1342             /* Try matching at position POS.  */
1343             int this_pos = pos;
1344             int this_pos_byte = pos_byte;
1345             int this_len = len;
1346             int this_len_byte = len_byte;
1347             unsigned char *p = pat;
                 /* Not enough characters left before LIM for a match.  */
1348             if (pos + len > lim)
1349               goto stop;
1350
1351             while (this_len > 0)
1352               {
1353                 int charlen, buf_charlen;
1354                 int pat_ch, buf_ch;
1355
1356                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1357                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1358                                                  ZV_BYTE - this_pos_byte,
1359                                                  buf_charlen);
1360                 TRANSLATE (buf_ch, trt, buf_ch);
1361
1362                 if (buf_ch != pat_ch)
1363                   break;
1364
1365                 this_len_byte -= charlen;
1366                 this_len--;
1367                 p += charlen;
1368
1369                 this_pos_byte += buf_charlen;
1370                 this_pos++;
1371               }
1372
1373             if (this_len == 0)
1374               {
                     /* THIS_POS_BYTE has advanced over exactly the buffer
                        bytes that matched.  */
                     match_byte = this_pos_byte - pos_byte;
1375                 pos += len;
1376                 pos_byte += match_byte;
1377                 break;
1378               }
1379
1380             INC_BOTH (pos, pos_byte);
1381           }
1382
1383         n--;
1384       }
1385   else if (lim > pos)
1386     while (n > 0)
1387       {
1388         while (1)
1389           {
1390             /* Try matching at position POS.  */
1391             int this_pos = pos;
1392             int this_len = len;
1393             unsigned char *p = pat;
1394
1395             if (pos + len > lim)
1396               goto stop;
1397
1398             while (this_len > 0)
1399               {
1400                 int pat_ch = *p++;
1401                 int buf_ch = FETCH_BYTE (this_pos);
1402                 TRANSLATE (buf_ch, trt, buf_ch);
1403
1404                 if (buf_ch != pat_ch)
1405                   break;
1406
1407                 this_len--;
1408                 this_pos++;
1409               }
1410
1411             if (this_len == 0)
1412               {
1413                 pos += len;
1414                 break;
1415               }
1416
1417             pos++;
1418           }
1419
1420         n--;
1421       }
1422   /* Backwards search.  */
1423   else if (lim < pos && multibyte)
1424     while (n < 0)
1425       {
1426         while (1)
1427           {
1428             /* Try matching at position POS.  */
1429             int this_pos = pos - len;
1430             int this_pos_byte;
1431             int this_len = len;
1432             int this_len_byte = len_byte;
1433             unsigned char *p = pat;
1434
1435             if (pos - len < lim)
1436               goto stop;
1437             this_pos_byte = CHAR_TO_BYTE (this_pos);
                 /* Bytes occupied by the LEN buffer characters that end
                    at POS; this is the match length if they match.  */
                 match_byte = pos_byte - this_pos_byte;
1438
1439             while (this_len > 0)
1440               {
1441                 int charlen, buf_charlen;
1442                 int pat_ch, buf_ch;
1443
1444                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1445                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1446                                                  ZV_BYTE - this_pos_byte,
1447                                                  buf_charlen);
1448                 TRANSLATE (buf_ch, trt, buf_ch);
1449
1450                 if (buf_ch != pat_ch)
1451                   break;
1452
1453                 this_len_byte -= charlen;
1454                 this_len--;
1455                 p += charlen;
1456                 this_pos_byte += buf_charlen;
1457                 this_pos++;
1458               }
1459
1460             if (this_len == 0)
1461               {
1462                 pos -= len;
1463                 pos_byte -= match_byte;
1464                 break;
1465               }
1466
1467             DEC_BOTH (pos, pos_byte);
1468           }
1469
1470         n++;
1471       }
1472   else if (lim < pos)
1473     while (n < 0)
1474       {
1475         while (1)
1476           {
1477             /* Try matching at position POS.  */
1478             int this_pos = pos - len;
1479             int this_len = len;
1480             unsigned char *p = pat;
1481
1482             if (pos - len < lim)
1483               goto stop;
1484
1485             while (this_len > 0)
1486               {
1487                 int pat_ch = *p++;
1488                 int buf_ch = FETCH_BYTE (this_pos);
1489                 TRANSLATE (buf_ch, trt, buf_ch);
1490
1491                 if (buf_ch != pat_ch)
1492                   break;
1493                 this_len--;
1494                 this_pos++;
1495               }
1496
1497             if (this_len == 0)
1498               {
1499                 pos -= len;
1500                 break;
1501               }
1502
1503             pos--;
1504           }
1505
1506         n++;
1507       }
1508
1509  stop:
1510   if (n == 0)
1511     {
           /* All requested matches were found.  Record the last one: a
              forward search leaves POS at its end, a backward search
              leaves POS at its start.  */
1512       if (forward)
1513         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1514       else
1515         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1516
1517       return pos;
1518     }
       /* Otherwise return minus the number of matches still missing.  */
1519   else if (n > 0)
1520     return -n;
1521   else
1522     return n;
1523 }
1524 \f
1525 /* Do Boyer-Moore search N times for the string BASE_PAT,
1526    whose length is LEN/LEN_BYTE,
1527    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1528    The sign of N says which direction we search in.
1529    TRT and INVERSE_TRT are translation tables.
1530    Characters in PAT are already translated by TRT.
1531
1532    This kind of search works if all the characters in BASE_PAT that
1533    have nontrivial translation are the same aside from the last byte.
1534    This makes it possible to translate just the last byte of a
1535    character, and do so after just a simple test of the context.
1536    CHAR_BASE is nonzero iff there is such a non-ASCII character.
1537
1538    If that criterion is not satisfied, do not call this function.  */
1539
1540 static int
1541 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1542              pos, pos_byte, lim, lim_byte, char_base)
1543      int n;
1544      unsigned char *base_pat;
1545      int len, len_byte;
1546      Lisp_Object trt;
1547      Lisp_Object inverse_trt;
1548      int pos, pos_byte;
1549      int lim, lim_byte;
1550      int char_base;
1551 {
1552   int direction = ((n > 0) ? 1 : -1);
1553   register int dirlen;
1554   int infinity, limit, stride_for_teases = 0;
1555   register int *BM_tab;
1556   int *BM_tab_base;
1557   register unsigned char *cursor, *p_limit;
1558   register int i, j;
1559   unsigned char *pat, *pat_end;
1560   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1561
1562   unsigned char simple_translate[0400];
1563   /* These are set to the preceding bytes of a byte to be translated
1564      if charset_base is nonzero.  As the maximum byte length of a
1565      multibyte character is 5, we have to check at most four previous
1566      bytes.  */
1567   int translate_prev_byte1 = 0;
1568   int translate_prev_byte2 = 0;
1569   int translate_prev_byte3 = 0;
1570   int translate_prev_byte4 = 0;
1571
1572 #ifdef C_ALLOCA
1573   int BM_tab_space[0400];
1574   BM_tab = &BM_tab_space[0];
1575 #else
1576   BM_tab = (int *) alloca (0400 * sizeof (int));
1577 #endif
1578   /* The general approach is that we are going to maintain that we know */
1579   /* the first (closest to the present position, in whatever direction */
1580   /* we're searching) character that could possibly be the last */
1581   /* (furthest from present position) character of a valid match.  We */
1582   /* advance the state of our knowledge by looking at that character */
1583   /* and seeing whether it indeed matches the last character of the */
1584   /* pattern.  If it does, we take a closer look.  If it does not, we */
1585   /* move our pointer (to putative last characters) as far as is */
1586   /* logically possible.  This amount of movement, which I call a */
1587   /* stride, will be the length of the pattern if the actual character */
1588   /* appears nowhere in the pattern, otherwise it will be the distance */
1589   /* from the last occurrence of that character to the end of the */
1590   /* pattern. */
1591   /* As a coding trick, an enormous stride is coded into the table for */
1592   /* characters that match the last character.  This allows use of only */
1593   /* a single test, a test for having gone past the end of the */
1594   /* permissible match region, to test for both possible matches (when */
1595   /* the stride goes past the end immediately) and failure to */
1596   /* match (where you get nudged past the end one stride at a time). */
1597
1598   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1599   /* is determined only by the last character of the putative match. */
1600   /* If that character does not match, we will stride the proper */
1601   /* distance to propose a match that superimposes it on the last */
1602   /* instance of a character that matches it (per trt), or misses */
1603   /* it entirely if there is none. */
1604
1605   dirlen = len_byte * direction;
1606   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1607
1608   /* Record position after the end of the pattern.  */
1609   pat_end = base_pat + len_byte;
1610   /* BASE_PAT points to a character that we start scanning from.
1611      It is the first character in a forward search,
1612      the last character in a backward search.  */
1613   if (direction < 0)
1614     base_pat = pat_end - 1;
1615
1616   BM_tab_base = BM_tab;
1617   BM_tab += 0400;
1618   j = dirlen;           /* to get it in a register */
1619   /* A character that does not appear in the pattern induces a */
1620   /* stride equal to the pattern length. */
1621   while (BM_tab_base != BM_tab)
1622     {
1623       *--BM_tab = j;
1624       *--BM_tab = j;
1625       *--BM_tab = j;
1626       *--BM_tab = j;
1627     }
1628
1629   /* We use this for translation, instead of TRT itself.
1630      We fill this in to handle the characters that actually
1631      occur in the pattern.  Others don't matter anyway!  */
1632   bzero (simple_translate, sizeof simple_translate);
1633   for (i = 0; i < 0400; i++)
1634     simple_translate[i] = i;
1635
1636   if (char_base)
1637     {
1638       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1639          byte following them are the target of translation.  */
1640       unsigned char str[MAX_MULTIBYTE_LENGTH];
1641       int len = CHAR_STRING (char_base, str);
1642
1643       translate_prev_byte1 = str[len - 2];
1644       if (len > 2)
1645         {
1646           translate_prev_byte2 = str[len - 3];
1647           if (len > 3)
1648             {
1649               translate_prev_byte3 = str[len - 4];
1650               if (len > 4)
1651                 translate_prev_byte4 = str[len - 5];
1652             }
1653         }
1654     }
1655
1656   i = 0;
1657   while (i != infinity)
1658     {
1659       unsigned char *ptr = base_pat + i;
1660       i += direction;
1661       if (i == dirlen)
1662         i = infinity;
1663       if (! NILP (trt))
1664         {
1665           /* If the byte currently looking at is a head of a character
1666              to check case-equivalents, set CH to that character.  An
1667              ASCII character and a non-ASCII character matching with
1668              CHAR_BASE are to be checked.  */
1669           int ch = -1;
1670
1671           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1672             ch = *ptr;
1673           else if (char_base && CHAR_HEAD_P (*ptr))
1674             {
1675               ch = STRING_CHAR (ptr, pat_end - ptr);
1676               if (char_base != (ch & ~0x3F))
1677                 ch = -1;
1678             }
1679
1680           j = *ptr;
1681           if (i == infinity)
1682             stride_for_teases = BM_tab[j];
1683
1684           BM_tab[j] = dirlen - i;
1685           /* A translation table is accompanied by its inverse -- see */
1686           /* comment following downcase_table for details */
1687           if (ch >= 0)
1688             {
1689               int starting_ch = ch;
1690               int starting_j;
1691
1692               if (ch > 0400)
1693                 starting_j = (ch & ~0x3F) | 0200;
1694               else
1695                 starting_j = ch;
1696               while (1)
1697                 {
1698                   TRANSLATE (ch, inverse_trt, ch);
1699                   if (ch > 0400)
1700                     j = (ch & ~0x3F) | 0200;
1701                   else
1702                     j = ch;
1703
1704                   /* For all the characters that map into CH,
1705                      set up simple_translate to map the last byte
1706                      into STARTING_J.  */
1707                   simple_translate[j] = starting_j;
1708                   if (ch == starting_ch)
1709                     break;
1710                   BM_tab[j] = dirlen - i;
1711                 }
1712             }
1713         }
1714       else
1715         {
1716           j = *ptr;
1717
1718           if (i == infinity)
1719             stride_for_teases = BM_tab[j];
1720           BM_tab[j] = dirlen - i;
1721         }
1722       /* stride_for_teases tells how much to stride if we get a */
1723       /* match on the far character but are subsequently */
1724       /* disappointed, by recording what the stride would have been */
1725       /* for that character if the last character had been */
1726       /* different. */
1727     }
1728   infinity = dirlen - infinity;
1729   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1730   /* loop invariant - POS_BYTE points at where last char (first
1731      char if reverse) of pattern would align in a possible match.  */
1732   while (n != 0)
1733     {
1734       int tail_end;
1735       unsigned char *tail_end_ptr;
1736
1737       /* It's been reported that some (broken) compiler thinks that
1738          Boolean expressions in an arithmetic context are unsigned.
1739          Using an explicit ?1:0 prevents this.  */
1740       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1741           < 0)
1742         return (n * (0 - direction));
1743       /* First we do the part we can by pointers (maybe nothing) */
1744       QUIT;
1745       pat = base_pat;
1746       limit = pos_byte - dirlen + direction;
1747       if (direction > 0)
1748         {
1749           limit = BUFFER_CEILING_OF (limit);
1750           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1751              can take on without hitting edge of buffer or the gap.  */
1752           limit = min (limit, pos_byte + 20000);
1753           limit = min (limit, lim_byte - 1);
1754         }
1755       else
1756         {
1757           limit = BUFFER_FLOOR_OF (limit);
1758           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1759              can take on without hitting edge of buffer or the gap.  */
1760           limit = max (limit, pos_byte - 20000);
1761           limit = max (limit, lim_byte);
1762         }
1763       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1764       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1765
1766       if ((limit - pos_byte) * direction > 20)
1767         {
1768           unsigned char *p2;
1769
1770           p_limit = BYTE_POS_ADDR (limit);
1771           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1772           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1773           while (1)             /* use one cursor setting as long as i can */
1774             {
1775               if (direction > 0) /* worth duplicating */
1776                 {
1777                   /* Use signed comparison if appropriate
1778                      to make cursor+infinity sure to be > p_limit.
1779                      Assuming that the buffer lies in a range of addresses
1780                      that are all "positive" (as ints) or all "negative",
1781                      either kind of comparison will work as long
1782                      as we don't step by infinity.  So pick the kind
1783                      that works when we do step by infinity.  */
1784                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1785                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1786                       cursor += BM_tab[*cursor];
1787                   else
1788                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1789                       cursor += BM_tab[*cursor];
1790                 }
1791               else
1792                 {
1793                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1794                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1795                       cursor += BM_tab[*cursor];
1796                   else
1797                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1798                       cursor += BM_tab[*cursor];
1799                 }
1800 /* If you are here, cursor is beyond the end of the searched region. */
1801 /* This can happen if you match on the far character of the pattern, */
1802 /* because the "stride" of that character is infinity, a number able */
1803 /* to throw you well beyond the end of the search.  It can also */
1804 /* happen if you fail to match within the permitted region and would */
1805 /* otherwise try a character beyond that region */
1806               if ((cursor - p_limit) * direction <= len_byte)
1807                 break;  /* a small overrun is genuine */
1808               cursor -= infinity; /* large overrun = hit */
1809               i = dirlen - direction;
1810               if (! NILP (trt))
1811                 {
1812                   while ((i -= direction) + direction != 0)
1813                     {
1814                       int ch;
1815                       cursor -= direction;
1816                       /* Translate only the last byte of a character.  */
1817                       if (! multibyte
1818                           || ((cursor == tail_end_ptr
1819                                || CHAR_HEAD_P (cursor[1]))
1820                               && (CHAR_HEAD_P (cursor[0])
1821                                   /* Check if this is the last byte of
1822                                      a translable character.  */
1823                                   || (translate_prev_byte1 == cursor[-1]
1824                                       && (CHAR_HEAD_P (translate_prev_byte1)
1825                                           || (translate_prev_byte2 == cursor[-2]
1826                                               && (CHAR_HEAD_P (translate_prev_byte2)
1827                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1828                         ch = simple_translate[*cursor];
1829                       else
1830                         ch = *cursor;
1831                       if (pat[i] != ch)
1832                         break;
1833                     }
1834                 }
1835               else
1836                 {
1837                   while ((i -= direction) + direction != 0)
1838                     {
1839                       cursor -= direction;
1840                       if (pat[i] != *cursor)
1841                         break;
1842                     }
1843                 }
1844               cursor += dirlen - i - direction; /* fix cursor */
1845               if (i + direction == 0)
1846                 {
1847                   int position;
1848
1849                   cursor -= direction;
1850
1851                   position = pos_byte + cursor - p2 + ((direction > 0)
1852                                                        ? 1 - len_byte : 0);
1853                   set_search_regs (position, len_byte);
1854
1855                   if ((n -= direction) != 0)
1856                     cursor += dirlen; /* to resume search */
1857                   else
1858                     return ((direction > 0)
1859                             ? search_regs.end[0] : search_regs.start[0]);
1860                 }
1861               else
1862                 cursor += stride_for_teases; /* <sigh> we lose -  */
1863             }
1864           pos_byte += cursor - p2;
1865         }
1866       else
1867         /* Now we'll pick up a clump that has to be done the hard */
1868         /* way because it covers a discontinuity */
1869         {
1870           limit = ((direction > 0)
1871                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1872                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1873           limit = ((direction > 0)
1874                    ? min (limit + len_byte, lim_byte - 1)
1875                    : max (limit - len_byte, lim_byte));
1876           /* LIMIT is now the last value POS_BYTE can have
1877              and still be valid for a possible match.  */
1878           while (1)
1879             {
1880               /* This loop can be coded for space rather than */
1881               /* speed because it will usually run only once. */
1882               /* (the reach is at most len + 21, and typically */
1883               /* does not exceed len) */
1884               while ((limit - pos_byte) * direction >= 0)
1885                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1886               /* now run the same tests to distinguish going off the */
1887               /* end, a match or a phony match. */
1888               if ((pos_byte - limit) * direction <= len_byte)
1889                 break;  /* ran off the end */
1890               /* Found what might be a match.
1891                  Set POS_BYTE back to last (first if reverse) pos.  */
1892               pos_byte -= infinity;
1893               i = dirlen - direction;
1894               while ((i -= direction) + direction != 0)
1895                 {
1896                   int ch;
1897                   unsigned char *ptr;
1898                   pos_byte -= direction;
1899                   ptr = BYTE_POS_ADDR (pos_byte);
1900                   /* Translate only the last byte of a character.  */
1901                   if (! multibyte
1902                       || ((ptr == tail_end_ptr
1903                            || CHAR_HEAD_P (ptr[1]))
1904                           && (CHAR_HEAD_P (ptr[0])
1905                               /* Check if this is the last byte of a
1906                                  translable character.  */
1907                               || (translate_prev_byte1 == ptr[-1]
1908                                   && (CHAR_HEAD_P (translate_prev_byte1)
1909                                       || (translate_prev_byte2 == ptr[-2]
1910                                           && (CHAR_HEAD_P (translate_prev_byte2)
1911                                               || translate_prev_byte3 == ptr[-3])))))))
1912                     ch = simple_translate[*ptr];
1913                   else
1914                     ch = *ptr;
1915                   if (pat[i] != ch)
1916                     break;
1917                 }
1918               /* Above loop has moved POS_BYTE part or all the way
1919                  back to the first pos (last pos if reverse).
1920                  Set it once again at the last (first if reverse) char.  */
1921               pos_byte += dirlen - i- direction;
1922               if (i + direction == 0)
1923                 {
1924                   int position;
1925                   pos_byte -= direction;
1926
1927                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1928
1929                   set_search_regs (position, len_byte);
1930
1931                   if ((n -= direction) != 0)
1932                     pos_byte += dirlen; /* to resume search */
1933                   else
1934                     return ((direction > 0)
1935                             ? search_regs.end[0] : search_regs.start[0]);
1936                 }
1937               else
1938                 pos_byte += stride_for_teases;
1939             }
1940           }
1941       /* We have done one clump.  Can we continue? */
1942       if ((lim_byte - pos_byte) * direction < 0)
1943         return ((0 - n) * direction);
1944     }
1945   return BYTE_TO_CHAR (pos_byte);
1946 }
1947
1948 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1949    for the overall match just found in the current buffer.
1950    Also clear out the match data for registers 1 and up.  */
1951
1952 static void
1953 set_search_regs (beg_byte, nbytes)
1954      int beg_byte, nbytes;
1955 {
1956   int i;
1957
1958   /* Make sure we have registers in which to store
1959      the match position.  */
1960   if (search_regs.num_regs == 0)
1961     {
1962       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1963       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1964       search_regs.num_regs = 2;
1965     }
1966
1967   /* Clear out the other registers.  */
1968   for (i = 1; i < search_regs.num_regs; i++)
1969     {
1970       search_regs.start[i] = -1;
1971       search_regs.end[i] = -1;
1972     }
1973
1974   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1975   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1976   XSETBUFFER (last_thing_searched, current_buffer);
1977 }
1978 \f
1979 /* Given a string of words separated by word delimiters,
1980   compute a regexp that matches those exact words
1981   separated by arbitrary punctuation.  */
1982
1983 static Lisp_Object
1984 wordify (string)
1985      Lisp_Object string;
1986 {
1987   register unsigned char *p, *o;
1988   register int i, i_byte, len, punct_count = 0, word_count = 0;
1989   Lisp_Object val;
1990   int prev_c = 0;
1991   int adjust;
1992
1993   CHECK_STRING (string);
1994   p = SDATA (string);
1995   len = SCHARS (string);
1996
1997   for (i = 0, i_byte = 0; i < len; )
1998     {
1999       int c;
2000
2001       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2002
2003       if (SYNTAX (c) != Sword)
2004         {
2005           punct_count++;
2006           if (i > 0 && SYNTAX (prev_c) == Sword)
2007             word_count++;
2008         }
2009
2010       prev_c = c;
2011     }
2012
2013   if (SYNTAX (prev_c) == Sword)
2014     word_count++;
2015   if (!word_count)
2016     return empty_string;
2017
2018   adjust = - punct_count + 5 * (word_count - 1) + 4;
2019   if (STRING_MULTIBYTE (string))
2020     val = make_uninit_multibyte_string (len + adjust,
2021                                         SBYTES (string)
2022                                         + adjust);
2023   else
2024     val = make_uninit_string (len + adjust);
2025
2026   o = SDATA (val);
2027   *o++ = '\\';
2028   *o++ = 'b';
2029   prev_c = 0;
2030
2031   for (i = 0, i_byte = 0; i < len; )
2032     {
2033       int c;
2034       int i_byte_orig = i_byte;
2035
2036       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2037
2038       if (SYNTAX (c) == Sword)
2039         {
2040           bcopy (SDATA (string) + i_byte_orig, o,
2041                  i_byte - i_byte_orig);
2042           o += i_byte - i_byte_orig;
2043         }
2044       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2045         {
2046           *o++ = '\\';
2047           *o++ = 'W';
2048           *o++ = '\\';
2049           *o++ = 'W';
2050           *o++ = '*';
2051         }
2052
2053       prev_c = c;
2054     }
2055
2056   *o++ = '\\';
2057   *o++ = 'b';
2058
2059   return val;
2060 }
2061 \f
2062 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2063        "MSearch backward: ",
2064        doc: /* Search backward from point for STRING.
2065 Set point to the beginning of the occurrence found, and return point.
2066 An optional second argument bounds the search; it is a buffer position.
2067 The match found must not extend before that position.
2068 Optional third argument, if t, means if fail just return nil (no error).
2069  If not nil and not t, position at limit of search and return nil.
2070 Optional fourth argument is repeat count--search for successive occurrences.
2071
2072 Search case-sensitivity is determined by the value of the variable
2073 `case-fold-search', which see.
2074
2075 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2076      (string, bound, noerror, count)
2077      Lisp_Object string, bound, noerror, count;
2078 {
2079   return search_command (string, bound, noerror, count, -1, 0, 0);
2080 }
2081
2082 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2083        doc: /* Search forward from point for STRING.
2084 Set point to the end of the occurrence found, and return point.
2085 An optional second argument bounds the search; it is a buffer position.
2086 The match found must not extend after that position.  nil is equivalent
2087   to (point-max).
2088 Optional third argument, if t, means if fail just return nil (no error).
2089   If not nil and not t, move to limit of search and return nil.
2090 Optional fourth argument is repeat count--search for successive occurrences.
2091
2092 Search case-sensitivity is determined by the value of the variable
2093 `case-fold-search', which see.
2094
2095 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2096      (string, bound, noerror, count)
2097      Lisp_Object string, bound, noerror, count;
2098 {
2099   return search_command (string, bound, noerror, count, 1, 0, 0);
2100 }
2101
2102 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2103        "sWord search backward: ",
2104        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2105 Set point to the beginning of the occurrence found, and return point.
2106 An optional second argument bounds the search; it is a buffer position.
2107 The match found must not extend before that position.
2108 Optional third argument, if t, means if fail just return nil (no error).
2109   If not nil and not t, move to limit of search and return nil.
2110 Optional fourth argument is repeat count--search for successive occurrences.  */)
2111      (string, bound, noerror, count)
2112      Lisp_Object string, bound, noerror, count;
2113 {
2114   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2115 }
2116
2117 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2118        "sWord search: ",
2119        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2120 Set point to the end of the occurrence found, and return point.
2121 An optional second argument bounds the search; it is a buffer position.
2122 The match found must not extend after that position.
2123 Optional third argument, if t, means if fail just return nil (no error).
2124   If not nil and not t, move to limit of search and return nil.
2125 Optional fourth argument is repeat count--search for successive occurrences.  */)
2126      (string, bound, noerror, count)
2127      Lisp_Object string, bound, noerror, count;
2128 {
2129   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2130 }
2131
2132 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2133        "sRE search backward: ",
2134        doc: /* Search backward from point for match for regular expression REGEXP.
2135 Set point to the beginning of the match, and return point.
2136 The match found is the one starting last in the buffer
2137 and yet ending before the origin of the search.
2138 An optional second argument bounds the search; it is a buffer position.
2139 The match found must start at or after that position.
2140 Optional third argument, if t, means if fail just return nil (no error).
2141   If not nil and not t, move to limit of search and return nil.
2142 Optional fourth argument is repeat count--search for successive occurrences.
2143 See also the functions `match-beginning', `match-end', `match-string',
2144 and `replace-match'.  */)
2145      (regexp, bound, noerror, count)
2146      Lisp_Object regexp, bound, noerror, count;
2147 {
2148   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2149 }
2150
2151 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2152        "sRE search: ",
2153        doc: /* Search forward from point for regular expression REGEXP.
2154 Set point to the end of the occurrence found, and return point.
2155 An optional second argument bounds the search; it is a buffer position.
2156 The match found must not extend after that position.
2157 Optional third argument, if t, means if fail just return nil (no error).
2158   If not nil and not t, move to limit of search and return nil.
2159 Optional fourth argument is repeat count--search for successive occurrences.
2160 See also the functions `match-beginning', `match-end', `match-string',
2161 and `replace-match'.  */)
2162      (regexp, bound, noerror, count)
2163      Lisp_Object regexp, bound, noerror, count;
2164 {
2165   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2166 }
2167
2168 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2169        "sPosix search backward: ",
2170        doc: /* Search backward from point for match for regular expression REGEXP.
2171 Find the longest match in accord with Posix regular expression rules.
2172 Set point to the beginning of the match, and return point.
2173 The match found is the one starting last in the buffer
2174 and yet ending before the origin of the search.
2175 An optional second argument bounds the search; it is a buffer position.
2176 The match found must start at or after that position.
2177 Optional third argument, if t, means if fail just return nil (no error).
2178   If not nil and not t, move to limit of search and return nil.
2179 Optional fourth argument is repeat count--search for successive occurrences.
2180 See also the functions `match-beginning', `match-end', `match-string',
2181 and `replace-match'.  */)
2182      (regexp, bound, noerror, count)
2183      Lisp_Object regexp, bound, noerror, count;
2184 {
2185   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2186 }
2187
2188 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2189        "sPosix search: ",
2190        doc: /* Search forward from point for regular expression REGEXP.
2191 Find the longest match in accord with Posix regular expression rules.
2192 Set point to the end of the occurrence found, and return point.
2193 An optional second argument bounds the search; it is a buffer position.
2194 The match found must not extend after that position.
2195 Optional third argument, if t, means if fail just return nil (no error).
2196   If not nil and not t, move to limit of search and return nil.
2197 Optional fourth argument is repeat count--search for successive occurrences.
2198 See also the functions `match-beginning', `match-end', `match-string',
2199 and `replace-match'.  */)
2200      (regexp, bound, noerror, count)
2201      Lisp_Object regexp, bound, noerror, count;
2202 {
2203   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2204 }
2205 \f
2206 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2207        doc: /* Replace text matched by last search with NEWTEXT.
2208 Leave point at the end of the replacement text.
2209
2210 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2211 Otherwise maybe capitalize the whole text, or maybe just word initials,
2212 based on the replaced text.
2213 If the replaced text has only capital letters
2214 and has at least one multiletter word, convert NEWTEXT to all caps.
2215 Otherwise if all words are capitalized in the replaced text,
2216 capitalize each word in NEWTEXT.
2217
2218 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2219 Otherwise treat `\\' as special:
2220   `\\&' in NEWTEXT means substitute original matched text.
2221   `\\N' means substitute what matched the Nth `\\(...\\)'.
2222        If Nth parens didn't match, substitute nothing.
2223   `\\\\' means insert one `\\'.
2224 Case conversion does not apply to these substitutions.
2225
2226 FIXEDCASE and LITERAL are optional arguments.
2227
2228 The optional fourth argument STRING can be a string to modify.
2229 This is meaningful when the previous match was done against STRING,
2230 using `string-match'.  When used this way, `replace-match'
2231 creates and returns a new string made by copying STRING and replacing
2232 the part of STRING that was matched.
2233
2234 The optional fifth argument SUBEXP specifies a subexpression;
2235 it says to replace just that subexpression with NEWTEXT,
2236 rather than replacing the entire matched text.
2237 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2238 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2239 NEWTEXT in place of subexp N.
2240 This is useful only after a regular expression search or match,
2241 since only regular expressions have distinguished subexpressions.  */)
2242      (newtext, fixedcase, literal, string, subexp)
2243      Lisp_Object newtext, fixedcase, literal, string, subexp;
2244 {
2245   enum { nochange, all_caps, cap_initial } case_action;
2246   register int pos, pos_byte;
2247   int some_multiletter_word;
2248   int some_lowercase;
2249   int some_uppercase;
2250   int some_nonuppercase_initial;
2251   register int c, prevc;
2252   int sub;
2253   int opoint, newpoint;
2254
2255   CHECK_STRING (newtext);
2256
2257   if (! NILP (string))
2258     CHECK_STRING (string);
2259
2260   case_action = nochange;       /* We tried an initialization */
2261                                 /* but some C compilers blew it */
2262
2263   if (search_regs.num_regs <= 0)
2264     error ("replace-match called before any match found");
2265
2266   if (NILP (subexp))
2267     sub = 0;
2268   else
2269     {
2270       CHECK_NUMBER (subexp);
2271       sub = XINT (subexp);
2272       if (sub < 0 || sub >= search_regs.num_regs)
2273         args_out_of_range (subexp, make_number (search_regs.num_regs));
2274     }
2275
2276   if (NILP (string))
2277     {
2278       if (search_regs.start[sub] < BEGV
2279           || search_regs.start[sub] > search_regs.end[sub]
2280           || search_regs.end[sub] > ZV)
2281         args_out_of_range (make_number (search_regs.start[sub]),
2282                            make_number (search_regs.end[sub]));
2283     }
2284   else
2285     {
2286       if (search_regs.start[sub] < 0
2287           || search_regs.start[sub] > search_regs.end[sub]
2288           || search_regs.end[sub] > SCHARS (string))
2289         args_out_of_range (make_number (search_regs.start[sub]),
2290                            make_number (search_regs.end[sub]));
2291     }
2292
2293   if (NILP (fixedcase))
2294     {
2295       /* Decide how to casify by examining the matched text. */
2296       int last;
2297
2298       pos = search_regs.start[sub];
2299       last = search_regs.end[sub];
2300
2301       if (NILP (string))
2302         pos_byte = CHAR_TO_BYTE (pos);
2303       else
2304         pos_byte = string_char_to_byte (string, pos);
2305
2306       prevc = '\n';
2307       case_action = all_caps;
2308
2309       /* some_multiletter_word is set nonzero if any original word
2310          is more than one letter long. */
2311       some_multiletter_word = 0;
2312       some_lowercase = 0;
2313       some_nonuppercase_initial = 0;
2314       some_uppercase = 0;
2315
2316       while (pos < last)
2317         {
2318           if (NILP (string))
2319             {
2320               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2321               INC_BOTH (pos, pos_byte);
2322             }
2323           else
2324             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2325
2326           if (LOWERCASEP (c))
2327             {
2328               /* Cannot be all caps if any original char is lower case */
2329
2330               some_lowercase = 1;
2331               if (SYNTAX (prevc) != Sword)
2332                 some_nonuppercase_initial = 1;
2333               else
2334                 some_multiletter_word = 1;
2335             }
2336           else if (!NOCASEP (c))
2337             {
2338               some_uppercase = 1;
2339               if (SYNTAX (prevc) != Sword)
2340                 ;
2341               else
2342                 some_multiletter_word = 1;
2343             }
2344           else
2345             {
2346               /* If the initial is a caseless word constituent,
2347                  treat that like a lowercase initial.  */
2348               if (SYNTAX (prevc) != Sword)
2349                 some_nonuppercase_initial = 1;
2350             }
2351
2352           prevc = c;
2353         }
2354
2355       /* Convert to all caps if the old text is all caps
2356          and has at least one multiletter word.  */
2357       if (! some_lowercase && some_multiletter_word)
2358         case_action = all_caps;
2359       /* Capitalize each word, if the old text has all capitalized words.  */
2360       else if (!some_nonuppercase_initial && some_multiletter_word)
2361         case_action = cap_initial;
2362       else if (!some_nonuppercase_initial && some_uppercase)
2363         /* Should x -> yz, operating on X, give Yz or YZ?
2364            We'll assume the latter.  */
2365         case_action = all_caps;
2366       else
2367         case_action = nochange;
2368     }
2369
2370   /* Do replacement in a string.  */
2371   if (!NILP (string))
2372     {
2373       Lisp_Object before, after;
2374
2375       before = Fsubstring (string, make_number (0),
2376                            make_number (search_regs.start[sub]));
2377       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2378
2379       /* Substitute parts of the match into NEWTEXT
2380          if desired.  */
2381       if (NILP (literal))
2382         {
2383           int lastpos = 0;
2384           int lastpos_byte = 0;
2385           /* We build up the substituted string in ACCUM.  */
2386           Lisp_Object accum;
2387           Lisp_Object middle;
2388           int length = SBYTES (newtext);
2389
2390           accum = Qnil;
2391
2392           for (pos_byte = 0, pos = 0; pos_byte < length;)
2393             {
2394               int substart = -1;
2395               int subend = 0;
2396               int delbackslash = 0;
2397
2398               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2399
2400               if (c == '\\')
2401                 {
2402                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2403
2404                   if (c == '&')
2405                     {
2406                       substart = search_regs.start[sub];
2407                       subend = search_regs.end[sub];
2408                     }
2409                   else if (c >= '1' && c <= '9')
2410                     {
2411                       if (search_regs.start[c - '0'] >= 0
2412                           && c <= search_regs.num_regs + '0')
2413                         {
2414                           substart = search_regs.start[c - '0'];
2415                           subend = search_regs.end[c - '0'];
2416                         }
2417                       else
2418                         {
2419                           /* If that subexp did not match,
2420                              replace \\N with nothing.  */
2421                           substart = 0;
2422                           subend = 0;
2423                         }
2424                     }
2425                   else if (c == '\\')
2426                     delbackslash = 1;
2427                   else
2428                     error ("Invalid use of `\\' in replacement text");
2429                 }
2430               if (substart >= 0)
2431                 {
2432                   if (pos - 2 != lastpos)
2433                     middle = substring_both (newtext, lastpos,
2434                                              lastpos_byte,
2435                                              pos - 2, pos_byte - 2);
2436                   else
2437                     middle = Qnil;
2438                   accum = concat3 (accum, middle,
2439                                    Fsubstring (string,
2440                                                make_number (substart),
2441                                                make_number (subend)));
2442                   lastpos = pos;
2443                   lastpos_byte = pos_byte;
2444                 }
2445               else if (delbackslash)
2446                 {
2447                   middle = substring_both (newtext, lastpos,
2448                                            lastpos_byte,
2449                                            pos - 1, pos_byte - 1);
2450
2451                   accum = concat2 (accum, middle);
2452                   lastpos = pos;
2453                   lastpos_byte = pos_byte;
2454                 }
2455             }
2456
2457           if (pos != lastpos)
2458             middle = substring_both (newtext, lastpos,
2459                                      lastpos_byte,
2460                                      pos, pos_byte);
2461           else
2462             middle = Qnil;
2463
2464           newtext = concat2 (accum, middle);
2465         }
2466
2467       /* Do case substitution in NEWTEXT if desired.  */
2468       if (case_action == all_caps)
2469         newtext = Fupcase (newtext);
2470       else if (case_action == cap_initial)
2471         newtext = Fupcase_initials (newtext);
2472
2473       return concat3 (before, newtext, after);
2474     }
2475
2476   /* Record point, then move (quietly) to the start of the match.  */
2477   if (PT >= search_regs.end[sub])
2478     opoint = PT - ZV;
2479   else if (PT > search_regs.start[sub])
2480     opoint = search_regs.end[sub] - ZV;
2481   else
2482     opoint = PT;
2483
2484   /* If we want non-literal replacement,
2485      perform substitution on the replacement string.  */
2486   if (NILP (literal))
2487     {
2488       int length = SBYTES (newtext);
2489       unsigned char *substed;
2490       int substed_alloc_size, substed_len;
2491       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2492       int str_multibyte = STRING_MULTIBYTE (newtext);
2493       Lisp_Object rev_tbl;
2494       int really_changed = 0;
2495
2496       rev_tbl = Qnil;
2497
2498       substed_alloc_size = length * 2 + 100;
2499       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2500       substed_len = 0;
2501
2502       /* Go thru NEWTEXT, producing the actual text to insert in
2503          SUBSTED while adjusting multibyteness to that of the current
2504          buffer.  */
2505
2506       for (pos_byte = 0, pos = 0; pos_byte < length;)
2507         {
2508           unsigned char str[MAX_MULTIBYTE_LENGTH];
2509           unsigned char *add_stuff = NULL;
2510           int add_len = 0;
2511           int idx = -1;
2512
2513           if (str_multibyte)
2514             {
2515               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2516               if (!buf_multibyte)
2517                 c = multibyte_char_to_unibyte (c, rev_tbl);
2518             }
2519           else
2520             {
2521               /* Note that we don't have to increment POS.  */
2522               c = SREF (newtext, pos_byte++);
2523               if (buf_multibyte)
2524                 c = unibyte_char_to_multibyte (c);
2525             }
2526
2527           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2528              or set IDX to a match index, which means put that part
2529              of the buffer text into SUBSTED.  */
2530
2531           if (c == '\\')
2532             {
2533               really_changed = 1;
2534
2535               if (str_multibyte)
2536                 {
2537                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2538                                                       pos, pos_byte);
2539                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2540                     c = multibyte_char_to_unibyte (c, rev_tbl);
2541                 }
2542               else
2543                 {
2544                   c = SREF (newtext, pos_byte++);
2545                   if (buf_multibyte)
2546                     c = unibyte_char_to_multibyte (c);
2547                 }
2548
2549               if (c == '&')
2550                 idx = sub;
2551               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2552                 {
2553                   if (search_regs.start[c - '0'] >= 1)
2554                     idx = c - '0';
2555                 }
2556               else if (c == '\\')
2557                 add_len = 1, add_stuff = "\\";
2558               else
2559                 {
2560                   xfree (substed);
2561                   error ("Invalid use of `\\' in replacement text");
2562                 }
2563             }
2564           else
2565             {
2566               add_len = CHAR_STRING (c, str);
2567               add_stuff = str;
2568             }
2569
2570           /* If we want to copy part of a previous match,
2571              set up ADD_STUFF and ADD_LEN to point to it.  */
2572           if (idx >= 0)
2573             {
2574               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2575               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2576               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2577                 move_gap (search_regs.start[idx]);
2578               add_stuff = BYTE_POS_ADDR (begbyte);
2579             }
2580
2581           /* Now the stuff we want to add to SUBSTED
2582              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2583
2584           /* Make sure SUBSTED is big enough.  */
2585           if (substed_len + add_len >= substed_alloc_size)
2586             {
2587               substed_alloc_size = substed_len + add_len + 500;
2588               substed = (unsigned char *) xrealloc (substed,
2589                                                     substed_alloc_size + 1);
2590             }
2591
2592           /* Now add to the end of SUBSTED.  */
2593           if (add_stuff)
2594             {
2595               bcopy (add_stuff, substed + substed_len, add_len);
2596               substed_len += add_len;
2597             }
2598         }
2599
2600       if (really_changed)
2601         {
2602           if (buf_multibyte)
2603             {
2604               int nchars = multibyte_chars_in_text (substed, substed_len);
2605
2606               newtext = make_multibyte_string (substed, nchars, substed_len);
2607             }
2608           else
2609             newtext = make_unibyte_string (substed, substed_len);
2610         }
2611       xfree (substed);
2612     }
2613
2614   /* Replace the old text with the new in the cleanest possible way.  */
2615   replace_range (search_regs.start[sub], search_regs.end[sub],
2616                  newtext, 1, 0, 1);
2617   newpoint = search_regs.start[sub] + SCHARS (newtext);
2618
2619   if (case_action == all_caps)
2620     Fupcase_region (make_number (search_regs.start[sub]),
2621                     make_number (newpoint));
2622   else if (case_action == cap_initial)
2623     Fupcase_initials_region (make_number (search_regs.start[sub]),
2624                              make_number (newpoint));
2625
2626   /* Adjust search data for this change.  */
2627   {
2628     int oldend = search_regs.end[sub];
2629     int oldstart = search_regs.start[sub];
2630     int change = newpoint - search_regs.end[sub];
2631     int i;
2632
2633     for (i = 0; i < search_regs.num_regs; i++)
2634       {
2635         if (search_regs.start[i] >= oldend)
2636           search_regs.start[i] += change;
2637         else if (search_regs.start[i] > oldstart)
2638           search_regs.start[i] = oldstart;
2639         if (search_regs.end[i] >= oldend)
2640           search_regs.end[i] += change;
2641         else if (search_regs.end[i] > oldstart)
2642           search_regs.end[i] = oldstart;
2643       }
2644   }
2645
2646   /* Put point back where it was in the text.  */
2647   if (opoint <= 0)
2648     TEMP_SET_PT (opoint + ZV);
2649   else
2650     TEMP_SET_PT (opoint);
2651
2652   /* Now move point "officially" to the start of the inserted replacement.  */
2653   move_if_not_intangible (newpoint);
2654
2655   return Qnil;
2656 }
2657 \f
2658 static Lisp_Object
2659 match_limit (num, beginningp)
2660      Lisp_Object num;
2661      int beginningp;
2662 {
2663   register int n;
2664
2665   CHECK_NUMBER (num);
2666   n = XINT (num);
2667   if (n < 0)
2668     args_out_of_range (num, make_number (0));
2669   if (search_regs.num_regs <= 0)
2670     error ("No match data, because no search succeeded");
2671   if (n >= search_regs.num_regs
2672       || search_regs.start[n] < 0)
2673     return Qnil;
2674   return (make_number ((beginningp) ? search_regs.start[n]
2675                                     : search_regs.end[n]));
2676 }
2677
2678 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2679        doc: /* Return position of start of text matched by last search.
2680 SUBEXP, a number, specifies which parenthesized expression in the last
2681   regexp.
2682 Value is nil if SUBEXPth pair didn't match, or there were less than
2683   SUBEXP pairs.
2684 Zero means the entire text matched by the whole regexp or whole string.  */)
2685      (subexp)
2686      Lisp_Object subexp;
2687 {
2688   return match_limit (subexp, 1);
2689 }
2690
2691 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2692        doc: /* Return position of end of text matched by last search.
2693 SUBEXP, a number, specifies which parenthesized expression in the last
2694   regexp.
2695 Value is nil if SUBEXPth pair didn't match, or there were less than
2696   SUBEXP pairs.
2697 Zero means the entire text matched by the whole regexp or whole string.  */)
2698      (subexp)
2699      Lisp_Object subexp;
2700 {
2701   return match_limit (subexp, 0);
2702 }
2703
2704 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2705        doc: /* Return a list containing all info on what the last search matched.
2706 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2707 All the elements are markers or nil (nil if the Nth pair didn't match)
2708 if the last match was on a buffer; integers or nil if a string was matched.
2709 Use `store-match-data' to reinstate the data in this list.
2710
2711 If INTEGERS (the optional first argument) is non-nil, always use
2712 integers \(rather than markers) to represent buffer positions.  In
2713 this case, and if the last match was in a buffer, the buffer will get
2714 stored as one additional element at the end of the list.
2715
2716 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2717 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2718
2719 Return value is undefined if the last search failed.  */)
2720      (integers, reuse)
2721      Lisp_Object integers, reuse;
2722 {
2723   Lisp_Object tail, prev;
2724   Lisp_Object *data;
2725   int i, len;
2726
2727   if (NILP (last_thing_searched))
2728     return Qnil;
2729
2730   prev = Qnil;
2731
2732   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2733                                  * sizeof (Lisp_Object));
2734
2735   len = 0;
2736   for (i = 0; i < search_regs.num_regs; i++)
2737     {
2738       int start = search_regs.start[i];
2739       if (start >= 0)
2740         {
2741           if (EQ (last_thing_searched, Qt)
2742               || ! NILP (integers))
2743             {
2744               XSETFASTINT (data[2 * i], start);
2745               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2746             }
2747           else if (BUFFERP (last_thing_searched))
2748             {
2749               data[2 * i] = Fmake_marker ();
2750               Fset_marker (data[2 * i],
2751                            make_number (start),
2752                            last_thing_searched);
2753               data[2 * i + 1] = Fmake_marker ();
2754               Fset_marker (data[2 * i + 1],
2755                            make_number (search_regs.end[i]),
2756                            last_thing_searched);
2757             }
2758           else
2759             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2760             abort ();
2761
2762           len = 2*(i+1);
2763         }
2764       else
2765         data[2 * i] = data [2 * i + 1] = Qnil;
2766     }
2767
2768   if (BUFFERP (last_thing_searched) && !NILP (integers))
2769     {
2770       data[len] = last_thing_searched;
2771       len++;
2772     }
2773
2774   /* If REUSE is not usable, cons up the values and return them.  */
2775   if (! CONSP (reuse))
2776     return Flist (len, data);
2777
2778   /* If REUSE is a list, store as many value elements as will fit
2779      into the elements of REUSE.  */
2780   for (i = 0, tail = reuse; CONSP (tail);
2781        i++, tail = XCDR (tail))
2782     {
2783       if (i < len)
2784         XSETCAR (tail, data[i]);
2785       else
2786         XSETCAR (tail, Qnil);
2787       prev = tail;
2788     }
2789
2790   /* If we couldn't fit all value elements into REUSE,
2791      cons up the rest of them and add them to the end of REUSE.  */
2792   if (i < len)
2793     XSETCDR (prev, Flist (len - i, data + i));
2794
2795   return reuse;
2796 }
2797
2798
2799 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2800        doc: /* Set internal data on last search match from elements of LIST.
2801 LIST should have been created by calling `match-data' previously.  */)
2802      (list)
2803      register Lisp_Object list;
2804 {
2805   register int i;
2806   register Lisp_Object marker;
2807
2808   if (running_asynch_code)
2809     save_search_regs ();
2810
2811   if (!CONSP (list) && !NILP (list))
2812     list = wrong_type_argument (Qconsp, list);
2813
2814   /* Unless we find a marker with a buffer or an explicit buffer
2815      in LIST, assume that this match data came from a string.  */
2816   last_thing_searched = Qt;
2817
2818   /* Allocate registers if they don't already exist.  */
2819   {
2820     int length = XFASTINT (Flength (list)) / 2;
2821
2822     if (length > search_regs.num_regs)
2823       {
2824         if (search_regs.num_regs == 0)
2825           {
2826             search_regs.start
2827               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2828             search_regs.end
2829               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2830           }
2831         else
2832           {
2833             search_regs.start
2834               = (regoff_t *) xrealloc (search_regs.start,
2835                                        length * sizeof (regoff_t));
2836             search_regs.end
2837               = (regoff_t *) xrealloc (search_regs.end,
2838                                        length * sizeof (regoff_t));
2839           }
2840
2841         for (i = search_regs.num_regs; i < length; i++)
2842           search_regs.start[i] = -1;
2843
2844         search_regs.num_regs = length;
2845       }
2846
2847     for (i = 0;; i++)
2848       {
2849         marker = Fcar (list);
2850         if (BUFFERP (marker))
2851           {
2852             last_thing_searched = marker;
2853             break;
2854           }
2855         if (i >= length)
2856           break;
2857         if (NILP (marker))
2858           {
2859             search_regs.start[i] = -1;
2860             list = Fcdr (list);
2861           }
2862         else
2863           {
2864             int from;
2865
2866             if (MARKERP (marker))
2867               {
2868                 if (XMARKER (marker)->buffer == 0)
2869                   XSETFASTINT (marker, 0);
2870                 else
2871                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2872               }
2873
2874             CHECK_NUMBER_COERCE_MARKER (marker);
2875             from = XINT (marker);
2876             list = Fcdr (list);
2877
2878             marker = Fcar (list);
2879             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2880               XSETFASTINT (marker, 0);
2881
2882             CHECK_NUMBER_COERCE_MARKER (marker);
2883             search_regs.start[i] = from;
2884             search_regs.end[i] = XINT (marker);
2885           }
2886         list = Fcdr (list);
2887       }
2888
2889     for (; i < search_regs.num_regs; i++)
2890       search_regs.start[i] = -1;
2891   }
2892
2893   return Qnil;
2894 }
2895
2896 /* If non-zero the match data have been saved in saved_search_regs
2897    during the execution of a sentinel or filter. */
2898 static int search_regs_saved;
2899 static struct re_registers saved_search_regs;
2900 static Lisp_Object saved_last_thing_searched;
2901
2902 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2903    if asynchronous code (filter or sentinel) is running. */
2904 static void
2905 save_search_regs ()
2906 {
2907   if (!search_regs_saved)
2908     {
2909       saved_search_regs.num_regs = search_regs.num_regs;
2910       saved_search_regs.start = search_regs.start;
2911       saved_search_regs.end = search_regs.end;
2912       saved_last_thing_searched = last_thing_searched;
2913       last_thing_searched = Qnil;
2914       search_regs.num_regs = 0;
2915       search_regs.start = 0;
2916       search_regs.end = 0;
2917
2918       search_regs_saved = 1;
2919     }
2920 }
2921
2922 /* Called upon exit from filters and sentinels. */
2923 void
2924 restore_match_data ()
2925 {
2926   if (search_regs_saved)
2927     {
2928       if (search_regs.num_regs > 0)
2929         {
2930           xfree (search_regs.start);
2931           xfree (search_regs.end);
2932         }
2933       search_regs.num_regs = saved_search_regs.num_regs;
2934       search_regs.start = saved_search_regs.start;
2935       search_regs.end = saved_search_regs.end;
2936       last_thing_searched = saved_last_thing_searched;
2937       saved_last_thing_searched = Qnil;
2938       search_regs_saved = 0;
2939     }
2940 }
2941
2942 /* Quote a string to inactivate reg-expr chars */
2943
2944 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2945        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2946      (string)
2947      Lisp_Object string;
2948 {
2949   register unsigned char *in, *out, *end;
2950   register unsigned char *temp;
2951   int backslashes_added = 0;
2952
2953   CHECK_STRING (string);
2954
2955   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2956
2957   /* Now copy the data into the new string, inserting escapes. */
2958
2959   in = SDATA (string);
2960   end = in + SBYTES (string);
2961   out = temp;
2962
2963   for (; in != end; in++)
2964     {
2965       if (*in == '[' || *in == ']'
2966           || *in == '*' || *in == '.' || *in == '\\'
2967           || *in == '?' || *in == '+'
2968           || *in == '^' || *in == '$')
2969         *out++ = '\\', backslashes_added++;
2970       *out++ = *in;
2971     }
2972
2973   return make_specified_string (temp,
2974                                 SCHARS (string) + backslashes_added,
2975                                 out - temp,
2976                                 STRING_MULTIBYTE (string));
2977 }
2978 \f
2979 void
2980 syms_of_search ()
2981 {
2982   register int i;
2983
2984   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2985     {
2986       searchbufs[i].buf.allocated = 100;
2987       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
2988       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2989       searchbufs[i].regexp = Qnil;
2990       searchbufs[i].whitespace_regexp = Qnil;
2991       staticpro (&searchbufs[i].regexp);
2992       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2993     }
2994   searchbuf_head = &searchbufs[0];
2995
2996   Qsearch_failed = intern ("search-failed");
2997   staticpro (&Qsearch_failed);
2998   Qinvalid_regexp = intern ("invalid-regexp");
2999   staticpro (&Qinvalid_regexp);
3000
3001   Fput (Qsearch_failed, Qerror_conditions,
3002         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3003   Fput (Qsearch_failed, Qerror_message,
3004         build_string ("Search failed"));
3005
3006   Fput (Qinvalid_regexp, Qerror_conditions,
3007         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3008   Fput (Qinvalid_regexp, Qerror_message,
3009         build_string ("Invalid regexp"));
3010
3011   last_thing_searched = Qnil;
3012   staticpro (&last_thing_searched);
3013
3014   saved_last_thing_searched = Qnil;
3015   staticpro (&saved_last_thing_searched);
3016
3017   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3018       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3019 Some commands use this for user-specified regexps.
3020 Spaces that occur inside character classes or repetition operators
3021 or other such regexp constructs are not replaced with this.
3022 A value of nil (which is the normal value) means treat spaces literally.  */);
3023   Vsearch_spaces_regexp = Qnil;
3024
3025   defsubr (&Slooking_at);
3026   defsubr (&Sposix_looking_at);
3027   defsubr (&Sstring_match);
3028   defsubr (&Sposix_string_match);
3029   defsubr (&Ssearch_forward);
3030   defsubr (&Ssearch_backward);
3031   defsubr (&Sword_search_forward);
3032   defsubr (&Sword_search_backward);
3033   defsubr (&Sre_search_forward);
3034   defsubr (&Sre_search_backward);
3035   defsubr (&Sposix_search_forward);
3036   defsubr (&Sposix_search_backward);
3037   defsubr (&Sreplace_match);
3038   defsubr (&Smatch_beginning);
3039   defsubr (&Smatch_end);
3040   defsubr (&Smatch_data);
3041   defsubr (&Sset_match_data);
3042   defsubr (&Sregexp_quote);
3043 }
3044
3045 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3046    (do not change this comment) */