src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
   3              Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
     /* Number of entries in the compiled-regexp cache, i.e. the length of
        the searchbufs array below.  */
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* One cache entry.  If the regexp is non-nil, then the buffer contains
  40    the compiled form of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
       /* Next entry in the list headed by searchbuf_head, or 0 at the end.  */
  43   struct regexp_cache *next;
       /* The pattern string this entry was compiled from (compile_pattern_1
          stores a copy), or nil when the entry holds no valid pattern.  */
  44   Lisp_Object regexp;
       /* The compiled pattern itself.  */
  45   struct re_pattern_buffer buf;
       /* 0400 is octal for 256 bytes.  Not referenced in the visible part
          of this file; presumably installed as buf.fastmap when the cache
          is initialized -- TODO confirm.  */
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* Error condition signaled when compiling a regexp fails
        (see compile_pattern_1).  */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
     /* Forward declarations of static helpers.  Being static, they must be
        defined in this translation unit; their definitions lie beyond the
        functions that first use them.  */
  86 static void set_search_regs ();
  87 static void save_search_regs ();
  88 static int simple_search ();
  89 static int boyer_moore ();
  90 static int search_buffer ();
  91
     /* Signal a Lisp error reporting that the regexp matcher overflowed its
        stack.  The callers in this file invoke it when re_match_2 or
        re_search returns -2.  */
  92 static void
  93 matcher_overflow ()
  94 {
  95   error ("Stack overflow in regexp matcher");
  96 }
  97
  98 /* Compile a regexp and signal a Lisp error if anything goes wrong.
  99    PATTERN is the pattern to compile.
 100    CP is the place to put the result.
 101    TRANSLATE is a translation table for ignoring case, or nil for none.
 102    REGP is the structure that says where to store the "register"
 103    values that will result from matching this pattern.
 104    If it is 0, we should compile the pattern not to record any
 105    subexpression bounds.
 106    POSIX is nonzero if we want full backtracking (POSIX style)
 107    for this pattern.  0 means backtrack only enough to get a valid match.
 108    MULTIBYTE is nonzero iff a target of match is a multibyte buffer or
 109    string.  */
 110
 111 static void
 112 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 113      struct regexp_cache *cp;
 114      Lisp_Object pattern;
 115      Lisp_Object translate;
 116      struct re_registers *regp;
 117      int posix;
 118      int multibyte;
 119 {
 120   char *val;
 121   reg_syntax_t old;
 122
 123   cp->regexp = Qnil;
 124   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 125   cp->posix = posix;
 126   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 127   cp->buf.target_multibyte = multibyte;
 128   BLOCK_INPUT;
 129   old = re_set_syntax (RE_SYNTAX_EMACS
 130                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 131   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 132                                      SBYTES (pattern), &cp->buf);
 133   re_set_syntax (old);
 134   UNBLOCK_INPUT;
 135   if (val)
 136     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 137
 138   cp->regexp = Fcopy_sequence (pattern);
 139 }
 140
 141 /* Shrink each compiled regexp buffer in the cache
 142    to the size actually used right now.
 143    This is called from garbage collection.  */
 144
 145 void
 146 shrink_regexp_cache ()
 147 {
 148   struct regexp_cache *cp;
 149
 150   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 151     {
 152       cp->buf.allocated = cp->buf.used;
 153       cp->buf.buffer
 154         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 155     }
 156 }
 157
 158 /* Compile a regexp if necessary, but first check to see if there's one in
 159    the cache.
 160    PATTERN is the pattern to compile.
 161    TRANSLATE is a translation table for ignoring case, or nil for none.
 162    REGP is the structure that says where to store the "register"
 163    values that will result from matching this pattern.
 164    If it is 0, we should compile the pattern not to record any
 165    subexpression bounds.
 166    POSIX is nonzero if we want full backtracking (POSIX style)
 167    for this pattern.  0 means backtrack only enough to get a valid match.  */
 168
 169 struct re_pattern_buffer *
 170 compile_pattern (pattern, regp, translate, posix, multibyte)
 171      Lisp_Object pattern;
 172      struct re_registers *regp;
 173      Lisp_Object translate;
 174      int posix, multibyte;
 175 {
 176   struct regexp_cache *cp, **cpp;
 177
 178   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 179     {
 180       cp = *cpp;
 181       /* Entries are initialized to nil, and may be set to nil by
 182          compile_pattern_1 if the pattern isn't valid.  Don't apply
 183          string accessors in those cases.  However, compile_pattern_1
 184          is only applied to the cache entry we pick here to reuse.  So
 185          nil should never appear before a non-nil entry.  */
 186       if (NILP (cp->regexp))
 187         goto compile_it;
 188       if (SCHARS (cp->regexp) == SCHARS (pattern)
 189           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 190           && !NILP (Fstring_equal (cp->regexp, pattern))
 191           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 192           && cp->posix == posix
 193           && cp->buf.target_multibyte == multibyte)
 194         break;
 195
 196       /* If we're at the end of the cache, compile into the nil cell
 197          we found, or the last (least recently used) cell with a
 198          string value.  */
 199       if (cp->next == 0)
 200         {
 201         compile_it:
 202           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 203           break;
 204         }
 205     }
 206
 207   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 208      either because we found it in the cache or because we just compiled it.
 209      Move it to the front of the queue to mark it as most recently used.  */
 210   *cpp = cp->next;
 211   cp->next = searchbuf_head;
 212   searchbuf_head = cp;
 213
 214   /* Advise the searching functions about the space we have allocated
 215      for register data.  */
 216   if (regp)
 217     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 218
 219   return &cp->buf;
 220 }
 221
 222 /* Error condition used for failing searches */
 223 Lisp_Object Qsearch_failed;
 224
 225 Lisp_Object
 226 signal_failure (arg)
 227      Lisp_Object arg;
 228 {
 229   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 230   return Qnil;
 231 }
 232 \f
 233 static Lisp_Object
 234 looking_at_1 (string, posix)
 235      Lisp_Object string;
 236      int posix;
 237 {
 238   Lisp_Object val;
 239   unsigned char *p1, *p2;
 240   int s1, s2;
 241   register int i;
 242   struct re_pattern_buffer *bufp;
 243
 244   if (running_asynch_code)
 245     save_search_regs ();
 246
 247   CHECK_STRING (string);
 248   bufp = compile_pattern (string, &search_regs,
 249                           (!NILP (current_buffer->case_fold_search)
 250                            ? DOWNCASE_TABLE : Qnil),
 251                           posix,
 252                           !NILP (current_buffer->enable_multibyte_characters));
 253
 254   immediate_quit = 1;
 255   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 256
 257   /* Get pointers and sizes of the two strings
 258      that make up the visible portion of the buffer. */
 259
 260   p1 = BEGV_ADDR;
 261   s1 = GPT_BYTE - BEGV_BYTE;
 262   p2 = GAP_END_ADDR;
 263   s2 = ZV_BYTE - GPT_BYTE;
 264   if (s1 < 0)
 265     {
 266       p2 = p1;
 267       s2 = ZV_BYTE - BEGV_BYTE;
 268       s1 = 0;
 269     }
 270   if (s2 < 0)
 271     {
 272       s1 = ZV_BYTE - BEGV_BYTE;
 273       s2 = 0;
 274     }
 275
 276   re_match_object = Qnil;
 277
 278   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 279                   PT_BYTE - BEGV_BYTE, &search_regs,
 280                   ZV_BYTE - BEGV_BYTE);
 281   immediate_quit = 0;
 282
 283   if (i == -2)
 284     matcher_overflow ();
 285
 286   val = (0 <= i ? Qt : Qnil);
 287   if (i >= 0)
 288     for (i = 0; i < search_regs.num_regs; i++)
 289       if (search_regs.start[i] >= 0)
 290         {
 291           search_regs.start[i]
 292             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 293           search_regs.end[i]
 294             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 295         }
 296   XSETBUFFER (last_thing_searched, current_buffer);
 297   return val;
 298 }
 299
 300 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 301        doc: /* Return t if text after point matches regular expression REGEXP.
 302 This function modifies the match data that `match-beginning',
 303 `match-end' and `match-data' access; save and restore the match
 304 data if you want to preserve them.  */)
 305      (regexp)
 306      Lisp_Object regexp;
 307 {
       /* Second argument 0: compile REGEXP without full POSIX
          backtracking.  */
 308   return looking_at_1 (regexp, 0);
 309 }
 310
 311 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 312        doc: /* Return t if text after point matches regular expression REGEXP.
 313 Find the longest match, in accord with Posix regular expression rules.
 314 This function modifies the match data that `match-beginning',
 315 `match-end' and `match-data' access; save and restore the match
 316 data if you want to preserve them.  */)
 317      (regexp)
 318      Lisp_Object regexp;
 319 {
       /* Second argument 1: compile REGEXP for full POSIX backtracking.  */
 320   return looking_at_1 (regexp, 1);
 321 }
 322 \f
 323 static Lisp_Object
 324 string_match_1 (regexp, string, start, posix)
 325      Lisp_Object regexp, string, start;
 326      int posix;
 327 {
 328   int val;
 329   struct re_pattern_buffer *bufp;
 330   int pos, pos_byte;
 331   int i;
 332
 333   if (running_asynch_code)
 334     save_search_regs ();
 335
 336   CHECK_STRING (regexp);
 337   CHECK_STRING (string);
 338
 339   if (NILP (start))
 340     pos = 0, pos_byte = 0;
 341   else
 342     {
 343       int len = SCHARS (string);
 344
 345       CHECK_NUMBER (start);
 346       pos = XINT (start);
 347       if (pos < 0 && -pos <= len)
 348         pos = len + pos;
 349       else if (0 > pos || pos > len)
 350         args_out_of_range (string, start);
 351       pos_byte = string_char_to_byte (string, pos);
 352     }
 353
 354   bufp = compile_pattern (regexp, &search_regs,
 355                           (!NILP (current_buffer->case_fold_search)
 356                            ? DOWNCASE_TABLE : Qnil),
 357                           posix,
 358                           STRING_MULTIBYTE (string));
 359   immediate_quit = 1;
 360   re_match_object = string;
 361
 362   val = re_search (bufp, (char *) SDATA (string),
 363                    SBYTES (string), pos_byte,
 364                    SBYTES (string) - pos_byte,
 365                    &search_regs);
 366   immediate_quit = 0;
 367   last_thing_searched = Qt;
 368   if (val == -2)
 369     matcher_overflow ();
 370   if (val < 0) return Qnil;
 371
 372   for (i = 0; i < search_regs.num_regs; i++)
 373     if (search_regs.start[i] >= 0)
 374       {
 375         search_regs.start[i]
 376           = string_byte_to_char (string, search_regs.start[i]);
 377         search_regs.end[i]
 378           = string_byte_to_char (string, search_regs.end[i]);
 379       }
 380
 381   return make_number (string_byte_to_char (string, val));
 382 }
 383
 384 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 385        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 386 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 387 If third arg START is non-nil, start search at that index in STRING.
 388 For index of first char beyond the match, do (match-end 0).
 389 `match-end' and `match-beginning' also give indices of substrings
 390 matched by parenthesis constructs in the pattern.
 391
 392 You can use the function `match-string' to extract the substrings
 393 matched by the parenthesis constructions in REGEXP. */)
 394      (regexp, string, start)
 395      Lisp_Object regexp, string, start;
 396 {
       /* Last argument 0: compile REGEXP without full POSIX
          backtracking.  */
 397   return string_match_1 (regexp, string, start, 0);
 398 }
 399
 400 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 401        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 402 Find the longest match, in accord with Posix regular expression rules.
 403 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 404 If third arg START is non-nil, start search at that index in STRING.
 405 For index of first char beyond the match, do (match-end 0).
 406 `match-end' and `match-beginning' also give indices of substrings
 407 matched by parenthesis constructs in the pattern.  */)
 408      (regexp, string, start)
 409      Lisp_Object regexp, string, start;
 410 {
       /* Last argument 1: compile REGEXP for full POSIX backtracking.  */
 411   return string_match_1 (regexp, string, start, 1);
 412 }
 413
 414 /* Match REGEXP against STRING, searching all of STRING,
 415    and return the index of the match, or negative on failure.
 416    This does not clobber the match data.  */
 417
 418 int
 419 fast_string_match (regexp, string)
 420      Lisp_Object regexp, string;
 421 {
 422   int val;
 423   struct re_pattern_buffer *bufp;
 424
 425   bufp = compile_pattern (regexp, 0, Qnil,
 426                           0, STRING_MULTIBYTE (string));
 427   immediate_quit = 1;
 428   re_match_object = string;
 429
 430   val = re_search (bufp, (char *) SDATA (string),
 431                    SBYTES (string), 0,
 432                    SBYTES (string), 0);
 433   immediate_quit = 0;
 434   return val;
 435 }
 436
 437 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 438    and return the index of the match, or negative on failure.
 439    This does not clobber the match data.
 440    We assume that STRING contains single-byte characters.  */
 441
 442 extern Lisp_Object Vascii_downcase_table;
 443
 444 int
 445 fast_c_string_match_ignore_case (regexp, string)
 446      Lisp_Object regexp;
 447      const char *string;
 448 {
 449   int val;
 450   struct re_pattern_buffer *bufp;
 451   int len = strlen (string);
 452
 453   regexp = string_make_unibyte (regexp);
 454   re_match_object = Qt;
 455   bufp = compile_pattern (regexp, 0,
 456                           Vascii_downcase_table, 0,
 457                           0);
 458   immediate_quit = 1;
 459   val = re_search (bufp, string, len, 0, len, 0);
 460   immediate_quit = 0;
 461   return val;
 462 }
 463 \f
 464 /* The newline cache: remembering which sections of text have no newlines.  */
 465
 466 /* If the user has requested newline caching, make sure it's on.
 467    Otherwise, make sure it's off.
 468    This is our cheezy way of associating an action with the change of
 469    state of a buffer-local variable.  */
 470 static void
 471 newline_cache_on_off (buf)
 472      struct buffer *buf;
 473 {
 474   if (NILP (buf->cache_long_line_scans))
 475     {
 476       /* It should be off.  */
 477       if (buf->newline_cache)
 478         {
 479           free_region_cache (buf->newline_cache);
 480           buf->newline_cache = 0;
 481         }
 482     }
 483   else
 484     {
 485       /* It should be on.  */
 486       if (buf->newline_cache == 0)
 487         buf->newline_cache = new_region_cache ();
 488     }
 489 }
 490
 491 \f
 492 /* Search for COUNT instances of the character TARGET between START and END.
        START and END are character positions; the scan itself works on the
        corresponding byte positions.
 493
 494    If COUNT is positive, search forwards; END must be >= START.
 495    If COUNT is negative, search backwards for the -COUNTth instance;
 496       END must be <= START.
 497    If COUNT is zero, do anything you please; run rogue, for all I care.
        (As written, a zero COUNT takes the backward branch.)
 498
 499    If END is zero, use BEGV or ZV instead, as appropriate for the
 500    direction indicated by COUNT.
 501
 502    If we find COUNT instances, set *SHORTAGE to zero, and return the
 503    position after the COUNTth match.  Note that for reverse motion
 504    this is not the same as the usual convention for Emacs motion commands.
 505
 506    If we don't find COUNT instances before reaching END, set *SHORTAGE
 507    to the number of TARGETs left unfound, and return END.
        SHORTAGE may be a null pointer, in which case nothing is stored.
 508
 509    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 510    except when inside redisplay.  immediate_quit is always reset to 0
        before returning.

        When TARGET is a newline and the current buffer has a newline
        cache, the cache is consulted to skip regions already known to be
        free of newlines, and is updated with what each scan learns.  */
 511
 512 int
 513 scan_buffer (target, start, end, count, shortage, allow_quit)
 514      register int target;
 515      int start, end;
 516      int count;
 517      int *shortage;
 518      int allow_quit;
 519 {
 520   struct region_cache *newline_cache;
 521   int direction;
 522
 523   if (count > 0)
 524     {
 525       direction = 1;
 526       if (! end) end = ZV;
 527     }
 528   else
 529     {
 530       direction = -1;
 531       if (! end) end = BEGV;
 532     }
 533
       /* Create or free the buffer's newline cache as its
          cache_long_line_scans setting demands, then pick it up.  */
 534   newline_cache_on_off (current_buffer);
 535   newline_cache = current_buffer->newline_cache;
 536
 537   if (shortage != 0)
 538     *shortage = 0;
 539
 540   immediate_quit = allow_quit;
 541
       /* Forward scan.  */
 542   if (count > 0)
 543     while (start != end)
 544       {
 545         /* Our innermost scanning loop is very simple; it doesn't know
 546            about gaps, buffer ends, or the newline cache.  ceiling is
 547            the position of the last character before the next such
 548            obstacle --- the last character the dumb search loop should
 549            examine.  */
 550         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 551         int start_byte = CHAR_TO_BYTE (start);
 552         int tem;
 553
 554         /* If we're looking for a newline, consult the newline cache
 555            to see where we can avoid some scanning.  */
 556         if (target == '\n' && newline_cache)
 557           {
 558             int next_change;
                 /* Quitting is disabled while the cache is being consulted.  */
 559             immediate_quit = 0;
 560             while (region_cache_forward
 561                    (current_buffer, newline_cache, start_byte, &next_change))
 562               start_byte = next_change;
 563             immediate_quit = allow_quit;
 564
 565             /* START should never be after END.  */
 566             if (start_byte > ceiling_byte)
 567               start_byte = ceiling_byte;
 568
 569             /* Now the text after start is an unknown region, and
 570                next_change is the position of the next known region. */
 571             ceiling_byte = min (next_change - 1, ceiling_byte);
 572           }
 573
 574         /* The dumb loop can only scan text stored in contiguous
 575            bytes.  BUFFER_CEILING_OF gives the last position that is
 576            contiguous with START_BYTE, so the scan must not go past
 577            it; the termination address below is one past the ceiling.  */
 578         tem = BUFFER_CEILING_OF (start_byte);
 579         ceiling_byte = min (tem, ceiling_byte);
 580
 581         {
 582           /* The termination address of the dumb loop.  */
 583           register unsigned char *ceiling_addr
 584             = BYTE_POS_ADDR (ceiling_byte) + 1;
 585           register unsigned char *cursor
 586             = BYTE_POS_ADDR (start_byte);
               /* BASE stays at the address of START_BYTE, so that
                  start_byte + (ptr - base) is the byte position of PTR.  */
 587           unsigned char *base = cursor;
 588
 589           while (cursor < ceiling_addr)
 590             {
 591               unsigned char *scan_start = cursor;
 592
 593               /* The dumb loop.  */
 594               while (*cursor != target && ++cursor < ceiling_addr)
 595                 ;
 596
 597               /* If we're looking for newlines, cache the fact that
 598                  the region from start to cursor is free of them. */
 599               if (target == '\n' && newline_cache)
 600                 know_region_cache (current_buffer, newline_cache,
 601                                    start_byte + scan_start - base,
 602                                    start_byte + cursor - base);
 603
 604               /* Did we find the target character?  */
 605               if (cursor < ceiling_addr)
 606                 {
 607                   if (--count == 0)
 608                     {
 609                       immediate_quit = 0;
                           /* The + 1 yields the position after the match.  */
 610                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 611                     }
 612                   cursor++;
 613                 }
 614             }
 615
               /* Resume the outer loop where this stretch ended.  */
 616           start = BYTE_TO_CHAR (start_byte + cursor - base);
 617         }
 618       }
       /* Backward scan.  */
 619   else
 620     while (start > end)
 621       {
 622         /* The last character to check before the next obstacle.  */
 623         int ceiling_byte = CHAR_TO_BYTE (end);
 624         int start_byte = CHAR_TO_BYTE (start);
 625         int tem;
 626
 627         /* Consult the newline cache, if appropriate.  */
 628         if (target == '\n' && newline_cache)
 629           {
 630             int next_change;
 631             immediate_quit = 0;
 632             while (region_cache_backward
 633                    (current_buffer, newline_cache, start_byte, &next_change))
 634               start_byte = next_change;
 635             immediate_quit = allow_quit;
 636
 637             /* Start should never be at or before end.  */
 638             if (start_byte <= ceiling_byte)
 639               start_byte = ceiling_byte + 1;
 640
 641             /* Now the text before start is an unknown region, and
 642                next_change is the position of the next known region. */
 643             ceiling_byte = max (next_change, ceiling_byte);
 644           }
 645
 646         /* Stop scanning before the gap.  */
 647         tem = BUFFER_FLOOR_OF (start_byte - 1);
 648         ceiling_byte = max (tem, ceiling_byte);
 649
 650         {
 651           /* The termination address of the dumb loop.  */
 652           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
               /* The first byte examined is the one just before START_BYTE.  */
 653           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 654           unsigned char *base = cursor;
 655
 656           while (cursor >= ceiling_addr)
 657             {
 658               unsigned char *scan_start = cursor;
 659
 660               while (*cursor != target && --cursor >= ceiling_addr)
 661                 ;
 662
 663               /* If we're looking for newlines, cache the fact that
 664                  the region from after the cursor to start is free of them.  */
 665               if (target == '\n' && newline_cache)
 666                 know_region_cache (current_buffer, newline_cache,
 667                                    start_byte + cursor - base,
 668                                    start_byte + scan_start - base);
 669
 670               /* Did we find the target character?  */
 671               if (cursor >= ceiling_addr)
 672                 {
 673                   if (++count >= 0)
 674                     {
 675                       immediate_quit = 0;
                           /* BASE is the address of start_byte - 1, so this
                              is the position just after the match.  */
 676                       return BYTE_TO_CHAR (start_byte + cursor - base);
 677                     }
 678                   cursor--;
 679                 }
 680             }
 681
 682           start = BYTE_TO_CHAR (start_byte + cursor - base);
 683         }
 684       }
 685
       /* Ran into END before finding COUNT instances.  COUNT * DIRECTION
          is the non-negative number of instances still missing.  */
 686   immediate_quit = 0;
 687   if (shortage != 0)
 688     *shortage = count * direction;
 689   return start;
 690 }
 691 \f
 692 /* Search for COUNT instances of a line boundary.  As written, the
 693    code below recognizes only a newline ('\n') as a line boundary.
 694    Start at START.  If COUNT is negative, search backwards.
        The scan stops at LIMIT_BYTE.  The START_BYTE argument is
        recomputed from START with CHAR_TO_BYTE before scanning.
 695
 696    We report the resulting position by calling TEMP_SET_PT_BOTH.
 697
 698    If we find COUNT instances, we position after (always after,
 699    even if scanning backwards) the COUNTth match, and return 0.
 700
 701    If we don't find COUNT instances before reaching LIMIT_BYTE,
 702    we return
 703    the number of line boundaries left unfound, and position at
 704    the limit we bumped up against (LIMIT, LIMIT_BYTE).
 705
 706    If ALLOW_QUIT is non-zero, immediate_quit is incremented while
 707    scanning.  Its previous value is restored before returning.  */
 708
 709 int
 710 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 711      int start, start_byte;
 712      int limit, limit_byte;
 713      register int count;
 714      int allow_quit;
 715 {
 716   int direction = ((count > 0) ? 1 : -1);
 717
 718   register unsigned char *cursor;
 719   unsigned char *base;
 720
 721   register int ceiling;
 722   register unsigned char *ceiling_addr;
 723
 724   int old_immediate_quit = immediate_quit;
 725
 726   /* The code that follows is like scan_buffer, but it does not use
 727      the newline cache and it only ever looks for '\n'.  */
 728
 729   if (allow_quit)
 730     immediate_quit++;
 731
       /* The caller's START_BYTE is discarded in favor of this value.  */
 732   start_byte = CHAR_TO_BYTE (start);
 733
 734   if (count > 0)
 735     {
 736       while (start_byte < limit_byte)
 737         {
               /* Scan one contiguous stretch, ending at the lower of the
                  ceiling for START_BYTE and the last byte before the limit.  */
 738           ceiling =  BUFFER_CEILING_OF (start_byte);
 739           ceiling = min (limit_byte - 1, ceiling);
 740           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 741           base = (cursor = BYTE_POS_ADDR (start_byte));
 742           while (1)
 743             {
 744               while (*cursor != '\n' && ++cursor != ceiling_addr)
 745                 ;
 746
                   /* CURSOR short of CEILING_ADDR means it is on a newline.  */
 747               if (cursor != ceiling_addr)
 748                 {
 749                   if (--count == 0)
 750                     {
 751                       immediate_quit = old_immediate_quit;
                           /* The + 1 positions after the newline.  */
 752                       start_byte = start_byte + cursor - base + 1;
 753                       start = BYTE_TO_CHAR (start_byte);
 754                       TEMP_SET_PT_BOTH (start, start_byte);
 755                       return 0;
 756                     }
 757                   else
 758                     if (++cursor == ceiling_addr)
 759                       break;
 760                 }
 761               else
 762                 break;
 763             }
 764           start_byte += cursor - base;
 765         }
 766     }
 767   else
 768     {
 769       while (start_byte > limit_byte)
 770         {
 771           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 772           ceiling = max (limit_byte, ceiling);
               /* Here the termination address is one BEFORE the last byte
                  to examine, and CURSOR starts one past the first byte so
                  that the pre-decrement below lands on it.  */
 773           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 774           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 775           while (1)
 776             {
 777               while (--cursor != ceiling_addr && *cursor != '\n')
 778                 ;
 779
 780               if (cursor != ceiling_addr)
 781                 {
 782                   if (++count == 0)
 783                     {
 784                       immediate_quit = old_immediate_quit;
 785                       /* Return the position AFTER the match we found.  */
 786                       start_byte = start_byte + cursor - base + 1;
 787                       start = BYTE_TO_CHAR (start_byte);
 788                       TEMP_SET_PT_BOTH (start, start_byte);
 789                       return 0;
 790                     }
 791                 }
 792               else
 793                 break;
 794             }
 795           /* Here we add 1 to compensate for the last decrement
 796              of CURSOR, which took it past the valid range.  */
 797           start_byte += cursor - base + 1;
 798         }
 799     }
 800
       /* Not enough newlines: stop at the limit and report how many are
          missing.  */
 801   TEMP_SET_PT_BOTH (limit, limit_byte);
 802   immediate_quit = old_immediate_quit;
 803
 804   return count * direction;
 805 }
 806
     /* Scan from character position FROM for the CNTth newline (backwards
        if CNT is not positive) and return what scan_buffer returns.  The
        zero END argument makes scan_buffer use ZV or BEGV as the limit, no
        shortage is requested, and ALLOW_QUIT is 0, so immediate_quit is
        not set during the scan.  */
 807 int
 808 find_next_newline_no_quit (from, cnt)
 809      register int from, cnt;
 810 {
 811   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 812 }
 813
 814 /* Like find_next_newline, but returns position before the newline,
 815    not after, and only search up to TO.  This isn't just
 816    find_next_newline (...)-1, because you might hit TO.  */
 817
 818 int
 819 find_before_next_newline (from, to, cnt)
 820      int from, to, cnt;
 821 {
 822   int shortage;
 823   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 824
 825   if (shortage == 0)
 826     pos--;
 827
 828   return pos;
 829 }
 830 \f
 831 /* Subroutines of Lisp buffer search functions. */
 832
 833 static Lisp_Object
 834 search_command (string, bound, noerror, count, direction, RE, posix)
 835      Lisp_Object string, bound, noerror, count;
 836      int direction;
 837      int RE;
 838      int posix;
 839 {
 840   register int np;
 841   int lim, lim_byte;
 842   int n = direction;
 843
 844   if (!NILP (count))
 845     {
 846       CHECK_NUMBER (count);
 847       n *= XINT (count);
 848     }
 849
 850   CHECK_STRING (string);
 851   if (NILP (bound))
 852     {
 853       if (n > 0)
 854         lim = ZV, lim_byte = ZV_BYTE;
 855       else
 856         lim = BEGV, lim_byte = BEGV_BYTE;
 857     }
 858   else
 859     {
 860       CHECK_NUMBER_COERCE_MARKER (bound);
 861       lim = XINT (bound);
 862       if (n > 0 ? lim < PT : lim > PT)
 863         error ("Invalid search bound (wrong side of point)");
 864       if (lim > ZV)
 865         lim = ZV, lim_byte = ZV_BYTE;
 866       else if (lim < BEGV)
 867         lim = BEGV, lim_byte = BEGV_BYTE;
 868       else
 869         lim_byte = CHAR_TO_BYTE (lim);
 870     }
 871
 872   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 873                       (!NILP (current_buffer->case_fold_search)
 874                        ? current_buffer->case_canon_table
 875                        : Qnil),
 876                       (!NILP (current_buffer->case_fold_search)
 877                        ? current_buffer->case_eqv_table
 878                        : Qnil),
 879                       posix);
 880   if (np <= 0)
 881     {
 882       if (NILP (noerror))
 883         return signal_failure (string);
 884       if (!EQ (noerror, Qt))
 885         {
 886           if (lim < BEGV || lim > ZV)
 887             abort ();
 888           SET_PT_BOTH (lim, lim_byte);
 889           return Qnil;
 890 #if 0 /* This would be clean, but maybe programs depend on
 891          a value of nil here.  */
 892           np = lim;
 893 #endif
 894         }
 895       else
 896         return Qnil;
 897     }
 898
 899   if (np < BEGV || np > ZV)
 900     abort ();
 901
 902   SET_PT (np);
 903
 904   return make_number (np);
 905 }
 906 \f
 907 /* Return 1 if REGEXP it matches just one constant string.  */
 908
 909 static int
 910 trivial_regexp_p (regexp)
 911      Lisp_Object regexp;
 912 {
 913   int len = SBYTES (regexp);
 914   unsigned char *s = SDATA (regexp);
 915   while (--len >= 0)
 916     {
 917       switch (*s++)
 918         {
 919         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 920           return 0;
 921         case '\\':
 922           if (--len < 0)
 923             return 0;
 924           switch (*s++)
 925             {
 926             case '|': case '(': case ')': case '`': case '\'': case 'b':
 927             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 928             case 'S': case '=': case '{': case '}': case '_':
 929             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 930             case '1': case '2': case '3': case '4': case '5':
 931             case '6': case '7': case '8': case '9':
 932               return 0;
 933             }
 934         }
 935     }
 936   return 1;
 937 }
 938
 939 /* Search for the n'th occurrence of STRING in the current buffer,
 940    starting at position POS and stopping at position LIM,
 941    treating STRING as a literal string if RE is false or as
 942    a regular expression if RE is true.
 943
 944    If N is positive, searching is forward and LIM must be greater than POS.
 945    If N is negative, searching is backward and LIM must be less than POS.
 946
 947    Returns -x if x occurrences remain to be found (x > 0),
 948    or else the position at the beginning of the Nth occurrence
 949    (if searching backward) or the end (if searching forward).
 950
 951    POSIX is nonzero if we want full backtracking (POSIX style)
 952    for this pattern.  0 means backtrack only enough to get a valid match.  */
 953
 954 #define TRANSLATE(out, trt, d)                  \
 955 do                                              \
 956   {                                             \
 957     if (! NILP (trt))                           \
 958       {                                         \
 959         Lisp_Object temp;                       \
 960         temp = Faref (trt, make_number (d));    \
 961         if (INTEGERP (temp))                    \
 962           out = XINT (temp);                    \
 963         else                                    \
 964           out = d;                              \
 965       }                                         \
 966     else                                        \
 967       out = d;                                  \
 968   }                                             \
 969 while (0)
 970
 971 static int
 972 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
 973                RE, trt, inverse_trt, posix)
 974      Lisp_Object string;
 975      int pos;
 976      int pos_byte;
 977      int lim;
 978      int lim_byte;
 979      int n;
 980      int RE;
 981      Lisp_Object trt;
 982      Lisp_Object inverse_trt;
 983      int posix;
 984 {
 985   int len = SCHARS (string);
 986   int len_byte = SBYTES (string);
 987   register int i;
 988
 989   if (running_asynch_code)
 990     save_search_regs ();
 991
 992   /* Searching 0 times means don't move.  */
 993   /* Null string is found at starting position.  */
 994   if (len == 0 || n == 0)
 995     {
 996       set_search_regs (pos_byte, 0);
 997       return pos;
 998     }
 999
1000   if (RE && !trivial_regexp_p (string))
1001     {
1002       unsigned char *p1, *p2;
1003       int s1, s2;
1004       struct re_pattern_buffer *bufp;
1005
1006       bufp = compile_pattern (string, &search_regs, trt, posix,
1007                               !NILP (current_buffer->enable_multibyte_characters));
1008
1009       immediate_quit = 1;       /* Quit immediately if user types ^G,
1010                                    because letting this function finish
1011                                    can take too long. */
1012       QUIT;                     /* Do a pending quit right away,
1013                                    to avoid paradoxical behavior */
1014       /* Get pointers and sizes of the two strings
1015          that make up the visible portion of the buffer. */
1016
1017       p1 = BEGV_ADDR;
1018       s1 = GPT_BYTE - BEGV_BYTE;
1019       p2 = GAP_END_ADDR;
1020       s2 = ZV_BYTE - GPT_BYTE;
1021       if (s1 < 0)
1022         {
1023           p2 = p1;
1024           s2 = ZV_BYTE - BEGV_BYTE;
1025           s1 = 0;
1026         }
1027       if (s2 < 0)
1028         {
1029           s1 = ZV_BYTE - BEGV_BYTE;
1030           s2 = 0;
1031         }
1032       re_match_object = Qnil;
1033
1034       while (n < 0)
1035         {
1036           int val;
1037           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1038                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1039                              &search_regs,
1040                              /* Don't allow match past current point */
1041                              pos_byte - BEGV_BYTE);
1042           if (val == -2)
1043             {
1044               matcher_overflow ();
1045             }
1046           if (val >= 0)
1047             {
1048               pos_byte = search_regs.start[0] + BEGV_BYTE;
1049               for (i = 0; i < search_regs.num_regs; i++)
1050                 if (search_regs.start[i] >= 0)
1051                   {
1052                     search_regs.start[i]
1053                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1054                     search_regs.end[i]
1055                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1056                   }
1057               XSETBUFFER (last_thing_searched, current_buffer);
1058               /* Set pos to the new position. */
1059               pos = search_regs.start[0];
1060             }
1061           else
1062             {
1063               immediate_quit = 0;
1064               return (n);
1065             }
1066           n++;
1067         }
1068       while (n > 0)
1069         {
1070           int val;
1071           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1072                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1073                              &search_regs,
1074                              lim_byte - BEGV_BYTE);
1075           if (val == -2)
1076             {
1077               matcher_overflow ();
1078             }
1079           if (val >= 0)
1080             {
1081               pos_byte = search_regs.end[0] + BEGV_BYTE;
1082               for (i = 0; i < search_regs.num_regs; i++)
1083                 if (search_regs.start[i] >= 0)
1084                   {
1085                     search_regs.start[i]
1086                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1087                     search_regs.end[i]
1088                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1089                   }
1090               XSETBUFFER (last_thing_searched, current_buffer);
1091               pos = search_regs.end[0];
1092             }
1093           else
1094             {
1095               immediate_quit = 0;
1096               return (0 - n);
1097             }
1098           n--;
1099         }
1100       immediate_quit = 0;
1101       return (pos);
1102     }
1103   else                          /* non-RE case */
1104     {
1105       unsigned char *raw_pattern, *pat;
1106       int raw_pattern_size;
1107       int raw_pattern_size_byte;
1108       unsigned char *patbuf;
1109       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1110       unsigned char *base_pat = SDATA (string);
1111       /* High bits of char; 0 for ASCII characters, (CHAR & ~0x3F)
1112          otherwise.  Characters of the same high bits have the same
1113          sequence of bytes but last.  To do the BM search, all
1114          characters in STRING must have the same high bits (including
1115          their case translations).  */
1116       int char_high_bits = -1;
1117       int boyer_moore_ok = 1;
1118
1119       /* MULTIBYTE says whether the text to be searched is multibyte.
1120          We must convert PATTERN to match that, or we will not really
1121          find things right.  */
1122
1123       if (multibyte == STRING_MULTIBYTE (string))
1124         {
1125           raw_pattern = (unsigned char *) SDATA (string);
1126           raw_pattern_size = SCHARS (string);
1127           raw_pattern_size_byte = SBYTES (string);
1128         }
1129       else if (multibyte)
1130         {
1131           raw_pattern_size = SCHARS (string);
1132           raw_pattern_size_byte
1133             = count_size_as_multibyte (SDATA (string),
1134                                        raw_pattern_size);
1135           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1136           copy_text (SDATA (string), raw_pattern,
1137                      SCHARS (string), 0, 1);
1138         }
1139       else
1140         {
1141           /* Converting multibyte to single-byte.
1142
1143              ??? Perhaps this conversion should be done in a special way
1144              by subtracting nonascii-insert-offset from each non-ASCII char,
1145              so that only the multibyte chars which really correspond to
1146              the chosen single-byte character set can possibly match.  */
1147           raw_pattern_size = SCHARS (string);
1148           raw_pattern_size_byte = SCHARS (string);
1149           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1150           copy_text (SDATA (string), raw_pattern,
1151                      SBYTES (string), 1, 0);
1152         }
1153
1154       /* Copy and optionally translate the pattern.  */
1155       len = raw_pattern_size;
1156       len_byte = raw_pattern_size_byte;
1157       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1158       pat = patbuf;
1159       base_pat = raw_pattern;
1160       if (multibyte)
1161         {
1162           while (--len >= 0)
1163             {
1164               int c, translated, inverse;
1165               int in_charlen;
1166
1167               /* If we got here and the RE flag is set, it's because we're
1168                  dealing with a regexp known to be trivial, so the backslash
1169                  just quotes the next character.  */
1170               if (RE && *base_pat == '\\')
1171                 {
1172                   len--;
1173                   len_byte--;
1174                   base_pat++;
1175                 }
1176
1177               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1178
1179               /* Translate the character, if requested.  */
1180               TRANSLATE (translated, trt, c);
1181               TRANSLATE (inverse, inverse_trt, c);
1182
1183               /* Did this char actually get translated?
1184                  Would any other char get translated into it?  */
1185               if (translated != c || inverse != c)
1186                 {
1187                   /* Keep track of which character set row
1188                      contains the characters that need translation.  */
1189                   int this_high_bit = ASCII_CHAR_P (c) ? 0 : (c & ~0x3F);
1190                   int c1 = inverse != c ? inverse : translated;
1191                   int trt_high_bit = ASCII_CHAR_P (c1) ? 0 : (c1 & ~0x3F);
1192
1193                   if (this_high_bit != trt_high_bit)
1194                     boyer_moore_ok = 0;
1195                   else if (char_high_bits == -1)
1196                     char_high_bits = this_high_bit;
1197                   else if (char_high_bits != this_high_bit)
1198                     /* If two different rows appear, needing translation,
1199                        then we cannot use boyer_moore search.  */
1200                     boyer_moore_ok = 0;
1201                 }
1202
1203               /* Store this character into the translated pattern.  */
1204               CHAR_STRING_ADVANCE (translated, pat);
1205               base_pat += in_charlen;
1206               len_byte -= in_charlen;
1207             }
1208         }
1209       else
1210         {
1211           /* Unibyte buffer.  */
1212           char_high_bits = 0;
1213           while (--len >= 0)
1214             {
1215               int c, translated;
1216
1217               /* If we got here and the RE flag is set, it's because we're
1218                  dealing with a regexp known to be trivial, so the backslash
1219                  just quotes the next character.  */
1220               if (RE && *base_pat == '\\')
1221                 {
1222                   len--;
1223                   base_pat++;
1224                 }
1225               c = *base_pat++;
1226               TRANSLATE (translated, trt, c);
1227               *pat++ = translated;
1228             }
1229         }
1230
1231       len_byte = pat - patbuf;
1232       len = raw_pattern_size;
1233       pat = base_pat = patbuf;
1234
1235       if (boyer_moore_ok)
1236         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1237                             pos, pos_byte, lim, lim_byte,
1238                             char_high_bits);
1239       else
1240         return simple_search (n, pat, len, len_byte, trt,
1241                               pos, pos_byte, lim, lim_byte);
1242     }
1243 }
1244 \f
1245 /* Do a simple string search N times for the string PAT,
1246    whose length is LEN/LEN_BYTE,
1247    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1248    TRT is the translation table.
1249
1250    Return the character position where the match is found.
1251    Otherwise, if M matches remained to be found, return -M.
1252
1253    This kind of search works regardless of what is in PAT and
1254    regardless of what is in TRT.  It is used in cases where
1255    boyer_moore cannot work.  */
1256
1257 static int
1258 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1259      int n;
1260      unsigned char *pat;
1261      int len, len_byte;
1262      Lisp_Object trt;
1263      int pos, pos_byte;
1264      int lim, lim_byte;
1265 {
1266   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1267   int forward = n > 0;
1268
1269   if (lim > pos && multibyte)
1270     while (n > 0)
1271       {
1272         while (1)
1273           {
1274             /* Try matching at position POS.  */
1275             int this_pos = pos;
1276             int this_pos_byte = pos_byte;
1277             int this_len = len;
1278             int this_len_byte = len_byte;
1279             unsigned char *p = pat;
1280             if (pos + len > lim)
1281               goto stop;
1282
1283             while (this_len > 0)
1284               {
1285                 int charlen, buf_charlen;
1286                 int pat_ch, buf_ch;
1287
1288                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1289                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1290                                                  ZV_BYTE - this_pos_byte,
1291                                                  buf_charlen);
1292                 TRANSLATE (buf_ch, trt, buf_ch);
1293
1294                 if (buf_ch != pat_ch)
1295                   break;
1296
1297                 this_len_byte -= charlen;
1298                 this_len--;
1299                 p += charlen;
1300
1301                 this_pos_byte += buf_charlen;
1302                 this_pos++;
1303               }
1304
1305             if (this_len == 0)
1306               {
1307                 pos += len;
1308                 pos_byte += len_byte;
1309                 break;
1310               }
1311
1312             INC_BOTH (pos, pos_byte);
1313           }
1314
1315         n--;
1316       }
1317   else if (lim > pos)
1318     while (n > 0)
1319       {
1320         while (1)
1321           {
1322             /* Try matching at position POS.  */
1323             int this_pos = pos;
1324             int this_len = len;
1325             unsigned char *p = pat;
1326
1327             if (pos + len > lim)
1328               goto stop;
1329
1330             while (this_len > 0)
1331               {
1332                 int pat_ch = *p++;
1333                 int buf_ch = FETCH_BYTE (this_pos);
1334                 TRANSLATE (buf_ch, trt, buf_ch);
1335
1336                 if (buf_ch != pat_ch)
1337                   break;
1338
1339                 this_len--;
1340                 this_pos++;
1341               }
1342
1343             if (this_len == 0)
1344               {
1345                 pos += len;
1346                 break;
1347               }
1348
1349             pos++;
1350           }
1351
1352         n--;
1353       }
1354   /* Backwards search.  */
1355   else if (lim < pos && multibyte)
1356     while (n < 0)
1357       {
1358         while (1)
1359           {
1360             /* Try matching at position POS.  */
1361             int this_pos = pos - len;
1362             int this_pos_byte;
1363             int this_len = len;
1364             int this_len_byte = len_byte;
1365             unsigned char *p = pat;
1366
1367             if (pos - len < lim)
1368               goto stop;
1369             this_pos_byte = CHAR_TO_BYTE (this_pos);
1370
1371             while (this_len > 0)
1372               {
1373                 int charlen, buf_charlen;
1374                 int pat_ch, buf_ch;
1375
1376                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1377                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1378                                                  ZV_BYTE - this_pos_byte,
1379                                                  buf_charlen);
1380                 TRANSLATE (buf_ch, trt, buf_ch);
1381
1382                 if (buf_ch != pat_ch)
1383                   break;
1384
1385                 this_len_byte -= charlen;
1386                 this_len--;
1387                 p += charlen;
1388                 this_pos_byte += buf_charlen;
1389                 this_pos++;
1390               }
1391
1392             if (this_len == 0)
1393               {
1394                 pos -= len;
1395                 pos_byte -= len_byte;
1396                 break;
1397               }
1398
1399             DEC_BOTH (pos, pos_byte);
1400           }
1401
1402         n++;
1403       }
1404   else if (lim < pos)
1405     while (n < 0)
1406       {
1407         while (1)
1408           {
1409             /* Try matching at position POS.  */
1410             int this_pos = pos - len;
1411             int this_len = len;
1412             unsigned char *p = pat;
1413
1414             if (pos - len < lim)
1415               goto stop;
1416
1417             while (this_len > 0)
1418               {
1419                 int pat_ch = *p++;
1420                 int buf_ch = FETCH_BYTE (this_pos);
1421                 TRANSLATE (buf_ch, trt, buf_ch);
1422
1423                 if (buf_ch != pat_ch)
1424                   break;
1425                 this_len--;
1426                 this_pos++;
1427               }
1428
1429             if (this_len == 0)
1430               {
1431                 pos -= len;
1432                 break;
1433               }
1434
1435             pos--;
1436           }
1437
1438         n++;
1439       }
1440
1441  stop:
1442   if (n == 0)
1443     {
1444       if (forward)
1445         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1446       else
1447         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1448
1449       return pos;
1450     }
1451   else if (n > 0)
1452     return -n;
1453   else
1454     return n;
1455 }
1456 \f
1457 /* Do Boyer-Moore search N times for the string PAT,
1458    whose length is LEN/LEN_BYTE,
1459    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1460    The sign of N says which direction we search in.
1461    TRT and INVERSE_TRT are translation tables.
1462
1463    This kind of search works if all the characters in PAT that have
1464    nontrivial translation are the same aside from the last byte.  This
1465    makes it possible to translate just the last byte of a character,
1466    and do so after just a simple test of the context.
1467
1468    If that criterion is not satisfied, do not call this function.  */
1469
1470 static int
1471 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1472              pos, pos_byte, lim, lim_byte, char_high_bits)
1473      int n;
1474      unsigned char *base_pat;
1475      int len, len_byte;
1476      Lisp_Object trt;
1477      Lisp_Object inverse_trt;
1478      int pos, pos_byte;
1479      int lim, lim_byte;
1480      int char_high_bits;
1481 {
1482   int direction = ((n > 0) ? 1 : -1);
1483   register int dirlen;
1484   int infinity, limit, stride_for_teases = 0;
1485   register int *BM_tab;
1486   int *BM_tab_base;
1487   register unsigned char *cursor, *p_limit;
1488   register int i, j;
1489   unsigned char *pat, *pat_end;
1490   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1491
1492   unsigned char simple_translate[0400];
1493   int translate_prev_byte = 0;
1494   int translate_anteprev_byte = 0;
1495
1496 #ifdef C_ALLOCA
1497   int BM_tab_space[0400];
1498   BM_tab = &BM_tab_space[0];
1499 #else
1500   BM_tab = (int *) alloca (0400 * sizeof (int));
1501 #endif
1502   /* The general approach is that we are going to maintain that we know */
1503   /* the first (closest to the present position, in whatever direction */
1504   /* we're searching) character that could possibly be the last */
1505   /* (furthest from present position) character of a valid match.  We */
1506   /* advance the state of our knowledge by looking at that character */
1507   /* and seeing whether it indeed matches the last character of the */
1508   /* pattern.  If it does, we take a closer look.  If it does not, we */
1509   /* move our pointer (to putative last characters) as far as is */
1510   /* logically possible.  This amount of movement, which I call a */
1511   /* stride, will be the length of the pattern if the actual character */
1512   /* appears nowhere in the pattern, otherwise it will be the distance */
1513   /* from the last occurrence of that character to the end of the */
1514   /* pattern. */
1515   /* As a coding trick, an enormous stride is coded into the table for */
1516   /* characters that match the last character.  This allows use of only */
1517   /* a single test, a test for having gone past the end of the */
1518   /* permissible match region, to test for both possible matches (when */
1519   /* the stride goes past the end immediately) and failure to */
1520   /* match (where you get nudged past the end one stride at a time). */
1521
1522   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1523   /* is determined only by the last character of the putative match. */
1524   /* If that character does not match, we will stride the proper */
1525   /* distance to propose a match that superimposes it on the last */
1526   /* instance of a character that matches it (per trt), or misses */
1527   /* it entirely if there is none. */
1528
1529   dirlen = len_byte * direction;
1530   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1531
1532   /* Record position after the end of the pattern.  */
1533   pat_end = base_pat + len_byte;
1534   /* BASE_PAT points to a character that we start scanning from.
1535      It is the first character in a forward search,
1536      the last character in a backward search.  */
1537   if (direction < 0)
1538     base_pat = pat_end - 1;
1539
1540   BM_tab_base = BM_tab;
1541   BM_tab += 0400;
1542   j = dirlen;           /* to get it in a register */
1543   /* A character that does not appear in the pattern induces a */
1544   /* stride equal to the pattern length. */
1545   while (BM_tab_base != BM_tab)
1546     {
1547       *--BM_tab = j;
1548       *--BM_tab = j;
1549       *--BM_tab = j;
1550       *--BM_tab = j;
1551     }
1552
1553   /* We use this for translation, instead of TRT itself.
1554      We fill this in to handle the characters that actually
1555      occur in the pattern.  Others don't matter anyway!  */
1556   bzero (simple_translate, sizeof simple_translate);
1557   for (i = 0; i < 0400; i++)
1558     simple_translate[i] = i;
1559
1560   i = 0;
1561   while (i != infinity)
1562     {
1563       unsigned char *ptr = base_pat + i;
1564       i += direction;
1565       if (i == dirlen)
1566         i = infinity;
1567       if (! NILP (trt))
1568         {
1569           int ch;
1570           int untranslated;
1571           int this_translated = 1;
1572
1573           if (multibyte
1574               /* Is *PTR the last byte of a character?  */
1575               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1576             {
1577               unsigned char *charstart = ptr;
1578               while (! CHAR_HEAD_P (*charstart))
1579                 charstart--;
1580               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1581               if (char_high_bits
1582                   == (ASCII_CHAR_P (untranslated) ? 0 : untranslated & ~0x3F))
1583                 {
1584                   TRANSLATE (ch, trt, untranslated);
1585                   if (! CHAR_HEAD_P (*ptr))
1586                     {
1587                       translate_prev_byte = ptr[-1];
1588                       if (! CHAR_HEAD_P (translate_prev_byte))
1589                         translate_anteprev_byte = ptr[-2];
1590                     }
1591                 }
1592               else
1593                 {
1594                   this_translated = 0;
1595                   ch = *ptr;
1596                 }
1597             }
1598           else if (!multibyte)
1599             TRANSLATE (ch, trt, *ptr);
1600           else
1601             {
1602               ch = *ptr;
1603               this_translated = 0;
1604             }
1605
1606           if (this_translated
1607               && ch >= 0200)
1608             j = (ch & 0x3F) | 0200;
1609           else
1610             j = (unsigned char) ch;
1611
1612           if (i == infinity)
1613             stride_for_teases = BM_tab[j];
1614
1615           BM_tab[j] = dirlen - i;
1616           /* A translation table is accompanied by its inverse -- see */
1617           /* comment following downcase_table for details */
1618           if (this_translated)
1619             {
1620               int starting_ch = ch;
1621               int starting_j = j;
1622               while (1)
1623                 {
1624                   TRANSLATE (ch, inverse_trt, ch);
1625                   if (ch > 0200)
1626                     j = (ch & 0x3F) | 0200;
1627                   else
1628                     j = (unsigned char) ch;
1629
1630                   /* For all the characters that map into CH,
1631                      set up simple_translate to map the last byte
1632                      into STARTING_J.  */
1633                   simple_translate[j] = starting_j;
1634                   if (ch == starting_ch)
1635                     break;
1636                   BM_tab[j] = dirlen - i;
1637                 }
1638             }
1639         }
1640       else
1641         {
1642           j = *ptr;
1643
1644           if (i == infinity)
1645             stride_for_teases = BM_tab[j];
1646           BM_tab[j] = dirlen - i;
1647         }
1648       /* stride_for_teases tells how much to stride if we get a */
1649       /* match on the far character but are subsequently */
1650       /* disappointed, by recording what the stride would have been */
1651       /* for that character if the last character had been */
1652       /* different. */
1653     }
1654   infinity = dirlen - infinity;
1655   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1656   /* loop invariant - POS_BYTE points at where last char (first
1657      char if reverse) of pattern would align in a possible match.  */
1658   while (n != 0)
1659     {
1660       int tail_end;
1661       unsigned char *tail_end_ptr;
1662
1663       /* It's been reported that some (broken) compiler thinks that
1664          Boolean expressions in an arithmetic context are unsigned.
1665          Using an explicit ?1:0 prevents this.  */
1666       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1667           < 0)
1668         return (n * (0 - direction));
1669       /* First we do the part we can by pointers (maybe nothing) */
1670       QUIT;
1671       pat = base_pat;
1672       limit = pos_byte - dirlen + direction;
1673       if (direction > 0)
1674         {
1675           limit = BUFFER_CEILING_OF (limit);
1676           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1677              can take on without hitting edge of buffer or the gap.  */
1678           limit = min (limit, pos_byte + 20000);
1679           limit = min (limit, lim_byte - 1);
1680         }
1681       else
1682         {
1683           limit = BUFFER_FLOOR_OF (limit);
1684           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1685              can take on without hitting edge of buffer or the gap.  */
1686           limit = max (limit, pos_byte - 20000);
1687           limit = max (limit, lim_byte);
1688         }
1689       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1690       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1691
1692       if ((limit - pos_byte) * direction > 20)
1693         {
1694           unsigned char *p2;
1695
1696           p_limit = BYTE_POS_ADDR (limit);
1697           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1698           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1699           while (1)             /* use one cursor setting as long as i can */
1700             {
1701               if (direction > 0) /* worth duplicating */
1702                 {
1703                   /* Use signed comparison if appropriate
1704                      to make cursor+infinity sure to be > p_limit.
1705                      Assuming that the buffer lies in a range of addresses
1706                      that are all "positive" (as ints) or all "negative",
1707                      either kind of comparison will work as long
1708                      as we don't step by infinity.  So pick the kind
1709                      that works when we do step by infinity.  */
1710                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1711                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1712                       cursor += BM_tab[*cursor];
1713                   else
1714                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1715                       cursor += BM_tab[*cursor];
1716                 }
1717               else
1718                 {
1719                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1720                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1721                       cursor += BM_tab[*cursor];
1722                   else
1723                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1724                       cursor += BM_tab[*cursor];
1725                 }
1726 /* If you are here, cursor is beyond the end of the searched region. */
1727 /* This can happen if you match on the far character of the pattern, */
1728 /* because the "stride" of that character is infinity, a number able */
1729 /* to throw you well beyond the end of the search.  It can also */
1730 /* happen if you fail to match within the permitted region and would */
1731 /* otherwise try a character beyond that region */
1732               if ((cursor - p_limit) * direction <= len_byte)
1733                 break;  /* a small overrun is genuine */
1734               cursor -= infinity; /* large overrun = hit */
1735               i = dirlen - direction;
1736               if (! NILP (trt))
1737                 {
1738                   while ((i -= direction) + direction != 0)
1739                     {
1740                       int ch;
1741                       cursor -= direction;
1742                       /* Translate only the last byte of a character.  */
1743                       if (! multibyte
1744                           || ((cursor == tail_end_ptr
1745                                || CHAR_HEAD_P (cursor[1]))
1746                               && (CHAR_HEAD_P (cursor[0])
1747                                   || (translate_prev_byte == cursor[-1]
1748                                       && (CHAR_HEAD_P (translate_prev_byte)
1749                                           || translate_anteprev_byte == cursor[-2])))))
1750                         ch = simple_translate[*cursor];
1751                       else
1752                         ch = *cursor;
1753                       if (pat[i] != ch)
1754                         break;
1755                     }
1756                 }
1757               else
1758                 {
1759                   while ((i -= direction) + direction != 0)
1760                     {
1761                       cursor -= direction;
1762                       if (pat[i] != *cursor)
1763                         break;
1764                     }
1765                 }
1766               cursor += dirlen - i - direction; /* fix cursor */
1767               if (i + direction == 0)
1768                 {
1769                   int position;
1770
1771                   cursor -= direction;
1772
1773                   position = pos_byte + cursor - p2 + ((direction > 0)
1774                                                        ? 1 - len_byte : 0);
1775                   set_search_regs (position, len_byte);
1776
1777                   if ((n -= direction) != 0)
1778                     cursor += dirlen; /* to resume search */
1779                   else
1780                     return ((direction > 0)
1781                             ? search_regs.end[0] : search_regs.start[0]);
1782                 }
1783               else
1784                 cursor += stride_for_teases; /* <sigh> we lose -  */
1785             }
1786           pos_byte += cursor - p2;
1787         }
1788       else
1789         /* Now we'll pick up a clump that has to be done the hard */
1790         /* way because it covers a discontinuity */
1791         {
1792           limit = ((direction > 0)
1793                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1794                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1795           limit = ((direction > 0)
1796                    ? min (limit + len_byte, lim_byte - 1)
1797                    : max (limit - len_byte, lim_byte));
1798           /* LIMIT is now the last value POS_BYTE can have
1799              and still be valid for a possible match.  */
1800           while (1)
1801             {
1802               /* This loop can be coded for space rather than */
1803               /* speed because it will usually run only once. */
1804               /* (the reach is at most len + 21, and typically */
1805               /* does not exceed len) */
1806               while ((limit - pos_byte) * direction >= 0)
1807                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1808               /* now run the same tests to distinguish going off the */
1809               /* end, a match or a phony match. */
1810               if ((pos_byte - limit) * direction <= len_byte)
1811                 break;  /* ran off the end */
1812               /* Found what might be a match.
1813                  Set POS_BYTE back to last (first if reverse) pos.  */
1814               pos_byte -= infinity;
1815               i = dirlen - direction;
1816               while ((i -= direction) + direction != 0)
1817                 {
1818                   int ch;
1819                   unsigned char *ptr;
1820                   pos_byte -= direction;
1821                   ptr = BYTE_POS_ADDR (pos_byte);
1822                   /* Translate only the last byte of a character.  */
1823                   if (! multibyte
1824                       || ((ptr == tail_end_ptr
1825                            || CHAR_HEAD_P (ptr[1]))
1826                           && (CHAR_HEAD_P (ptr[0])
1827                               || (translate_prev_byte == ptr[-1]
1828                                   && (CHAR_HEAD_P (translate_prev_byte)
1829                                       || translate_anteprev_byte == ptr[-2])))))
1830                     ch = simple_translate[*ptr];
1831                   else
1832                     ch = *ptr;
1833                   if (pat[i] != ch)
1834                     break;
1835                 }
1836               /* Above loop has moved POS_BYTE part or all the way
1837                  back to the first pos (last pos if reverse).
1838                  Set it once again at the last (first if reverse) char.  */
1839               pos_byte += dirlen - i- direction;
1840               if (i + direction == 0)
1841                 {
1842                   int position;
1843                   pos_byte -= direction;
1844
1845                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1846
1847                   set_search_regs (position, len_byte);
1848
1849                   if ((n -= direction) != 0)
1850                     pos_byte += dirlen; /* to resume search */
1851                   else
1852                     return ((direction > 0)
1853                             ? search_regs.end[0] : search_regs.start[0]);
1854                 }
1855               else
1856                 pos_byte += stride_for_teases;
1857             }
1858           }
1859       /* We have done one clump.  Can we continue? */
1860       if ((lim_byte - pos_byte) * direction < 0)
1861         return ((0 - n) * direction);
1862     }
1863   return BYTE_TO_CHAR (pos_byte);
1864 }
1865
1866 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1867    for the overall match just found in the current buffer.
1868    Also clear out the match data for registers 1 and up.  */
1869
1870 static void
1871 set_search_regs (beg_byte, nbytes)
1872      int beg_byte, nbytes;
1873 {
1874   int i;
1875
1876   /* Make sure we have registers in which to store
1877      the match position.  */
1878   if (search_regs.num_regs == 0)
1879     {
1880       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1881       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1882       search_regs.num_regs = 2;
1883     }
1884
1885   /* Clear out the other registers.  */
1886   for (i = 1; i < search_regs.num_regs; i++)
1887     {
1888       search_regs.start[i] = -1;
1889       search_regs.end[i] = -1;
1890     }
1891
1892   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1893   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1894   XSETBUFFER (last_thing_searched, current_buffer);
1895 }
1896 \f
1897 /* Given a string of words separated by word delimiters,
1898   compute a regexp that matches those exact words
1899   separated by arbitrary punctuation.  */
1900
1901 static Lisp_Object
1902 wordify (string)
1903      Lisp_Object string;
1904 {
1905   register unsigned char *p, *o;
1906   register int i, i_byte, len, punct_count = 0, word_count = 0;
1907   Lisp_Object val;
1908   int prev_c = 0;
1909   int adjust;
1910
1911   CHECK_STRING (string);
1912   p = SDATA (string);
1913   len = SCHARS (string);
1914
1915   for (i = 0, i_byte = 0; i < len; )
1916     {
1917       int c;
1918
1919       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1920
1921       if (SYNTAX (c) != Sword)
1922         {
1923           punct_count++;
1924           if (i > 0 && SYNTAX (prev_c) == Sword)
1925             word_count++;
1926         }
1927
1928       prev_c = c;
1929     }
1930
1931   if (SYNTAX (prev_c) == Sword)
1932     word_count++;
1933   if (!word_count)
1934     return empty_string;
1935
1936   adjust = - punct_count + 5 * (word_count - 1) + 4;
1937   if (STRING_MULTIBYTE (string))
1938     val = make_uninit_multibyte_string (len + adjust,
1939                                         SBYTES (string)
1940                                         + adjust);
1941   else
1942     val = make_uninit_string (len + adjust);
1943
1944   o = SDATA (val);
1945   *o++ = '\\';
1946   *o++ = 'b';
1947   prev_c = 0;
1948
1949   for (i = 0, i_byte = 0; i < len; )
1950     {
1951       int c;
1952       int i_byte_orig = i_byte;
1953
1954       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1955
1956       if (SYNTAX (c) == Sword)
1957         {
1958           bcopy (SDATA (string) + i_byte_orig, o,
1959                  i_byte - i_byte_orig);
1960           o += i_byte - i_byte_orig;
1961         }
1962       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1963         {
1964           *o++ = '\\';
1965           *o++ = 'W';
1966           *o++ = '\\';
1967           *o++ = 'W';
1968           *o++ = '*';
1969         }
1970
1971       prev_c = c;
1972     }
1973
1974   *o++ = '\\';
1975   *o++ = 'b';
1976
1977   return val;
1978 }
1979 \f
1980 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1981        "MSearch backward: ",
1982        doc: /* Search backward from point for STRING.
1983 Set point to the beginning of the occurrence found, and return point.
1984 An optional second argument bounds the search; it is a buffer position.
1985 The match found must not extend before that position.
1986 Optional third argument, if t, means if fail just return nil (no error).
1987  If not nil and not t, position at limit of search and return nil.
1988 Optional fourth argument is repeat count--search for successive occurrences.
1989
1990 Search case-sensitivity is determined by the value of the variable
1991 `case-fold-search', which see.
1992
1993 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
1994      (string, bound, noerror, count)
1995      Lisp_Object string, bound, noerror, count;
1996 {
1997   return search_command (string, bound, noerror, count, -1, 0, 0);
1998 }
1999
2000 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2001        doc: /* Search forward from point for STRING.
2002 Set point to the end of the occurrence found, and return point.
2003 An optional second argument bounds the search; it is a buffer position.
2004 The match found must not extend after that position.  nil is equivalent
2005   to (point-max).
2006 Optional third argument, if t, means if fail just return nil (no error).
2007   If not nil and not t, move to limit of search and return nil.
2008 Optional fourth argument is repeat count--search for successive occurrences.
2009
2010 Search case-sensitivity is determined by the value of the variable
2011 `case-fold-search', which see.
2012
2013 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2014      (string, bound, noerror, count)
2015      Lisp_Object string, bound, noerror, count;
2016 {
2017   return search_command (string, bound, noerror, count, 1, 0, 0);
2018 }
2019
2020 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2021        "sWord search backward: ",
2022        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2023 Set point to the beginning of the occurrence found, and return point.
2024 An optional second argument bounds the search; it is a buffer position.
2025 The match found must not extend before that position.
2026 Optional third argument, if t, means if fail just return nil (no error).
2027   If not nil and not t, move to limit of search and return nil.
2028 Optional fourth argument is repeat count--search for successive occurrences.  */)
2029      (string, bound, noerror, count)
2030      Lisp_Object string, bound, noerror, count;
2031 {
2032   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2033 }
2034
2035 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2036        "sWord search: ",
2037        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2038 Set point to the end of the occurrence found, and return point.
2039 An optional second argument bounds the search; it is a buffer position.
2040 The match found must not extend after that position.
2041 Optional third argument, if t, means if fail just return nil (no error).
2042   If not nil and not t, move to limit of search and return nil.
2043 Optional fourth argument is repeat count--search for successive occurrences.  */)
2044      (string, bound, noerror, count)
2045      Lisp_Object string, bound, noerror, count;
2046 {
2047   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2048 }
2049
2050 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2051        "sRE search backward: ",
2052        doc: /* Search backward from point for match for regular expression REGEXP.
2053 Set point to the beginning of the match, and return point.
2054 The match found is the one starting last in the buffer
2055 and yet ending before the origin of the search.
2056 An optional second argument bounds the search; it is a buffer position.
2057 The match found must start at or after that position.
2058 Optional third argument, if t, means if fail just return nil (no error).
2059   If not nil and not t, move to limit of search and return nil.
2060 Optional fourth argument is repeat count--search for successive occurrences.
2061 See also the functions `match-beginning', `match-end', `match-string',
2062 and `replace-match'.  */)
2063      (regexp, bound, noerror, count)
2064      Lisp_Object regexp, bound, noerror, count;
2065 {
2066   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2067 }
2068
2069 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2070        "sRE search: ",
2071        doc: /* Search forward from point for regular expression REGEXP.
2072 Set point to the end of the occurrence found, and return point.
2073 An optional second argument bounds the search; it is a buffer position.
2074 The match found must not extend after that position.
2075 Optional third argument, if t, means if fail just return nil (no error).
2076   If not nil and not t, move to limit of search and return nil.
2077 Optional fourth argument is repeat count--search for successive occurrences.
2078 See also the functions `match-beginning', `match-end', `match-string',
2079 and `replace-match'.  */)
2080      (regexp, bound, noerror, count)
2081      Lisp_Object regexp, bound, noerror, count;
2082 {
2083   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2084 }
2085
2086 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2087        "sPosix search backward: ",
2088        doc: /* Search backward from point for match for regular expression REGEXP.
2089 Find the longest match in accord with Posix regular expression rules.
2090 Set point to the beginning of the match, and return point.
2091 The match found is the one starting last in the buffer
2092 and yet ending before the origin of the search.
2093 An optional second argument bounds the search; it is a buffer position.
2094 The match found must start at or after that position.
2095 Optional third argument, if t, means if fail just return nil (no error).
2096   If not nil and not t, move to limit of search and return nil.
2097 Optional fourth argument is repeat count--search for successive occurrences.
2098 See also the functions `match-beginning', `match-end', `match-string',
2099 and `replace-match'.  */)
2100      (regexp, bound, noerror, count)
2101      Lisp_Object regexp, bound, noerror, count;
2102 {
2103   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2104 }
2105
2106 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2107        "sPosix search: ",
2108        doc: /* Search forward from point for regular expression REGEXP.
2109 Find the longest match in accord with Posix regular expression rules.
2110 Set point to the end of the occurrence found, and return point.
2111 An optional second argument bounds the search; it is a buffer position.
2112 The match found must not extend after that position.
2113 Optional third argument, if t, means if fail just return nil (no error).
2114   If not nil and not t, move to limit of search and return nil.
2115 Optional fourth argument is repeat count--search for successive occurrences.
2116 See also the functions `match-beginning', `match-end', `match-string',
2117 and `replace-match'.  */)
2118      (regexp, bound, noerror, count)
2119      Lisp_Object regexp, bound, noerror, count;
2120 {
2121   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2122 }
2123 \f
2124 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2125        doc: /* Replace text matched by last search with NEWTEXT.
2126 Leave point at the end of the replacement text.
2127
2128 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2129 Otherwise maybe capitalize the whole text, or maybe just word initials,
2130 based on the replaced text.
2131 If the replaced text has only capital letters
2132 and has at least one multiletter word, convert NEWTEXT to all caps.
2133 Otherwise if all words are capitalized in the replaced text,
2134 capitalize each word in NEWTEXT.
2135
2136 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2137 Otherwise treat `\\' as special:
2138   `\\&' in NEWTEXT means substitute original matched text.
2139   `\\N' means substitute what matched the Nth `\\(...\\)'.
2140        If Nth parens didn't match, substitute nothing.
2141   `\\\\' means insert one `\\'.
2142 Case conversion does not apply to these substitutions.
2143
2144 FIXEDCASE and LITERAL are optional arguments.
2145
2146 The optional fourth argument STRING can be a string to modify.
2147 This is meaningful when the previous match was done against STRING,
2148 using `string-match'.  When used this way, `replace-match'
2149 creates and returns a new string made by copying STRING and replacing
2150 the part of STRING that was matched.
2151
2152 The optional fifth argument SUBEXP specifies a subexpression;
2153 it says to replace just that subexpression with NEWTEXT,
2154 rather than replacing the entire matched text.
2155 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2156 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2157 NEWTEXT in place of subexp N.
2158 This is useful only after a regular expression search or match,
2159 since only regular expressions have distinguished subexpressions.  */)
2160      (newtext, fixedcase, literal, string, subexp)
2161      Lisp_Object newtext, fixedcase, literal, string, subexp;
2162 {
2163   enum { nochange, all_caps, cap_initial } case_action;
2164   register int pos, pos_byte;
2165   int some_multiletter_word;
2166   int some_lowercase;
2167   int some_uppercase;
2168   int some_nonuppercase_initial;
2169   register int c, prevc;
2170   int sub;
2171   int opoint, newpoint;
2172
2173   CHECK_STRING (newtext);
2174
2175   if (! NILP (string))
2176     CHECK_STRING (string);
2177
2178   case_action = nochange;       /* We tried an initialization */
2179                                 /* but some C compilers blew it */
2180
2181   if (search_regs.num_regs <= 0)
2182     error ("replace-match called before any match found");
2183
2184   if (NILP (subexp))
2185     sub = 0;
2186   else
2187     {
2188       CHECK_NUMBER (subexp);
2189       sub = XINT (subexp);
2190       if (sub < 0 || sub >= search_regs.num_regs)
2191         args_out_of_range (subexp, make_number (search_regs.num_regs));
2192     }
2193
2194   if (NILP (string))
2195     {
2196       if (search_regs.start[sub] < BEGV
2197           || search_regs.start[sub] > search_regs.end[sub]
2198           || search_regs.end[sub] > ZV)
2199         args_out_of_range (make_number (search_regs.start[sub]),
2200                            make_number (search_regs.end[sub]));
2201     }
2202   else
2203     {
2204       if (search_regs.start[sub] < 0
2205           || search_regs.start[sub] > search_regs.end[sub]
2206           || search_regs.end[sub] > SCHARS (string))
2207         args_out_of_range (make_number (search_regs.start[sub]),
2208                            make_number (search_regs.end[sub]));
2209     }
2210
2211   if (NILP (fixedcase))
2212     {
2213       /* Decide how to casify by examining the matched text. */
2214       int last;
2215
2216       pos = search_regs.start[sub];
2217       last = search_regs.end[sub];
2218
2219       if (NILP (string))
2220         pos_byte = CHAR_TO_BYTE (pos);
2221       else
2222         pos_byte = string_char_to_byte (string, pos);
2223
2224       prevc = '\n';
2225       case_action = all_caps;
2226
2227       /* some_multiletter_word is set nonzero if any original word
2228          is more than one letter long. */
2229       some_multiletter_word = 0;
2230       some_lowercase = 0;
2231       some_nonuppercase_initial = 0;
2232       some_uppercase = 0;
2233
2234       while (pos < last)
2235         {
2236           if (NILP (string))
2237             {
2238               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2239               INC_BOTH (pos, pos_byte);
2240             }
2241           else
2242             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2243
2244           if (LOWERCASEP (c))
2245             {
2246               /* Cannot be all caps if any original char is lower case */
2247
2248               some_lowercase = 1;
2249               if (SYNTAX (prevc) != Sword)
2250                 some_nonuppercase_initial = 1;
2251               else
2252                 some_multiletter_word = 1;
2253             }
2254           else if (!NOCASEP (c))
2255             {
2256               some_uppercase = 1;
2257               if (SYNTAX (prevc) != Sword)
2258                 ;
2259               else
2260                 some_multiletter_word = 1;
2261             }
2262           else
2263             {
2264               /* If the initial is a caseless word constituent,
2265                  treat that like a lowercase initial.  */
2266               if (SYNTAX (prevc) != Sword)
2267                 some_nonuppercase_initial = 1;
2268             }
2269
2270           prevc = c;
2271         }
2272
2273       /* Convert to all caps if the old text is all caps
2274          and has at least one multiletter word.  */
2275       if (! some_lowercase && some_multiletter_word)
2276         case_action = all_caps;
2277       /* Capitalize each word, if the old text has all capitalized words.  */
2278       else if (!some_nonuppercase_initial && some_multiletter_word)
2279         case_action = cap_initial;
2280       else if (!some_nonuppercase_initial && some_uppercase)
2281         /* Should x -> yz, operating on X, give Yz or YZ?
2282            We'll assume the latter.  */
2283         case_action = all_caps;
2284       else
2285         case_action = nochange;
2286     }
2287
2288   /* Do replacement in a string.  */
2289   if (!NILP (string))
2290     {
2291       Lisp_Object before, after;
2292
2293       before = Fsubstring (string, make_number (0),
2294                            make_number (search_regs.start[sub]));
2295       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2296
2297       /* Substitute parts of the match into NEWTEXT
2298          if desired.  */
2299       if (NILP (literal))
2300         {
2301           int lastpos = 0;
2302           int lastpos_byte = 0;
2303           /* We build up the substituted string in ACCUM.  */
2304           Lisp_Object accum;
2305           Lisp_Object middle;
2306           int length = SBYTES (newtext);
2307
2308           accum = Qnil;
2309
2310           for (pos_byte = 0, pos = 0; pos_byte < length;)
2311             {
2312               int substart = -1;
2313               int subend = 0;
2314               int delbackslash = 0;
2315
2316               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2317
2318               if (c == '\\')
2319                 {
2320                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2321
2322                   if (c == '&')
2323                     {
2324                       substart = search_regs.start[sub];
2325                       subend = search_regs.end[sub];
2326                     }
2327                   else if (c >= '1' && c <= '9')
2328                     {
2329                       if (search_regs.start[c - '0'] >= 0
2330                           && c <= search_regs.num_regs + '0')
2331                         {
2332                           substart = search_regs.start[c - '0'];
2333                           subend = search_regs.end[c - '0'];
2334                         }
2335                       else
2336                         {
2337                           /* If that subexp did not match,
2338                              replace \\N with nothing.  */
2339                           substart = 0;
2340                           subend = 0;
2341                         }
2342                     }
2343                   else if (c == '\\')
2344                     delbackslash = 1;
2345                   else
2346                     error ("Invalid use of `\\' in replacement text");
2347                 }
2348               if (substart >= 0)
2349                 {
2350                   if (pos - 2 != lastpos)
2351                     middle = substring_both (newtext, lastpos,
2352                                              lastpos_byte,
2353                                              pos - 2, pos_byte - 2);
2354                   else
2355                     middle = Qnil;
2356                   accum = concat3 (accum, middle,
2357                                    Fsubstring (string,
2358                                                make_number (substart),
2359                                                make_number (subend)));
2360                   lastpos = pos;
2361                   lastpos_byte = pos_byte;
2362                 }
2363               else if (delbackslash)
2364                 {
2365                   middle = substring_both (newtext, lastpos,
2366                                            lastpos_byte,
2367                                            pos - 1, pos_byte - 1);
2368
2369                   accum = concat2 (accum, middle);
2370                   lastpos = pos;
2371                   lastpos_byte = pos_byte;
2372                 }
2373             }
2374
2375           if (pos != lastpos)
2376             middle = substring_both (newtext, lastpos,
2377                                      lastpos_byte,
2378                                      pos, pos_byte);
2379           else
2380             middle = Qnil;
2381
2382           newtext = concat2 (accum, middle);
2383         }
2384
2385       /* Do case substitution in NEWTEXT if desired.  */
2386       if (case_action == all_caps)
2387         newtext = Fupcase (newtext);
2388       else if (case_action == cap_initial)
2389         newtext = Fupcase_initials (newtext);
2390
2391       return concat3 (before, newtext, after);
2392     }
2393
2394   /* Record point, then move (quietly) to the start of the match.  */
2395   if (PT >= search_regs.end[sub])
2396     opoint = PT - ZV;
2397   else if (PT > search_regs.start[sub])
2398     opoint = search_regs.end[sub] - ZV;
2399   else
2400     opoint = PT;
2401
2402   /* If we want non-literal replacement,
2403      perform substitution on the replacement string.  */
2404   if (NILP (literal))
2405     {
2406       int length = SBYTES (newtext);
2407       unsigned char *substed;
2408       int substed_alloc_size, substed_len;
2409       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2410       int str_multibyte = STRING_MULTIBYTE (newtext);
2411       Lisp_Object rev_tbl;
2412       int really_changed = 0;
2413
2414       rev_tbl = Qnil;
2415
2416       substed_alloc_size = length * 2 + 100;
2417       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2418       substed_len = 0;
2419
2420       /* Go thru NEWTEXT, producing the actual text to insert in
2421          SUBSTED while adjusting multibyteness to that of the current
2422          buffer.  */
2423
2424       for (pos_byte = 0, pos = 0; pos_byte < length;)
2425         {
2426           unsigned char str[MAX_MULTIBYTE_LENGTH];
2427           unsigned char *add_stuff = NULL;
2428           int add_len = 0;
2429           int idx = -1;
2430
2431           if (str_multibyte)
2432             {
2433               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2434               if (!buf_multibyte)
2435                 c = multibyte_char_to_unibyte (c, rev_tbl);
2436             }
2437           else
2438             {
2439               /* Note that we don't have to increment POS.  */
2440               c = SREF (newtext, pos_byte++);
2441               if (buf_multibyte)
2442                 c = unibyte_char_to_multibyte (c);
2443             }
2444
2445           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2446              or set IDX to a match index, which means put that part
2447              of the buffer text into SUBSTED.  */
2448
2449           if (c == '\\')
2450             {
2451               really_changed = 1;
2452
2453               if (str_multibyte)
2454                 {
2455                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2456                                                       pos, pos_byte);
2457                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2458                     c = multibyte_char_to_unibyte (c, rev_tbl);
2459                 }
2460               else
2461                 {
2462                   c = SREF (newtext, pos_byte++);
2463                   if (buf_multibyte)
2464                     c = unibyte_char_to_multibyte (c);
2465                 }
2466
2467               if (c == '&')
2468                 idx = sub;
2469               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2470                 {
2471                   if (search_regs.start[c - '0'] >= 1)
2472                     idx = c - '0';
2473                 }
2474               else if (c == '\\')
2475                 add_len = 1, add_stuff = "\\";
2476               else
2477                 {
2478                   xfree (substed);
2479                   error ("Invalid use of `\\' in replacement text");
2480                 }
2481             }
2482           else
2483             {
2484               add_len = CHAR_STRING (c, str);
2485               add_stuff = str;
2486             }
2487
2488           /* If we want to copy part of a previous match,
2489              set up ADD_STUFF and ADD_LEN to point to it.  */
2490           if (idx >= 0)
2491             {
2492               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2493               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2494               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2495                 move_gap (search_regs.start[idx]);
2496               add_stuff = BYTE_POS_ADDR (begbyte);
2497             }
2498
2499           /* Now the stuff we want to add to SUBSTED
2500              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2501
2502           /* Make sure SUBSTED is big enough.  */
2503           if (substed_len + add_len >= substed_alloc_size)
2504             {
2505               substed_alloc_size = substed_len + add_len + 500;
2506               substed = (unsigned char *) xrealloc (substed,
2507                                                     substed_alloc_size + 1);
2508             }
2509
2510           /* Now add to the end of SUBSTED.  */
2511           if (add_stuff)
2512             {
2513               bcopy (add_stuff, substed + substed_len, add_len);
2514               substed_len += add_len;
2515             }
2516         }
2517
2518       if (really_changed)
2519         {
2520           if (buf_multibyte)
2521             {
2522               int nchars = multibyte_chars_in_text (substed, substed_len);
2523
2524               newtext = make_multibyte_string (substed, nchars, substed_len);
2525             }
2526           else
2527             newtext = make_unibyte_string (substed, substed_len);
2528         }
2529       xfree (substed);
2530     }
2531
2532   /* Replace the old text with the new in the cleanest possible way.  */
2533   replace_range (search_regs.start[sub], search_regs.end[sub],
2534                  newtext, 1, 0, 1);
2535   newpoint = search_regs.start[sub] + SCHARS (newtext);
2536
2537   if (case_action == all_caps)
2538     Fupcase_region (make_number (search_regs.start[sub]),
2539                     make_number (newpoint));
2540   else if (case_action == cap_initial)
2541     Fupcase_initials_region (make_number (search_regs.start[sub]),
2542                              make_number (newpoint));
2543
2544   /* Adjust search data for this change.  */
2545   {
2546     int oldend = search_regs.end[sub];
2547     int oldstart = search_regs.start[sub];
2548     int change = newpoint - search_regs.end[sub];
2549     int i;
2550
2551     for (i = 0; i < search_regs.num_regs; i++)
2552       {
2553         if (search_regs.start[i] >= oldend)
2554           search_regs.start[i] += change;
2555         else if (search_regs.start[i] > oldstart)
2556           search_regs.start[i] = oldstart;
2557         if (search_regs.end[i] >= oldend)
2558           search_regs.end[i] += change;
2559         else if (search_regs.end[i] > oldstart)
2560           search_regs.end[i] = oldstart;
2561       }
2562   }
2563
2564   /* Put point back where it was in the text.  */
2565   if (opoint <= 0)
2566     TEMP_SET_PT (opoint + ZV);
2567   else
2568     TEMP_SET_PT (opoint);
2569
2570   /* Now move point "officially" to the start of the inserted replacement.  */
2571   move_if_not_intangible (newpoint);
2572
2573   return Qnil;
2574 }
2575 \f
2576 static Lisp_Object
2577 match_limit (num, beginningp)
2578      Lisp_Object num;
2579      int beginningp;
2580 {
2581   register int n;
2582
2583   CHECK_NUMBER (num);
2584   n = XINT (num);
2585   if (n < 0)
2586     args_out_of_range (num, make_number (0));
2587   if (search_regs.num_regs <= 0)
2588     error ("No match data, because no search succeeded");
2589   if (n >= search_regs.num_regs
2590       || search_regs.start[n] < 0)
2591     return Qnil;
2592   return (make_number ((beginningp) ? search_regs.start[n]
2593                                     : search_regs.end[n]));
2594 }
2595
2596 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2597        doc: /* Return position of start of text matched by last search.
2598 SUBEXP, a number, specifies which parenthesized expression in the last
2599   regexp.
2600 Value is nil if SUBEXPth pair didn't match, or there were less than
2601   SUBEXP pairs.
2602 Zero means the entire text matched by the whole regexp or whole string.  */)
2603      (subexp)
2604      Lisp_Object subexp;
2605 {
2606   return match_limit (subexp, 1);
2607 }
2608
2609 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2610        doc: /* Return position of end of text matched by last search.
2611 SUBEXP, a number, specifies which parenthesized expression in the last
2612   regexp.
2613 Value is nil if SUBEXPth pair didn't match, or there were less than
2614   SUBEXP pairs.
2615 Zero means the entire text matched by the whole regexp or whole string.  */)
2616      (subexp)
2617      Lisp_Object subexp;
2618 {
2619   return match_limit (subexp, 0);
2620 }
2621
2622 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2623        doc: /* Return a list containing all info on what the last search matched.
2624 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2625 All the elements are markers or nil (nil if the Nth pair didn't match)
2626 if the last match was on a buffer; integers or nil if a string was matched.
2627 Use `store-match-data' to reinstate the data in this list.
2628
2629 If INTEGERS (the optional first argument) is non-nil, always use
2630 integers \(rather than markers) to represent buffer positions.  In
2631 this case, and if the last match was in a buffer, the buffer will get
2632 stored as one additional element at the end of the list.
2633
2634 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2635 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2636
2637 Return value is undefined if the last search failed.  */)
2638      (integers, reuse)
2639      Lisp_Object integers, reuse;
2640 {
2641   Lisp_Object tail, prev;
2642   Lisp_Object *data;
2643   int i, len;
2644
2645   if (NILP (last_thing_searched))
2646     return Qnil;
2647
2648   prev = Qnil;
2649
2650   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2651                                  * sizeof (Lisp_Object));
2652
2653   len = 0;
2654   for (i = 0; i < search_regs.num_regs; i++)
2655     {
2656       int start = search_regs.start[i];
2657       if (start >= 0)
2658         {
2659           if (EQ (last_thing_searched, Qt)
2660               || ! NILP (integers))
2661             {
2662               XSETFASTINT (data[2 * i], start);
2663               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2664             }
2665           else if (BUFFERP (last_thing_searched))
2666             {
2667               data[2 * i] = Fmake_marker ();
2668               Fset_marker (data[2 * i],
2669                            make_number (start),
2670                            last_thing_searched);
2671               data[2 * i + 1] = Fmake_marker ();
2672               Fset_marker (data[2 * i + 1],
2673                            make_number (search_regs.end[i]),
2674                            last_thing_searched);
2675             }
2676           else
2677             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2678             abort ();
2679
2680           len = 2*(i+1);
2681         }
2682       else
2683         data[2 * i] = data [2 * i + 1] = Qnil;
2684     }
2685
2686   if (BUFFERP (last_thing_searched) && !NILP (integers))
2687     {
2688       data[len] = last_thing_searched;
2689       len++;
2690     }
2691
2692   /* If REUSE is not usable, cons up the values and return them.  */
2693   if (! CONSP (reuse))
2694     return Flist (len, data);
2695
2696   /* If REUSE is a list, store as many value elements as will fit
2697      into the elements of REUSE.  */
2698   for (i = 0, tail = reuse; CONSP (tail);
2699        i++, tail = XCDR (tail))
2700     {
2701       if (i < len)
2702         XSETCAR (tail, data[i]);
2703       else
2704         XSETCAR (tail, Qnil);
2705       prev = tail;
2706     }
2707
2708   /* If we couldn't fit all value elements into REUSE,
2709      cons up the rest of them and add them to the end of REUSE.  */
2710   if (i < len)
2711     XSETCDR (prev, Flist (len - i, data + i));
2712
2713   return reuse;
2714 }
2715
2716
2717 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2718        doc: /* Set internal data on last search match from elements of LIST.
2719 LIST should have been created by calling `match-data' previously.  */)
2720      (list)
2721      register Lisp_Object list;
2722 {
2723   register int i;
2724   register Lisp_Object marker;
2725
2726   if (running_asynch_code)
2727     save_search_regs ();
2728
2729   if (!CONSP (list) && !NILP (list))
2730     list = wrong_type_argument (Qconsp, list);
2731
2732   /* Unless we find a marker with a buffer or an explicit buffer
2733      in LIST, assume that this match data came from a string.  */
2734   last_thing_searched = Qt;
2735
2736   /* Allocate registers if they don't already exist.  */
2737   {
2738     int length = XFASTINT (Flength (list)) / 2;
2739
2740     if (length > search_regs.num_regs)
2741       {
2742         if (search_regs.num_regs == 0)
2743           {
2744             search_regs.start
2745               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2746             search_regs.end
2747               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2748           }
2749         else
2750           {
2751             search_regs.start
2752               = (regoff_t *) xrealloc (search_regs.start,
2753                                        length * sizeof (regoff_t));
2754             search_regs.end
2755               = (regoff_t *) xrealloc (search_regs.end,
2756                                        length * sizeof (regoff_t));
2757           }
2758
2759         for (i = search_regs.num_regs; i < length; i++)
2760           search_regs.start[i] = -1;
2761
2762         search_regs.num_regs = length;
2763       }
2764
2765     for (i = 0;; i++)
2766       {
2767         marker = Fcar (list);
2768         if (BUFFERP (marker))
2769           {
2770             last_thing_searched = marker;
2771             break;
2772           }
2773         if (i >= length)
2774           break;
2775         if (NILP (marker))
2776           {
2777             search_regs.start[i] = -1;
2778             list = Fcdr (list);
2779           }
2780         else
2781           {
2782             int from;
2783
2784             if (MARKERP (marker))
2785               {
2786                 if (XMARKER (marker)->buffer == 0)
2787                   XSETFASTINT (marker, 0);
2788                 else
2789                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2790               }
2791
2792             CHECK_NUMBER_COERCE_MARKER (marker);
2793             from = XINT (marker);
2794             list = Fcdr (list);
2795
2796             marker = Fcar (list);
2797             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2798               XSETFASTINT (marker, 0);
2799
2800             CHECK_NUMBER_COERCE_MARKER (marker);
2801             search_regs.start[i] = from;
2802             search_regs.end[i] = XINT (marker);
2803           }
2804         list = Fcdr (list);
2805       }
2806
2807     for (; i < search_regs.num_regs; i++)
2808       search_regs.start[i] = -1;
2809   }
2810
2811   return Qnil;
2812 }
2813
2814 /* If non-zero the match data have been saved in saved_search_regs
2815    during the execution of a sentinel or filter. */
2816 static int search_regs_saved;
2817 static struct re_registers saved_search_regs;
2818 static Lisp_Object saved_last_thing_searched;
2819
2820 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2821    if asynchronous code (filter or sentinel) is running. */
2822 static void
2823 save_search_regs ()
2824 {
2825   if (!search_regs_saved)
2826     {
2827       saved_search_regs.num_regs = search_regs.num_regs;
2828       saved_search_regs.start = search_regs.start;
2829       saved_search_regs.end = search_regs.end;
2830       saved_last_thing_searched = last_thing_searched;
2831       last_thing_searched = Qnil;
2832       search_regs.num_regs = 0;
2833       search_regs.start = 0;
2834       search_regs.end = 0;
2835
2836       search_regs_saved = 1;
2837     }
2838 }
2839
2840 /* Called upon exit from filters and sentinels. */
2841 void
2842 restore_match_data ()
2843 {
2844   if (search_regs_saved)
2845     {
2846       if (search_regs.num_regs > 0)
2847         {
2848           xfree (search_regs.start);
2849           xfree (search_regs.end);
2850         }
2851       search_regs.num_regs = saved_search_regs.num_regs;
2852       search_regs.start = saved_search_regs.start;
2853       search_regs.end = saved_search_regs.end;
2854       last_thing_searched = saved_last_thing_searched;
2855       saved_last_thing_searched = Qnil;
2856       search_regs_saved = 0;
2857     }
2858 }
2859
2860 /* Quote a string to inactivate reg-expr chars */
2861
2862 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2863        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2864      (string)
2865      Lisp_Object string;
2866 {
2867   register unsigned char *in, *out, *end;
2868   register unsigned char *temp;
2869   int backslashes_added = 0;
2870
2871   CHECK_STRING (string);
2872
2873   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2874
2875   /* Now copy the data into the new string, inserting escapes. */
2876
2877   in = SDATA (string);
2878   end = in + SBYTES (string);
2879   out = temp;
2880
2881   for (; in != end; in++)
2882     {
2883       if (*in == '[' || *in == ']'
2884           || *in == '*' || *in == '.' || *in == '\\'
2885           || *in == '?' || *in == '+'
2886           || *in == '^' || *in == '$')
2887         *out++ = '\\', backslashes_added++;
2888       *out++ = *in;
2889     }
2890
2891   return make_specified_string (temp,
2892                                 SCHARS (string) + backslashes_added,
2893                                 out - temp,
2894                                 STRING_MULTIBYTE (string));
2895 }
2896 \f
2897 void
2898 syms_of_search ()
2899 {
2900   register int i;
2901
2902   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2903     {
2904       searchbufs[i].buf.allocated = 100;
2905       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
2906       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2907       searchbufs[i].regexp = Qnil;
2908       staticpro (&searchbufs[i].regexp);
2909       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2910     }
2911   searchbuf_head = &searchbufs[0];
2912
2913   Qsearch_failed = intern ("search-failed");
2914   staticpro (&Qsearch_failed);
2915   Qinvalid_regexp = intern ("invalid-regexp");
2916   staticpro (&Qinvalid_regexp);
2917
2918   Fput (Qsearch_failed, Qerror_conditions,
2919         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2920   Fput (Qsearch_failed, Qerror_message,
2921         build_string ("Search failed"));
2922
2923   Fput (Qinvalid_regexp, Qerror_conditions,
2924         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2925   Fput (Qinvalid_regexp, Qerror_message,
2926         build_string ("Invalid regexp"));
2927
2928   last_thing_searched = Qnil;
2929   staticpro (&last_thing_searched);
2930
2931   saved_last_thing_searched = Qnil;
2932   staticpro (&saved_last_thing_searched);
2933
2934   defsubr (&Slooking_at);
2935   defsubr (&Sposix_looking_at);
2936   defsubr (&Sstring_match);
2937   defsubr (&Sposix_string_match);
2938   defsubr (&Ssearch_forward);
2939   defsubr (&Ssearch_backward);
2940   defsubr (&Sword_search_forward);
2941   defsubr (&Sword_search_backward);
2942   defsubr (&Sre_search_forward);
2943   defsubr (&Sre_search_backward);
2944   defsubr (&Sposix_search_forward);
2945   defsubr (&Sposix_search_backward);
2946   defsubr (&Sreplace_match);
2947   defsubr (&Smatch_beginning);
2948   defsubr (&Smatch_end);
2949   defsubr (&Smatch_data);
2950   defsubr (&Sset_match_data);
2951   defsubr (&Sregexp_quote);
2952 }
2953
2954 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
2955    (do not change this comment) */