src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
     /* Number of compiled patterns kept in the cache; this is the length
        of the `searchbufs' array below.  */
  36 #define REGEXP_CACHE_SIZE 20
  37
  38 /* If the regexp is non-nil, then the buffer contains the compiled form
  39    of that regexp, suitable for searching.  */
  40 struct regexp_cache
  41 {
       /* Next entry in the list that starts at `searchbuf_head', or 0 at
          the end of the list.  */
  42   struct regexp_cache *next;
       /* The pattern string this entry was compiled from (compile_pattern_1
          stores a copy), or nil if the entry holds no valid compiled
          pattern.  */
  43   Lisp_Object regexp;
       /* The compiled pattern.  Its `translate' and `multibyte' members are
          set by compile_pattern_1 and compared again on cache lookup.  */
  44   struct re_pattern_buffer buf;
       /* 0400 is octal, i.e. 256 bytes.  Not referenced by the code visible
          here; presumably installed as BUF's fastmap during initialization
          -- TODO confirm.  */
  45   char fastmap[0400];
  46   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  47   char posix;
  48 };
  49
  50 /* The instances of that struct.  */
  51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  52
  53 /* The head of the linked list; points to the most recently used buffer.  */
  54 struct regexp_cache *searchbuf_head;
  55
  56
  57 /* Every call to re_match, etc., must pass &search_regs as the regs
  58    argument unless you can show it is unnecessary (i.e., if re_match
  59    is certainly going to be called again before region-around-match
  60    can be called).
  61
  62    Since the registers are now dynamically allocated, we need to make
  63    sure not to refer to the Nth register before checking that it has
  64    been allocated by checking search_regs.num_regs.
  65
  66    The regex code keeps track of whether it has allocated the search
  67    buffer using bits in the re_pattern_buffer.  This means that whenever
  68    you compile a new pattern, it completely forgets whether it has
  69    allocated any registers, and will allocate new registers the next
  70    time you call a searching or matching function.  Therefore, we need
  71    to call re_set_registers after compiling a new pattern or after
  72    setting the match registers, so that the regex functions will be
  73    able to free or re-allocate it properly.  */
  74 static struct re_registers search_regs;
  75
  76 /* The buffer in which the last search was performed, or
  77    Qt if the last search was done in a string;
  78    Qnil if no searching has been done yet.  */
  79 static Lisp_Object last_thing_searched;
  80
  81 /* Error condition signaled by compile_pattern_1 when the regexp
        compiler rejects a pattern.  */
  82
  83 Lisp_Object Qinvalid_regexp;
  84
     /* Old-style (unprototyped) forward declarations of static helpers
        whose definitions are not in this part of the file.
        save_search_regs and search_buffer are called further down.  */
  85 static void set_search_regs ();
  86 static void save_search_regs ();
  87 static int simple_search ();
  88 static int boyer_moore ();
  89 static int search_buffer ();
  90
     /* Signal a Lisp error reporting that the regexp matcher overflowed
        its stack.  The matching functions in this file call it when the
        regex library returns -2.  */
  91 static void
  92 matcher_overflow ()
  93 {
  94   error ("Stack overflow in regexp matcher");
  95 }
  96
  97 /* Compile a regexp and signal a Lisp error if anything goes wrong.
  98    PATTERN is the pattern to compile.
  99    CP is the place to put the result.
 100    TRANSLATE is a translation table for ignoring case, or nil for none.
 101    REGP is the structure that says where to store the "register"
 102    values that will result from matching this pattern.
 103    If it is 0, we should compile the pattern not to record any
 104    subexpression bounds.
 105    POSIX is nonzero if we want full backtracking (POSIX style)
 106    for this pattern.  0 means backtrack only enough to get a valid match.
 107    MULTIBYTE is nonzero if we want to handle multibyte characters in
 108    PATTERN.  0 means all multibyte characters are recognized just as
 109    sequences of binary data.  */
 110
 111 static void
 112 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 113      struct regexp_cache *cp;
 114      Lisp_Object pattern;
 115      Lisp_Object translate;
 116      struct re_registers *regp;
 117      int posix;
 118      int multibyte;
 119 {
 120   unsigned char *raw_pattern;
 121   int raw_pattern_size;
 122   char *val;
 123   reg_syntax_t old;
 124
 125   /* MULTIBYTE says whether the text to be searched is multibyte.
 126      We must convert PATTERN to match that, or we will not really
 127      find things right.  */
 128
 129   if (multibyte == STRING_MULTIBYTE (pattern))
 130     {
 131       raw_pattern = (unsigned char *) SDATA (pattern);
 132       raw_pattern_size = SBYTES (pattern);
 133     }
 134   else if (multibyte)
 135     {
 136       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 137                                                   SCHARS (pattern));
 138       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 139       copy_text (SDATA (pattern), raw_pattern,
 140                  SCHARS (pattern), 0, 1);
 141     }
 142   else
 143     {
 144       /* Converting multibyte to single-byte.
 145
 146          ??? Perhaps this conversion should be done in a special way
 147          by subtracting nonascii-insert-offset from each non-ASCII char,
 148          so that only the multibyte chars which really correspond to
 149          the chosen single-byte character set can possibly match.  */
 150       raw_pattern_size = SCHARS (pattern);
 151       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 152       copy_text (SDATA (pattern), raw_pattern,
 153                  SBYTES (pattern), 1, 0);
 154     }
 155
 156   cp->regexp = Qnil;
 157   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 158   cp->posix = posix;
 159   cp->buf.multibyte = multibyte;
 160   BLOCK_INPUT;
 161   old = re_set_syntax (RE_SYNTAX_EMACS
 162                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 163   val = (char *) re_compile_pattern ((char *)raw_pattern,
 164                                      raw_pattern_size, &cp->buf);
 165   re_set_syntax (old);
 166   UNBLOCK_INPUT;
 167   if (val)
 168     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 169
 170   cp->regexp = Fcopy_sequence (pattern);
 171 }
 172
 173 /* Shrink each compiled regexp buffer in the cache
 174    to the size actually used right now.
 175    This is called from garbage collection.  */
 176
 177 void
 178 shrink_regexp_cache ()
 179 {
 180   struct regexp_cache *cp;
 181
 182   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 183     {
 184       cp->buf.allocated = cp->buf.used;
 185       cp->buf.buffer
 186         = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
 187     }
 188 }
 189
 190 /* Compile a regexp if necessary, but first check to see if there's one in
 191    the cache.
 192    PATTERN is the pattern to compile.
 193    TRANSLATE is a translation table for ignoring case, or nil for none.
 194    REGP is the structure that says where to store the "register"
 195    values that will result from matching this pattern.
 196    If it is 0, we should compile the pattern not to record any
 197    subexpression bounds.
 198    POSIX is nonzero if we want full backtracking (POSIX style)
 199    for this pattern.  0 means backtrack only enough to get a valid match.  */
 200
 201 struct re_pattern_buffer *
 202 compile_pattern (pattern, regp, translate, posix, multibyte)
 203      Lisp_Object pattern;
 204      struct re_registers *regp;
 205      Lisp_Object translate;
 206      int posix, multibyte;
 207 {
 208   struct regexp_cache *cp, **cpp;
 209
 210   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 211     {
 212       cp = *cpp;
 213       /* Entries are initialized to nil, and may be set to nil by
 214          compile_pattern_1 if the pattern isn't valid.  Don't apply
 215          string accessors in those cases.  However, compile_pattern_1
 216          is only applied to the cache entry we pick here to reuse.  So
 217          nil should never appear before a non-nil entry.  */
 218       if (NILP (cp->regexp))
 219         goto compile_it;
 220       if (SCHARS (cp->regexp) == SCHARS (pattern)
 221           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 222           && !NILP (Fstring_equal (cp->regexp, pattern))
 223           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 224           && cp->posix == posix
 225           && cp->buf.multibyte == multibyte)
 226         break;
 227
 228       /* If we're at the end of the cache, compile into the nil cell
 229          we found, or the last (least recently used) cell with a
 230          string value.  */
 231       if (cp->next == 0)
 232         {
 233         compile_it:
 234           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 235           break;
 236         }
 237     }
 238
 239   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 240      either because we found it in the cache or because we just compiled it.
 241      Move it to the front of the queue to mark it as most recently used.  */
 242   *cpp = cp->next;
 243   cp->next = searchbuf_head;
 244   searchbuf_head = cp;
 245
 246   /* Advise the searching functions about the space we have allocated
 247      for register data.  */
 248   if (regp)
 249     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 250
 251   return &cp->buf;
 252 }
 253
 254 /* Error condition used for failing searches */
 255 Lisp_Object Qsearch_failed;
 256
 257 Lisp_Object
 258 signal_failure (arg)
 259      Lisp_Object arg;
 260 {
 261   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 262   return Qnil;
 263 }
 264 \f
 265 static Lisp_Object
 266 looking_at_1 (string, posix)
 267      Lisp_Object string;
 268      int posix;
 269 {
 270   Lisp_Object val;
 271   unsigned char *p1, *p2;
 272   int s1, s2;
 273   register int i;
 274   struct re_pattern_buffer *bufp;
 275
 276   if (running_asynch_code)
 277     save_search_regs ();
 278
 279   CHECK_STRING (string);
 280   bufp = compile_pattern (string, &search_regs,
 281                           (!NILP (current_buffer->case_fold_search)
 282                            ? DOWNCASE_TABLE : Qnil),
 283                           posix,
 284                           !NILP (current_buffer->enable_multibyte_characters));
 285
 286   immediate_quit = 1;
 287   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 288
 289   /* Get pointers and sizes of the two strings
 290      that make up the visible portion of the buffer. */
 291
 292   p1 = BEGV_ADDR;
 293   s1 = GPT_BYTE - BEGV_BYTE;
 294   p2 = GAP_END_ADDR;
 295   s2 = ZV_BYTE - GPT_BYTE;
 296   if (s1 < 0)
 297     {
 298       p2 = p1;
 299       s2 = ZV_BYTE - BEGV_BYTE;
 300       s1 = 0;
 301     }
 302   if (s2 < 0)
 303     {
 304       s1 = ZV_BYTE - BEGV_BYTE;
 305       s2 = 0;
 306     }
 307
 308   re_match_object = Qnil;
 309
 310   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 311                   PT_BYTE - BEGV_BYTE, &search_regs,
 312                   ZV_BYTE - BEGV_BYTE);
 313   immediate_quit = 0;
 314
 315   if (i == -2)
 316     matcher_overflow ();
 317
 318   val = (0 <= i ? Qt : Qnil);
 319   if (i >= 0)
 320     for (i = 0; i < search_regs.num_regs; i++)
 321       if (search_regs.start[i] >= 0)
 322         {
 323           search_regs.start[i]
 324             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 325           search_regs.end[i]
 326             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 327         }
 328   XSETBUFFER (last_thing_searched, current_buffer);
 329   return val;
 330 }
 331
     /* Lisp primitive `looking-at': delegates to looking_at_1 with
        POSIX = 0.  */
 332 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 333        doc: /* Return t if text after point matches regular expression REGEXP.
 334 This function modifies the match data that `match-beginning',
 335 `match-end' and `match-data' access; save and restore the match
 336 data if you want to preserve them.  */)
 337      (regexp)
 338      Lisp_Object regexp;
 339 {
 340   return looking_at_1 (regexp, 0);
 341 }
 342
     /* Lisp primitive `posix-looking-at': delegates to looking_at_1 with
        POSIX = 1.  */
 343 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 344        doc: /* Return t if text after point matches regular expression REGEXP.
 345 Find the longest match, in accord with Posix regular expression rules.
 346 This function modifies the match data that `match-beginning',
 347 `match-end' and `match-data' access; save and restore the match
 348 data if you want to preserve them.  */)
 349      (regexp)
 350      Lisp_Object regexp;
 351 {
 352   return looking_at_1 (regexp, 1);
 353 }
 354 \f
 355 static Lisp_Object
 356 string_match_1 (regexp, string, start, posix)
 357      Lisp_Object regexp, string, start;
 358      int posix;
 359 {
 360   int val;
 361   struct re_pattern_buffer *bufp;
 362   int pos, pos_byte;
 363   int i;
 364
 365   if (running_asynch_code)
 366     save_search_regs ();
 367
 368   CHECK_STRING (regexp);
 369   CHECK_STRING (string);
 370
 371   if (NILP (start))
 372     pos = 0, pos_byte = 0;
 373   else
 374     {
 375       int len = SCHARS (string);
 376
 377       CHECK_NUMBER (start);
 378       pos = XINT (start);
 379       if (pos < 0 && -pos <= len)
 380         pos = len + pos;
 381       else if (0 > pos || pos > len)
 382         args_out_of_range (string, start);
 383       pos_byte = string_char_to_byte (string, pos);
 384     }
 385
 386   bufp = compile_pattern (regexp, &search_regs,
 387                           (!NILP (current_buffer->case_fold_search)
 388                            ? DOWNCASE_TABLE : Qnil),
 389                           posix,
 390                           STRING_MULTIBYTE (string));
 391   immediate_quit = 1;
 392   re_match_object = string;
 393
 394   val = re_search (bufp, (char *) SDATA (string),
 395                    SBYTES (string), pos_byte,
 396                    SBYTES (string) - pos_byte,
 397                    &search_regs);
 398   immediate_quit = 0;
 399   last_thing_searched = Qt;
 400   if (val == -2)
 401     matcher_overflow ();
 402   if (val < 0) return Qnil;
 403
 404   for (i = 0; i < search_regs.num_regs; i++)
 405     if (search_regs.start[i] >= 0)
 406       {
 407         search_regs.start[i]
 408           = string_byte_to_char (string, search_regs.start[i]);
 409         search_regs.end[i]
 410           = string_byte_to_char (string, search_regs.end[i]);
 411       }
 412
 413   return make_number (string_byte_to_char (string, val));
 414 }
 415
     /* Lisp primitive `string-match': delegates to string_match_1 with
        POSIX = 0.  */
 416 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 417        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 418 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 419 If third arg START is non-nil, start search at that index in STRING.
 420 For index of first char beyond the match, do (match-end 0).
 421 `match-end' and `match-beginning' also give indices of substrings
 422 matched by parenthesis constructs in the pattern.
 423
 424 You can use the function `match-string' to extract the substrings
 425 matched by the parenthesis constructions in REGEXP. */)
 426      (regexp, string, start)
 427      Lisp_Object regexp, string, start;
 428 {
 429   return string_match_1 (regexp, string, start, 0);
 430 }
 431
     /* Lisp primitive `posix-string-match': delegates to string_match_1
        with POSIX = 1.  */
 432 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 433        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 434 Find the longest match, in accord with Posix regular expression rules.
 435 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 436 If third arg START is non-nil, start search at that index in STRING.
 437 For index of first char beyond the match, do (match-end 0).
 438 `match-end' and `match-beginning' also give indices of substrings
 439 matched by parenthesis constructs in the pattern.  */)
 440      (regexp, string, start)
 441      Lisp_Object regexp, string, start;
 442 {
 443   return string_match_1 (regexp, string, start, 1);
 444 }
 445
 446 /* Match REGEXP against STRING, searching all of STRING,
 447    and return the index of the match, or negative on failure.
 448    This does not clobber the match data.  */
 449
 450 int
 451 fast_string_match (regexp, string)
 452      Lisp_Object regexp, string;
 453 {
 454   int val;
 455   struct re_pattern_buffer *bufp;
 456
 457   bufp = compile_pattern (regexp, 0, Qnil,
 458                           0, STRING_MULTIBYTE (string));
 459   immediate_quit = 1;
 460   re_match_object = string;
 461
 462   val = re_search (bufp, (char *) SDATA (string),
 463                    SBYTES (string), 0,
 464                    SBYTES (string), 0);
 465   immediate_quit = 0;
 466   return val;
 467 }
 468
 469 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 470    and return the index of the match, or negative on failure.
 471    This does not clobber the match data.
 472    We assume that STRING contains single-byte characters.  */
 473
 474 extern Lisp_Object Vascii_downcase_table;
 475
 476 int
 477 fast_c_string_match_ignore_case (regexp, string)
 478      Lisp_Object regexp;
 479      const char *string;
 480 {
 481   int val;
 482   struct re_pattern_buffer *bufp;
 483   int len = strlen (string);
 484
 485   regexp = string_make_unibyte (regexp);
 486   re_match_object = Qt;
 487   bufp = compile_pattern (regexp, 0,
 488                           Vascii_downcase_table, 0,
 489                           0);
 490   immediate_quit = 1;
 491   val = re_search (bufp, string, len, 0, len, 0);
 492   immediate_quit = 0;
 493   return val;
 494 }
 495 \f
 496 /* The newline cache: remembering which sections of text have no newlines.  */
 497
 498 /* If the user has requested newline caching, make sure it's on.
 499    Otherwise, make sure it's off.
 500    This is our cheezy way of associating an action with the change of
 501    state of a buffer-local variable.  */
 502 static void
 503 newline_cache_on_off (buf)
 504      struct buffer *buf;
 505 {
 506   if (NILP (buf->cache_long_line_scans))
 507     {
 508       /* It should be off.  */
 509       if (buf->newline_cache)
 510         {
 511           free_region_cache (buf->newline_cache);
 512           buf->newline_cache = 0;
 513         }
 514     }
 515   else
 516     {
 517       /* It should be on.  */
 518       if (buf->newline_cache == 0)
 519         buf->newline_cache = new_region_cache ();
 520     }
 521 }
 522
 523 \f
 524 /* Search for COUNT instances of the character TARGET between START and END.
 525
 526    If COUNT is positive, search forwards; END must be >= START.
 527    If COUNT is negative, search backwards for the -COUNTth instance;
 528       END must be <= START.
 529    If COUNT is zero, do anything you please; run rogue, for all I care.
 530
 531    If END is zero, use BEGV or ZV instead, as appropriate for the
 532    direction indicated by COUNT.
 533
 534    If we find COUNT instances, set *SHORTAGE to zero, and return the
 535    position after the COUNTth match.  Note that for reverse motion
 536    this is not the same as the usual convention for Emacs motion commands.
 537
 538    If we don't find COUNT instances before reaching END, set *SHORTAGE
 539    to the number of TARGETs left unfound, and return END.
 540
 541    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 542    except when inside redisplay.  */
     /* START, END and the return value are character positions; the
        scanning itself is done on byte positions and raw addresses.
        SHORTAGE may be a null pointer, in which case nothing is stored
        through it.  immediate_quit is reset to 0 before every return.  */
 543
 544 int
 545 scan_buffer (target, start, end, count, shortage, allow_quit)
 546      register int target;
 547      int start, end;
 548      int count;
 549      int *shortage;
 550      int allow_quit;
 551 {
 552   struct region_cache *newline_cache;
 553   int direction;
 554
       /* Note that a COUNT of zero takes the else branch and is therefore
          handled as a backward scan.  */
 555   if (count > 0)
 556     {
 557       direction = 1;
 558       if (! end) end = ZV;
 559     }
 560   else
 561     {
 562       direction = -1;
 563       if (! end) end = BEGV;
 564     }
 565
       /* Create or discard the buffer's newline cache as its setting
          requires, then pick up whatever is there now (possibly 0).  */
 566   newline_cache_on_off (current_buffer);
 567   newline_cache = current_buffer->newline_cache;
 568
 569   if (shortage != 0)
 570     *shortage = 0;
 571
 572   immediate_quit = allow_quit;
 573
 574   if (count > 0)
 575     while (start != end)
 576       {
 577         /* Our innermost scanning loop is very simple; it doesn't know
 578            about gaps, buffer ends, or the newline cache.  ceiling is
 579            the position of the last character before the next such
 580            obstacle --- the last character the dumb search loop should
 581            examine.  */
 582         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 583         int start_byte = CHAR_TO_BYTE (start);
 584         int tem;
 585
 586         /* If we're looking for a newline, consult the newline cache
 587            to see where we can avoid some scanning.  */
 588         if (target == '\n' && newline_cache)
 589           {
 590             int next_change;
                 /* immediate_quit is cleared while the cache is being
                    queried and restored right afterwards.  */
 591             immediate_quit = 0;
 592             while (region_cache_forward
 593                    (current_buffer, newline_cache, start_byte, &next_change))
 594               start_byte = next_change;
 595             immediate_quit = allow_quit;
 596
 597             /* START should never be after END.  */
 598             if (start_byte > ceiling_byte)
 599               start_byte = ceiling_byte;
 600
 601             /* Now the text after start is an unknown region, and
 602                next_change is the position of the next known region. */
 603             ceiling_byte = min (next_change - 1, ceiling_byte);
 604           }
 605
 606         /* The dumb loop can only scan text stored in contiguous
 607            bytes. BUFFER_CEILING_OF returns the last character
 608            position that is contiguous, so the ceiling is the
 609            position after that.  */
 610         tem = BUFFER_CEILING_OF (start_byte);
 611         ceiling_byte = min (tem, ceiling_byte);
 612
 613         {
 614           /* The termination address of the dumb loop.  */
 615           register unsigned char *ceiling_addr
 616             = BYTE_POS_ADDR (ceiling_byte) + 1;
 617           register unsigned char *cursor
 618             = BYTE_POS_ADDR (start_byte);
               /* BASE is the address that corresponds to START_BYTE, so
                  START_BYTE + (p - BASE) is the byte position of address
                  P within this contiguous stretch.  */
 619           unsigned char *base = cursor;
 620
 621           while (cursor < ceiling_addr)
 622             {
 623               unsigned char *scan_start = cursor;
 624
 625               /* The dumb loop.  */
 626               while (*cursor != target && ++cursor < ceiling_addr)
 627                 ;
 628
 629               /* If we're looking for newlines, cache the fact that
 630                  the region from start to cursor is free of them. */
 631               if (target == '\n' && newline_cache)
 632                 know_region_cache (current_buffer, newline_cache,
 633                                    start_byte + scan_start - base,
 634                                    start_byte + cursor - base);
 635
 636               /* Did we find the target character?  */
 637               if (cursor < ceiling_addr)
 638                 {
 639                   if (--count == 0)
 640                     {
 641                       immediate_quit = 0;
                           /* The + 1 makes this the position just after
                              the matched character.  */
 642                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 643                     }
 644                   cursor++;
 645                 }
 646             }
 647
               /* Resume the outer loop from where this stretch ended.  */
 648           start = BYTE_TO_CHAR (start_byte + cursor - base);
 649         }
 650       }
 651   else
 652     while (start > end)
 653       {
 654         /* The last character to check before the next obstacle.  */
 655         int ceiling_byte = CHAR_TO_BYTE (end);
 656         int start_byte = CHAR_TO_BYTE (start);
 657         int tem;
 658
 659         /* Consult the newline cache, if appropriate.  */
 660         if (target == '\n' && newline_cache)
 661           {
 662             int next_change;
 663             immediate_quit = 0;
 664             while (region_cache_backward
 665                    (current_buffer, newline_cache, start_byte, &next_change))
 666               start_byte = next_change;
 667             immediate_quit = allow_quit;
 668
 669             /* Start should never be at or before end.  */
 670             if (start_byte <= ceiling_byte)
 671               start_byte = ceiling_byte + 1;
 672
 673             /* Now the text before start is an unknown region, and
 674                next_change is the position of the next known region. */
 675             ceiling_byte = max (next_change, ceiling_byte);
 676           }
 677
 678         /* Stop scanning before the gap.  */
 679         tem = BUFFER_FLOOR_OF (start_byte - 1);
 680         ceiling_byte = max (tem, ceiling_byte);
 681
 682         {
 683           /* The termination address of the dumb loop.  */
 684           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 685           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
               /* Here BASE is the address of the byte at START_BYTE - 1,
                  so START_BYTE + (p - BASE) is one more than the byte
                  position of address P.  */
 686           unsigned char *base = cursor;
 687
 688           while (cursor >= ceiling_addr)
 689             {
 690               unsigned char *scan_start = cursor;
 691
 692               while (*cursor != target && --cursor >= ceiling_addr)
 693                 ;
 694
 695               /* If we're looking for newlines, cache the fact that
 696                  the region from after the cursor to start is free of them.  */
 697               if (target == '\n' && newline_cache)
 698                 know_region_cache (current_buffer, newline_cache,
 699                                    start_byte + cursor - base,
 700                                    start_byte + scan_start - base);
 701
 702               /* Did we find the target character?  */
 703               if (cursor >= ceiling_addr)
 704                 {
 705                   if (++count >= 0)
 706                     {
 707                       immediate_quit = 0;
                           /* Because of how BASE is defined, this is the
                              position just after the matched character.  */
 708                       return BYTE_TO_CHAR (start_byte + cursor - base);
 709                     }
 710                   cursor--;
 711                 }
 712             }
 713
 714           start = BYTE_TO_CHAR (start_byte + cursor - base);
 715         }
 716       }
 717
       /* We reached END without finding enough instances.  COUNT still
          carries the sign of the scan direction, so multiplying by
          DIRECTION reports the number left unfound as a non-negative
          value.  */
 718   immediate_quit = 0;
 719   if (shortage != 0)
 720     *shortage = count * direction;
 721   return start;
 722 }
 723 \f
 724 /* Search for COUNT instances of a line boundary, which means either a
 725    newline or (if selective display enabled) a carriage return.
 726    Start at START.  If COUNT is negative, search backwards.
 727
 728    We report the resulting position by calling TEMP_SET_PT_BOTH.
 729
 730    If we find COUNT instances, we position after (always after,
 731    even if scanning backwards) the COUNTth match, and return 0.
 732
 733    If we don't find COUNT instances before reaching the end of the
 734    buffer (or the beginning, if scanning backwards), we return
 735    the number of line boundaries left unfound, and position at
 736    the limit we bumped up against.
 737
 738    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 739    except in special cases.  */
     /* LIMIT and LIMIT_BYTE are the character and byte positions at which
        the scan stops; they are what point is set to when the scan runs
        out.  The START_BYTE argument is not used as passed: it is
        recomputed from START below.  immediate_quit is incremented when
        ALLOW_QUIT is non-zero and restored to its entry value on every
        return path.

        NOTE(review): the loops below compare only against '\n'; no
        carriage-return test is visible in this function, despite the
        description above.  */
 740
 741 int
 742 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 743      int start, start_byte;
 744      int limit, limit_byte;
 745      register int count;
 746      int allow_quit;
 747 {
       /* A COUNT of zero gives direction -1 and is scanned backwards.  */
 748   int direction = ((count > 0) ? 1 : -1);
 749
 750   register unsigned char *cursor;
 751   unsigned char *base;
 752
 753   register int ceiling;
 754   register unsigned char *ceiling_addr;
 755
 756   int old_immediate_quit = immediate_quit;
 757
 758   /* The code that follows is like scan_buffer
 759      but checks for either newline or carriage return.  */
 760
 761   if (allow_quit)
 762     immediate_quit++;
 763
       /* Overrides the START_BYTE argument.  */
 764   start_byte = CHAR_TO_BYTE (start);
 765
 766   if (count > 0)
 767     {
 768       while (start_byte < limit_byte)
 769         {
               /* CEILING is the last byte position to examine in this
                  pass: the end of the contiguous stretch or the byte
                  before LIMIT_BYTE, whichever comes first.  CEILING_ADDR
                  is the address one past it.  */
 770           ceiling =  BUFFER_CEILING_OF (start_byte);
 771           ceiling = min (limit_byte - 1, ceiling);
 772           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 773           base = (cursor = BYTE_POS_ADDR (start_byte));
 774           while (1)
 775             {
 776               while (*cursor != '\n' && ++cursor != ceiling_addr)
 777                 ;
 778
 779               if (cursor != ceiling_addr)
 780                 {
 781                   if (--count == 0)
 782                     {
 783                       immediate_quit = old_immediate_quit;
                           /* The + 1 positions us just after the newline.  */
 784                       start_byte = start_byte + cursor - base + 1;
 785                       start = BYTE_TO_CHAR (start_byte);
 786                       TEMP_SET_PT_BOTH (start, start_byte);
 787                       return 0;
 788                     }
 789                   else
 790                     if (++cursor == ceiling_addr)
 791                       break;
 792                 }
 793               else
 794                 break;
 795             }
               /* Advance past the stretch just scanned.  */
 796           start_byte += cursor - base;
 797         }
 798     }
 799   else
 800     {
 801       while (start_byte > limit_byte)
 802         {
               /* Scanning backwards: CEILING is the lowest byte position
                  to examine in this pass, and CEILING_ADDR is the address
                  one before it.  BASE is one past the address of the byte
                  at START_BYTE - 1; CURSOR is pre-decremented in the loop
                  below, so that byte is the first one examined.  */
 803           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 804           ceiling = max (limit_byte, ceiling);
 805           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 806           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 807           while (1)
 808             {
 809               while (--cursor != ceiling_addr && *cursor != '\n')
 810                 ;
 811
 812               if (cursor != ceiling_addr)
 813                 {
 814                   if (++count == 0)
 815                     {
 816                       immediate_quit = old_immediate_quit;
 817                       /* Return the position AFTER the match we found.  */
 818                       start_byte = start_byte + cursor - base + 1;
 819                       start = BYTE_TO_CHAR (start_byte);
 820                       TEMP_SET_PT_BOTH (start, start_byte);
 821                       return 0;
 822                     }
 823                 }
 824               else
 825                 break;
 826             }
 827           /* Here we add 1 to compensate for the last decrement
 828              of CURSOR, which took it past the valid range.  */
 829           start_byte += cursor - base + 1;
 830         }
 831     }
 832
       /* Ran out of text: leave point at the limit and report how many
          line boundaries were not found.  COUNT has the sign of
          DIRECTION here, so the product is non-negative.  */
 833   TEMP_SET_PT_BOTH (limit, limit_byte);
 834   immediate_quit = old_immediate_quit;
 835
 836   return count * direction;
 837 }
 838
     /* Return the result of scan_buffer for CNT newlines starting at FROM.
        END is passed as 0, so scan_buffer picks ZV or BEGV from the sign
        of CNT; no shortage is reported and ALLOW_QUIT is 0.  */
 839 int
 840 find_next_newline_no_quit (from, cnt)
 841      register int from, cnt;
 842 {
 843   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 844 }
 845
 846 /* Like find_next_newline, but returns position before the newline,
 847    not after, and only search up to TO.  This isn't just
 848    find_next_newline (...)-1, because you might hit TO.  */
 849
 850 int
 851 find_before_next_newline (from, to, cnt)
 852      int from, to, cnt;
 853 {
 854   int shortage;
 855   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 856
 857   if (shortage == 0)
 858     pos--;
 859
 860   return pos;
 861 }
 862 \f
 863 /* Subroutines of Lisp buffer search functions. */
 864
     /* Common body of the buffer search commands.  Search from point for
        STRING, DIRECTION * COUNT times (just DIRECTION times if COUNT is
        nil); the sign of that product selects forward or backward search.
        BOUND, if non-nil, is a number or marker limiting the search; it is
        an error for it to lie on the wrong side of point, and it is clamped
        to the range BEGV..ZV.  With BOUND nil the limit is ZV (forward)
        or BEGV (backward).  RE and POSIX are handed on to search_buffer,
        along with the buffer's case tables when case_fold_search is
        non-nil.

        On success, move point to the position search_buffer returned and
        return it as a Lisp integer.  On failure: call signal_failure if
        NOERROR is nil; return nil without moving point if NOERROR is t;
        otherwise move point to the limit and return nil.  */
 865 static Lisp_Object
 866 search_command (string, bound, noerror, count, direction, RE, posix)
 867      Lisp_Object string, bound, noerror, count;
 868      int direction;
 869      int RE;
 870      int posix;
 871 {
 872   register int np;
 873   int lim, lim_byte;
 874   int n = direction;
 875
       /* Fold the repeat count into N; a negative COUNT reverses the
          direction of the search.  */
 876   if (!NILP (count))
 877     {
 878       CHECK_NUMBER (count);
 879       n *= XINT (count);
 880     }
 881
 882   CHECK_STRING (string);
       /* Work out the search limit, both as a character position (LIM)
          and as a byte position (LIM_BYTE).  */
 883   if (NILP (bound))
 884     {
 885       if (n > 0)
 886         lim = ZV, lim_byte = ZV_BYTE;
 887       else
 888         lim = BEGV, lim_byte = BEGV_BYTE;
 889     }
 890   else
 891     {
 892       CHECK_NUMBER_COERCE_MARKER (bound);
 893       lim = XINT (bound);
 894       if (n > 0 ? lim < PT : lim > PT)
 895         error ("Invalid search bound (wrong side of point)");
           /* Clamp the bound to BEGV..ZV.  */
 896       if (lim > ZV)
 897         lim = ZV, lim_byte = ZV_BYTE;
 898       else if (lim < BEGV)
 899         lim = BEGV, lim_byte = BEGV_BYTE;
 900       else
 901         lim_byte = CHAR_TO_BYTE (lim);
 902     }
 903
       /* The two case tables are passed only when case folding is
          enabled in the current buffer; otherwise both are nil.  */
 904   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 905                       (!NILP (current_buffer->case_fold_search)
 906                        ? current_buffer->case_canon_table
 907                        : Qnil),
 908                       (!NILP (current_buffer->case_fold_search)
 909                        ? current_buffer->case_eqv_table
 910                        : Qnil),
 911                       posix);
       /* A value <= 0 from search_buffer is treated as failure.  */
 912   if (np <= 0)
 913     {
 914       if (NILP (noerror))
 915         return signal_failure (string);
 916       if (!EQ (noerror, Qt))
 917         {
               /* NOERROR is neither nil nor t: leave point at the limit.  */
 918           if (lim < BEGV || lim > ZV)
 919             abort ();
 920           SET_PT_BOTH (lim, lim_byte);
 921           return Qnil;
 922 #if 0 /* This would be clean, but maybe programs depend on
 923          a value of nil here.  */
 924           np = lim;
 925 #endif
 926         }
 927       else
 928         return Qnil;
 929     }
 930
       /* Sanity check: a successful search must end inside BEGV..ZV.  */
 931   if (np < BEGV || np > ZV)
 932     abort ();
 933
 934   SET_PT (np);
 935
 936   return make_number (np);
 937 }
 938 \f
 939 /* Return 1 if REGEXP matches just one constant string, i.e. it uses
        no regexp operators; return 0 otherwise.

        The bytes of REGEXP are scanned in order.  An unquoted `.', `*',
        `+', `?', `[', `^' or `$' makes the regexp non-trivial, and so
        does a backslash that ends the string or that is followed by one
        of the characters listed in the inner switch.  A backslash
        followed by any other byte is skipped together with that byte.  */
 940
 941 static int
 942 trivial_regexp_p (regexp)
 943      Lisp_Object regexp;
 944 {
 945   int len = SBYTES (regexp);
 946   unsigned char *s = SDATA (regexp);
 947   while (--len >= 0)
 948     {
 949       switch (*s++)
 950         {
 951         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 952           return 0;
 953         case '\\':
               /* A backslash with nothing after it: not a plain string.  */
 954           if (--len < 0)
 955             return 0;
               /* Consume the byte after the backslash and reject the
                  pairs listed below.  */
 956           switch (*s++)
 957             {
 958             case '|': case '(': case ')': case '`': case '\'': case 'b':
 959             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 960             case 'S': case '=': case '{': case '}':
 961             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 962             case '1': case '2': case '3': case '4': case '5':
 963             case '6': case '7': case '8': case '9':
 964               return 0;
 965             }
 966         }
 967     }
 968   return 1;
 969 }
 970
 971 /* Search for the n'th occurrence of STRING in the current buffer,
 972    starting at position POS and stopping at position LIM,
 973    treating STRING as a literal string if RE is false or as
 974    a regular expression if RE is true.
 975
 976    If N is positive, searching is forward and LIM must be greater than POS.
 977    If N is negative, searching is backward and LIM must be less than POS.
 978
 979    Returns -x if x occurrences remain to be found (x > 0),
 980    or else the position at the beginning of the Nth occurrence
 981    (if searching backward) or the end (if searching forward).
 982
 983    POSIX is nonzero if we want full backtracking (POSIX style)
 984    for this pattern.  0 means backtrack only enough to get a valid match.  */
 985
     /* TRANSLATE (OUT, TRT, D) stores in OUT the translation of D.
        If TRT is non-nil and Faref (TRT, D) yields an integer, OUT gets
        that integer; in every other case OUT gets D unchanged.
        D is expanded more than once and OUT is assigned directly, so D
        should be free of side effects.  Passing the same variable as
        both OUT and D is fine.  */
 986 #define TRANSLATE(out, trt, d)                  \
 987 do                                              \
 988   {                                             \
 989     if (! NILP (trt))                           \
 990       {                                         \
 991         Lisp_Object temp;                       \
 992         temp = Faref (trt, make_number (d));    \
 993         if (INTEGERP (temp))                    \
 994           out = XINT (temp);                    \
 995         else                                    \
 996           out = d;                              \
 997       }                                         \
 998     else                                        \
 999       out = d;                                  \
1000   }                                             \
1001 while (0)
1002
1003 static int
1004 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1005                RE, trt, inverse_trt, posix)
1006      Lisp_Object string;        /* Pattern: literal text or a regexp.  */
1007      int pos;                   /* Starting position, in characters...  */
1008      int pos_byte;              /* ...and in bytes.  */
1009      int lim;                   /* Search limit, in characters...  */
1010      int lim_byte;              /* ...and in bytes.  */
1011      int n;                     /* Repeat count; its sign is the direction.  */
1012      int RE;                    /* Nonzero: STRING is a regexp.  */
1013      Lisp_Object trt;           /* Translation table, or nil.  */
1014      Lisp_Object inverse_trt;   /* Inverse of TRT, or nil.  */
1015      int posix;                 /* Passed on to compile_pattern.  */
1016 {
       /* Search the current buffer for STRING, N times, starting at
          POS/POS_BYTE and not going beyond LIM/LIM_BYTE.

          If STRING is empty or N is 0, record an empty match at POS_BYTE
          and return POS.

          A regexp that trivial_regexp_p does not accept is compiled with
          compile_pattern and matched with re_search_2; after each match
          the registers in search_regs are converted from byte offsets to
          character positions.  This path returns the end (forward) or the
          start (backward) of the last match, or minus the number of
          matches still outstanding if a search fails.

          Anything else (a literal string, or a trivial regexp whose
          backslashes merely quote the next character) is converted to the
          buffer's multibyteness, translated through TRT, and handed to
          boyer_moore, or to simple_search when boyer_moore is ruled out.  */
1017   int len = SCHARS (string);
1018   int len_byte = SBYTES (string);
1019   register int i;
1020
1021   if (running_asynch_code)
1022     save_search_regs ();
1023
1024   /* Searching 0 times means don't move.  */
1025   /* Null string is found at starting position.  */
1026   if (len == 0 || n == 0)
1027     {
1028       set_search_regs (pos_byte, 0);
1029       return pos;
1030     }
1031
1032   if (RE && !trivial_regexp_p (string))
1033     {
1034       unsigned char *p1, *p2;
1035       int s1, s2;
1036       struct re_pattern_buffer *bufp;
1037
1038       bufp = compile_pattern (string, &search_regs, trt, posix,
1039                               !NILP (current_buffer->enable_multibyte_characters));
1040
1041       immediate_quit = 1;       /* Quit immediately if user types ^G,
1042                                    because letting this function finish
1043                                    can take too long. */
1044       QUIT;                     /* Do a pending quit right away,
1045                                    to avoid paradoxical behavior */
1046       /* Get pointers and sizes of the two strings
1047          that make up the visible portion of the buffer. */
1048
1049       p1 = BEGV_ADDR;
1050       s1 = GPT_BYTE - BEGV_BYTE;
1051       p2 = GAP_END_ADDR;
1052       s2 = ZV_BYTE - GPT_BYTE;
           /* A negative S1 means the gap lies before BEGV, so the whole
              visible text is handed over as the second string.  */
1053       if (s1 < 0)
1054         {
1055           p2 = p1;
1056           s2 = ZV_BYTE - BEGV_BYTE;
1057           s1 = 0;
1058         }
           /* A negative S2 means the gap lies after ZV, so the whole
              visible text is handed over as the first string.  */
1059       if (s2 < 0)
1060         {
1061           s1 = ZV_BYTE - BEGV_BYTE;
1062           s2 = 0;
1063         }
1064       re_match_object = Qnil;
1065
           /* Backward search: one re_search_2 call per requested match,
              each restarting from the start of the previous match.  */
1066       while (n < 0)
1067         {
1068           int val;
1069           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1070                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1071                              &search_regs,
1072                              /* Don't allow match past current point */
1073                              pos_byte - BEGV_BYTE);
1074           if (val == -2)
1075             {
1076               matcher_overflow ();
1077             }
1078           if (val >= 0)
1079             {
1080               pos_byte = search_regs.start[0] + BEGV_BYTE;
                   /* Convert the registers from byte offsets relative to
                      BEGV_BYTE into character positions.  */
1081               for (i = 0; i < search_regs.num_regs; i++)
1082                 if (search_regs.start[i] >= 0)
1083                   {
1084                     search_regs.start[i]
1085                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1086                     search_regs.end[i]
1087                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1088                   }
1089               XSETBUFFER (last_thing_searched, current_buffer);
1090               /* Set pos to the new position. */
1091               pos = search_regs.start[0];
1092             }
1093           else
1094             {
                   /* No further match: N is still negative and tells how
                      many matches remained to be found.  */
1095               immediate_quit = 0;
1096               return (n);
1097             }
1098           n++;
1099         }
           /* Forward search: each call restarts from the end of the
              previous match.  */
1100       while (n > 0)
1101         {
1102           int val;
1103           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1104                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1105                              &search_regs,
1106                              lim_byte - BEGV_BYTE);
1107           if (val == -2)
1108             {
1109               matcher_overflow ();
1110             }
1111           if (val >= 0)
1112             {
1113               pos_byte = search_regs.end[0] + BEGV_BYTE;
                   /* Convert the registers from byte offsets relative to
                      BEGV_BYTE into character positions.  */
1114               for (i = 0; i < search_regs.num_regs; i++)
1115                 if (search_regs.start[i] >= 0)
1116                   {
1117                     search_regs.start[i]
1118                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1119                     search_regs.end[i]
1120                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1121                   }
1122               XSETBUFFER (last_thing_searched, current_buffer);
1123               pos = search_regs.end[0];
1124             }
1125           else
1126             {
1127               immediate_quit = 0;
1128               return (0 - n);
1129             }
1130           n--;
1131         }
1132       immediate_quit = 0;
1133       return (pos);
1134     }
1135   else                          /* non-RE case */
1136     {
1137       unsigned char *raw_pattern, *pat;
1138       int raw_pattern_size;
1139       int raw_pattern_size_byte;
1140       unsigned char *patbuf;
1141       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1142       unsigned char *base_pat = SDATA (string);
1143       int charset_base = -1;
1144       int boyer_moore_ok = 1;
1145
1146       /* MULTIBYTE says whether the text to be searched is multibyte.
1147          We must convert PATTERN to match that, or we will not really
1148          find things right.  */
1149
1150       if (multibyte == STRING_MULTIBYTE (string))
1151         {
1152           raw_pattern = (unsigned char *) SDATA (string);
1153           raw_pattern_size = SCHARS (string);
1154           raw_pattern_size_byte = SBYTES (string);
1155         }
1156       else if (multibyte)
1157         {
               /* Converting single-byte to multibyte.  */
1158           raw_pattern_size = SCHARS (string);
1159           raw_pattern_size_byte
1160             = count_size_as_multibyte (SDATA (string),
1161                                        raw_pattern_size);
1162           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1163           copy_text (SDATA (string), raw_pattern,
1164                      SCHARS (string), 0, 1);
1165         }
1166       else
1167         {
1168           /* Converting multibyte to single-byte.
1169
1170              ??? Perhaps this conversion should be done in a special way
1171              by subtracting nonascii-insert-offset from each non-ASCII char,
1172              so that only the multibyte chars which really correspond to
1173              the chosen single-byte character set can possibly match.  */
1174           raw_pattern_size = SCHARS (string);
1175           raw_pattern_size_byte = SCHARS (string);
1176           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1177           copy_text (SDATA (string), raw_pattern,
1178                      SBYTES (string), 1, 0);
1179         }
1180
1181       /* Copy and optionally translate the pattern.  */
1182       len = raw_pattern_size;
1183       len_byte = raw_pattern_size_byte;
1184       patbuf = (unsigned char *) alloca (len_byte);
1185       pat = patbuf;
1186       base_pat = raw_pattern;
1187       if (multibyte)
1188         {
1189           while (--len >= 0)
1190             {
1191               unsigned char str[MAX_MULTIBYTE_LENGTH];
1192               int c, translated, inverse;
1193               int in_charlen, charlen;
1194
1195               /* If we got here and the RE flag is set, it's because we're
1196                  dealing with a regexp known to be trivial, so the backslash
1197                  just quotes the next character.  */
1198               if (RE && *base_pat == '\\')
1199                 {
1200                   len--;
                       /* The quoting backslash is not copied into PATBUF,
                          so it must not be counted in the character
                          length that is passed on to the search routines
                          below.  */
                       raw_pattern_size--;
1201                   len_byte--;
1202                   base_pat++;
1203                 }
1204
1205               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1206
1207               /* Translate the character, if requested.  */
1208               TRANSLATE (translated, trt, c);
1209               /* If translation changed the byte-length, go back
1210                  to the original character.  */
1211               charlen = CHAR_STRING (translated, str);
1212               if (in_charlen != charlen)
1213                 {
1214                   translated = c;
1215                   charlen = CHAR_STRING (c, str);
1216                 }
1217
1218               /* If we are searching for something strange,
1219                  an invalid multibyte code, don't use boyer-moore.  */
1220               if (! ASCII_BYTE_P (translated)
1221                   && (charlen == 1 /* 8bit code */
1222                       || charlen != in_charlen /* invalid multibyte code */
1223                       ))
1224                 boyer_moore_ok = 0;
1225
1226               TRANSLATE (inverse, inverse_trt, c);
1227
1228               /* Did this char actually get translated?
1229                  Would any other char get translated into it?  */
1230               if (translated != c || inverse != c)
1231                 {
1232                   /* Keep track of which character set row
1233                      contains the characters that need translation.  */
1234                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1235                   int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1236
1237                   if (charset_base_code != inverse_charset_base)
1238                     boyer_moore_ok = 0;
1239                   else if (charset_base == -1)
1240                     charset_base = charset_base_code;
1241                   else if (charset_base != charset_base_code)
1242                     /* If two different rows appear, needing translation,
1243                        then we cannot use boyer_moore search.  */
1244                     boyer_moore_ok = 0;
1245                 }
1246
1247               /* Store this character into the translated pattern.  */
1248               bcopy (str, pat, charlen);
1249               pat += charlen;
1250               base_pat += in_charlen;
1251               len_byte -= in_charlen;
1252             }
1253         }
1254       else
1255         {
1256           /* Unibyte buffer.  */
1257           charset_base = 0;
1258           while (--len >= 0)
1259             {
1260               int c, translated;
1261
1262               /* If we got here and the RE flag is set, it's because we're
1263                  dealing with a regexp known to be trivial, so the backslash
1264                  just quotes the next character.  */
1265               if (RE && *base_pat == '\\')
1266                 {
1267                   len--;
                       /* As in the multibyte case, do not count the
                          dropped backslash in the pattern length.  */
                       raw_pattern_size--;
1268                   base_pat++;
1269                 }
1270               c = *base_pat++;
1271               TRANSLATE (translated, trt, c);
1272               *pat++ = translated;
1273             }
1274         }
1275
           /* LEN_BYTE and LEN now describe the pattern actually stored in
              PATBUF, without any quoting backslashes.  */
1276       len_byte = pat - patbuf;
1277       len = raw_pattern_size;
1278       pat = base_pat = patbuf;
1279
1280       if (boyer_moore_ok)
1281         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1282                             pos, pos_byte, lim, lim_byte,
1283                             charset_base);
1284       else
1285         return simple_search (n, pat, len, len_byte, trt,
1286                               pos, pos_byte, lim, lim_byte);
1287     }
1288 }
1289 \f
1290 /* Do a simple string search N times for the string PAT,
1291    whose length is LEN/LEN_BYTE,
1292    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1293    TRT is the translation table.
1294
1295    Return the character position where the match is found.
1296    Otherwise, if M matches remained to be found, return -M.
1297
1298    This kind of search works regardless of what is in PAT and
1299    regardless of what is in TRT.  It is used in cases where
1300    boyer_moore cannot work.

        N > 0 searches forward and N < 0 searches backward.  Each buffer
        character is passed through TRT before being compared with the
        corresponding character of PAT; PAT itself is compared as given.
        When every requested match has been found, the last one is
        recorded with set_search_regs and the value returned is the
        position just after it (forward) or at its start (backward).
        If LIM equals POS no search loop runs at all.

        NOTE(review): after a match the multibyte branches step POS_BYTE
        by LEN_BYTE, and set_search_regs is likewise given LEN_BYTE, which
        looks as if the matched buffer text is assumed to occupy exactly
        as many bytes as PAT.  Confirm whether TRT can map a buffer
        character onto a pattern character of a different byte length.  */
1301
1302 static int
1303 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1304      int n;
1305      unsigned char *pat;
1306      int len, len_byte;
1307      Lisp_Object trt;
1308      int pos, pos_byte;
1309      int lim, lim_byte;
1310 {
1311   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
       /* Remember the direction now; N has reached 0 by the time a
          successful search arrives at `stop'.  */
1312   int forward = n > 0;
1313
       /* Forward search, multibyte buffer.  */
1314   if (lim > pos && multibyte)
1315     while (n > 0)
1316       {
1317         while (1)
1318           {
1319             /* Try matching at position POS.  */
1320             int this_pos = pos;
1321             int this_pos_byte = pos_byte;
1322             int this_len = len;
1323             int this_len_byte = len_byte;
1324             unsigned char *p = pat;
                 /* Fewer than LEN characters remain before LIM: give up,
                    leaving N as the number of matches still wanted.  */
1325             if (pos + len > lim)
1326               goto stop;
1327
1328             while (this_len > 0)
1329               {
1330                 int charlen, buf_charlen;
1331                 int pat_ch, buf_ch;
1332
1333                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1334                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1335                                                  ZV_BYTE - this_pos_byte,
1336                                                  buf_charlen);
1337                 TRANSLATE (buf_ch, trt, buf_ch);
1338
1339                 if (buf_ch != pat_ch)
1340                   break;
1341
1342                 this_len_byte -= charlen;
1343                 this_len--;
1344                 p += charlen;
1345
1346                 this_pos_byte += buf_charlen;
1347                 this_pos++;
1348               }
1349
                 /* Every pattern character matched: move past the match.  */
1350             if (this_len == 0)
1351               {
1352                 pos += len;
1353                 pos_byte += len_byte;
1354                 break;
1355               }
1356
                 /* Mismatch: retry one character further on.  */
1357             INC_BOTH (pos, pos_byte);
1358           }
1359
1360         n--;
1361       }
       /* Forward search, unibyte buffer.  POS is used directly to fetch
          bytes, and POS_BYTE is not updated in this branch.  */
1362   else if (lim > pos)
1363     while (n > 0)
1364       {
1365         while (1)
1366           {
1367             /* Try matching at position POS.  */
1368             int this_pos = pos;
1369             int this_len = len;
1370             unsigned char *p = pat;
1371
1372             if (pos + len > lim)
1373               goto stop;
1374
1375             while (this_len > 0)
1376               {
1377                 int pat_ch = *p++;
1378                 int buf_ch = FETCH_BYTE (this_pos);
1379                 TRANSLATE (buf_ch, trt, buf_ch);
1380
1381                 if (buf_ch != pat_ch)
1382                   break;
1383
1384                 this_len--;
1385                 this_pos++;
1386               }
1387
1388             if (this_len == 0)
1389               {
1390                 pos += len;
1391                 break;
1392               }
1393
1394             pos++;
1395           }
1396
1397         n--;
1398       }
1399   /* Backwards search.  */
1400   else if (lim < pos && multibyte)
1401     while (n < 0)
1402       {
1403         while (1)
1404           {
1405             /* Try matching at position POS.  */
                 /* The candidate match is the LEN characters (taken to be
                    LEN_BYTE bytes) that end at POS.  */
1406             int this_pos = pos - len;
1407             int this_pos_byte = pos_byte - len_byte;
1408             int this_len = len;
1409             int this_len_byte = len_byte;
1410             unsigned char *p = pat;
1411
                 /* The candidate would start before LIM: give up.  */
1412             if (pos - len < lim)
1413               goto stop;
1414
1415             while (this_len > 0)
1416               {
1417                 int charlen, buf_charlen;
1418                 int pat_ch, buf_ch;
1419
1420                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1421                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1422                                                  ZV_BYTE - this_pos_byte,
1423                                                  buf_charlen);
1424                 TRANSLATE (buf_ch, trt, buf_ch);
1425
1426                 if (buf_ch != pat_ch)
1427                   break;
1428
1429                 this_len_byte -= charlen;
1430                 this_len--;
1431                 p += charlen;
1432                 this_pos_byte += buf_charlen;
1433                 this_pos++;
1434               }
1435
                 /* Matched: move POS back to the start of the match.  */
1436             if (this_len == 0)
1437               {
1438                 pos -= len;
1439                 pos_byte -= len_byte;
1440                 break;
1441               }
1442
                 /* Mismatch: retry one character earlier.  */
1443             DEC_BOTH (pos, pos_byte);
1444           }
1445
1446         n++;
1447       }
       /* Backward search, unibyte buffer.  */
1448   else if (lim < pos)
1449     while (n < 0)
1450       {
1451         while (1)
1452           {
1453             /* Try matching at position POS.  */
1454             int this_pos = pos - len;
1455             int this_len = len;
1456             unsigned char *p = pat;
1457
1458             if (pos - len < lim)
1459               goto stop;
1460
1461             while (this_len > 0)
1462               {
1463                 int pat_ch = *p++;
1464                 int buf_ch = FETCH_BYTE (this_pos);
1465                 TRANSLATE (buf_ch, trt, buf_ch);
1466
1467                 if (buf_ch != pat_ch)
1468                   break;
1469                 this_len--;
1470                 this_pos++;
1471               }
1472
1473             if (this_len == 0)
1474               {
1475                 pos -= len;
1476                 break;
1477               }
1478
1479             pos--;
1480           }
1481
1482         n++;
1483       }
1484
1485  stop:
       /* N is 0 only if every requested match was found.  */
1486   if (n == 0)
1487     {
           /* Record the last match as a byte position and byte length.
              In a unibyte buffer POS serves as the byte position.  After
              a forward search POS lies just past the match, so step back
              by LEN_BYTE to reach its start.  */
1488       if (forward)
1489         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1490       else
1491         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1492
1493       return pos;
1494     }
       /* Failure: return minus the number of matches still outstanding.  */
1495   else if (n > 0)
1496     return -n;
1497   else
1498     return n;
1499 }
1500 \f
1501 /* Do Boyer-Moore search N times for the string PAT,
1502    whose length is LEN/LEN_BYTE,
1503    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1504    DIRECTION says which direction we search in.
1505    TRT and INVERSE_TRT are translation tables.
1506
1507    This kind of search works if all the characters in PAT that have
1508    nontrivial translation are the same aside from the last byte.  This
1509    makes it possible to translate just the last byte of a character,
1510    and do so after just a simple test of the context.
1511
1512    If that criterion is not satisfied, do not call this function.  */
1513
1514 static int
1515 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1516              pos, pos_byte, lim, lim_byte, charset_base)
1517      int n;
1518      unsigned char *base_pat;
1519      int len, len_byte;
1520      Lisp_Object trt;
1521      Lisp_Object inverse_trt;
1522      int pos, pos_byte;
1523      int lim, lim_byte;
1524      int charset_base;
1525 {
1526   int direction = ((n > 0) ? 1 : -1);
1527   register int dirlen;
1528   int infinity, limit, stride_for_teases = 0;
1529   register int *BM_tab;
1530   int *BM_tab_base;
1531   register unsigned char *cursor, *p_limit;
1532   register int i, j;
1533   unsigned char *pat, *pat_end;
1534   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1535
1536   unsigned char simple_translate[0400];
1537   int translate_prev_byte = 0;
1538   int translate_anteprev_byte = 0;
1539
1540 #ifdef C_ALLOCA
1541   int BM_tab_space[0400];
1542   BM_tab = &BM_tab_space[0];
1543 #else
1544   BM_tab = (int *) alloca (0400 * sizeof (int));
1545 #endif
1546   /* The general approach is that we are going to maintain that we know */
1547   /* the first (closest to the present position, in whatever direction */
1548   /* we're searching) character that could possibly be the last */
1549   /* (furthest from present position) character of a valid match.  We */
1550   /* advance the state of our knowledge by looking at that character */
1551   /* and seeing whether it indeed matches the last character of the */
1552   /* pattern.  If it does, we take a closer look.  If it does not, we */
1553   /* move our pointer (to putative last characters) as far as is */
1554   /* logically possible.  This amount of movement, which I call a */
1555   /* stride, will be the length of the pattern if the actual character */
1556   /* appears nowhere in the pattern, otherwise it will be the distance */
1557   /* from the last occurrence of that character to the end of the */
1558   /* pattern. */
1559   /* As a coding trick, an enormous stride is coded into the table for */
1560   /* characters that match the last character.  This allows use of only */
1561   /* a single test, a test for having gone past the end of the */
1562   /* permissible match region, to test for both possible matches (when */
1563   /* the stride goes past the end immediately) and failure to */
1564   /* match (where you get nudged past the end one stride at a time). */
1565
1566   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1567   /* is determined only by the last character of the putative match. */
1568   /* If that character does not match, we will stride the proper */
1569   /* distance to propose a match that superimposes it on the last */
1570   /* instance of a character that matches it (per trt), or misses */
1571   /* it entirely if there is none. */
1572
1573   dirlen = len_byte * direction;
1574   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1575
1576   /* Record position after the end of the pattern.  */
1577   pat_end = base_pat + len_byte;
1578   /* BASE_PAT points to a character that we start scanning from.
1579      It is the first character in a forward search,
1580      the last character in a backward search.  */
1581   if (direction < 0)
1582     base_pat = pat_end - 1;
1583
1584   BM_tab_base = BM_tab;
1585   BM_tab += 0400;
1586   j = dirlen;           /* to get it in a register */
1587   /* A character that does not appear in the pattern induces a */
1588   /* stride equal to the pattern length. */
1589   while (BM_tab_base != BM_tab)
1590     {
1591       *--BM_tab = j;
1592       *--BM_tab = j;
1593       *--BM_tab = j;
1594       *--BM_tab = j;
1595     }
1596
1597   /* We use this for translation, instead of TRT itself.
1598      We fill this in to handle the characters that actually
1599      occur in the pattern.  Others don't matter anyway!  */
1600   bzero (simple_translate, sizeof simple_translate);
1601   for (i = 0; i < 0400; i++)
1602     simple_translate[i] = i;
1603
1604   i = 0;
1605   while (i != infinity)
1606     {
1607       unsigned char *ptr = base_pat + i;
1608       i += direction;
1609       if (i == dirlen)
1610         i = infinity;
1611       if (! NILP (trt))
1612         {
1613           int ch;
1614           int untranslated;
1615           int this_translated = 1;
1616
1617           if (multibyte
1618               /* Is *PTR the last byte of a character?  */
1619               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1620             {
1621               unsigned char *charstart = ptr;
1622               while (! CHAR_HEAD_P (*charstart))
1623                 charstart--;
1624               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1625               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1626                 {
1627                   TRANSLATE (ch, trt, untranslated);
1628                   if (! CHAR_HEAD_P (*ptr))
1629                     {
1630                       translate_prev_byte = ptr[-1];
1631                       if (! CHAR_HEAD_P (translate_prev_byte))
1632                         translate_anteprev_byte = ptr[-2];
1633                     }
1634                 }
1635               else
1636                 {
1637                   this_translated = 0;
1638                   ch = *ptr;
1639                 }
1640             }
1641           else if (!multibyte)
1642             TRANSLATE (ch, trt, *ptr);
1643           else
1644             {
1645               ch = *ptr;
1646               this_translated = 0;
1647             }
1648
1649           if (ch > 0400)
1650             j = ((unsigned char) ch) | 0200;
1651           else
1652             j = (unsigned char) ch;
1653
1654           if (i == infinity)
1655             stride_for_teases = BM_tab[j];
1656
1657           BM_tab[j] = dirlen - i;
1658           /* A translation table is accompanied by its inverse -- see */
1659           /* comment following downcase_table for details */
1660           if (this_translated)
1661             {
1662               int starting_ch = ch;
1663               int starting_j = j;
1664               while (1)
1665                 {
1666                   TRANSLATE (ch, inverse_trt, ch);
1667                   if (ch > 0400)
1668                     j = ((unsigned char) ch) | 0200;
1669                   else
1670                     j = (unsigned char) ch;
1671
1672                   /* For all the characters that map into CH,
1673                      set up simple_translate to map the last byte
1674                      into STARTING_J.  */
1675                   simple_translate[j] = starting_j;
1676                   if (ch == starting_ch)
1677                     break;
1678                   BM_tab[j] = dirlen - i;
1679                 }
1680             }
1681         }
1682       else
1683         {
1684           j = *ptr;
1685
1686           if (i == infinity)
1687             stride_for_teases = BM_tab[j];
1688           BM_tab[j] = dirlen - i;
1689         }
1690       /* stride_for_teases tells how much to stride if we get a */
1691       /* match on the far character but are subsequently */
1692       /* disappointed, by recording what the stride would have been */
1693       /* for that character if the last character had been */
1694       /* different. */
1695     }
1696   infinity = dirlen - infinity;
1697   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1698   /* loop invariant - POS_BYTE points at where last char (first
1699      char if reverse) of pattern would align in a possible match.  */
1700   while (n != 0)
1701     {
1702       int tail_end;
1703       unsigned char *tail_end_ptr;
1704
1705       /* It's been reported that some (broken) compiler thinks that
1706          Boolean expressions in an arithmetic context are unsigned.
1707          Using an explicit ?1:0 prevents this.  */
1708       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1709           < 0)
1710         return (n * (0 - direction));
1711       /* First we do the part we can by pointers (maybe nothing) */
1712       QUIT;
1713       pat = base_pat;
1714       limit = pos_byte - dirlen + direction;
1715       if (direction > 0)
1716         {
1717           limit = BUFFER_CEILING_OF (limit);
1718           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1719              can take on without hitting edge of buffer or the gap.  */
1720           limit = min (limit, pos_byte + 20000);
1721           limit = min (limit, lim_byte - 1);
1722         }
1723       else
1724         {
1725           limit = BUFFER_FLOOR_OF (limit);
1726           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1727              can take on without hitting edge of buffer or the gap.  */
1728           limit = max (limit, pos_byte - 20000);
1729           limit = max (limit, lim_byte);
1730         }
1731       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1732       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1733
1734       if ((limit - pos_byte) * direction > 20)
1735         {
1736           unsigned char *p2;
1737
1738           p_limit = BYTE_POS_ADDR (limit);
1739           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1740           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1741           while (1)             /* use one cursor setting as long as i can */
1742             {
1743               if (direction > 0) /* worth duplicating */
1744                 {
1745                   /* Use signed comparison if appropriate
1746                      to make cursor+infinity sure to be > p_limit.
1747                      Assuming that the buffer lies in a range of addresses
1748                      that are all "positive" (as ints) or all "negative",
1749                      either kind of comparison will work as long
1750                      as we don't step by infinity.  So pick the kind
1751                      that works when we do step by infinity.  */
1752                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1753                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1754                       cursor += BM_tab[*cursor];
1755                   else
1756                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1757                       cursor += BM_tab[*cursor];
1758                 }
1759               else
1760                 {
1761                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1762                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1763                       cursor += BM_tab[*cursor];
1764                   else
1765                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1766                       cursor += BM_tab[*cursor];
1767                 }
1768 /* If you are here, cursor is beyond the end of the searched region. */
1769 /* This can happen if you match on the far character of the pattern, */
1770 /* because the "stride" of that character is infinity, a number able */
1771 /* to throw you well beyond the end of the search.  It can also */
1772 /* happen if you fail to match within the permitted region and would */
1773 /* otherwise try a character beyond that region */
1774               if ((cursor - p_limit) * direction <= len_byte)
1775                 break;  /* a small overrun is genuine */
1776               cursor -= infinity; /* large overrun = hit */
1777               i = dirlen - direction;
1778               if (! NILP (trt))
1779                 {
1780                   while ((i -= direction) + direction != 0)
1781                     {
1782                       int ch;
1783                       cursor -= direction;
1784                       /* Translate only the last byte of a character.  */
1785                       if (! multibyte
1786                           || ((cursor == tail_end_ptr
1787                                || CHAR_HEAD_P (cursor[1]))
1788                               && (CHAR_HEAD_P (cursor[0])
1789                                   || (translate_prev_byte == cursor[-1]
1790                                       && (CHAR_HEAD_P (translate_prev_byte)
1791                                           || translate_anteprev_byte == cursor[-2])))))
1792                         ch = simple_translate[*cursor];
1793                       else
1794                         ch = *cursor;
1795                       if (pat[i] != ch)
1796                         break;
1797                     }
1798                 }
1799               else
1800                 {
1801                   while ((i -= direction) + direction != 0)
1802                     {
1803                       cursor -= direction;
1804                       if (pat[i] != *cursor)
1805                         break;
1806                     }
1807                 }
1808               cursor += dirlen - i - direction; /* fix cursor */
1809               if (i + direction == 0)
1810                 {
1811                   int position;
1812
1813                   cursor -= direction;
1814
1815                   position = pos_byte + cursor - p2 + ((direction > 0)
1816                                                        ? 1 - len_byte : 0);
1817                   set_search_regs (position, len_byte);
1818
1819                   if ((n -= direction) != 0)
1820                     cursor += dirlen; /* to resume search */
1821                   else
1822                     return ((direction > 0)
1823                             ? search_regs.end[0] : search_regs.start[0]);
1824                 }
1825               else
1826                 cursor += stride_for_teases; /* <sigh> we lose -  */
1827             }
1828           pos_byte += cursor - p2;
1829         }
1830       else
1831         /* Now we'll pick up a clump that has to be done the hard */
1832         /* way because it covers a discontinuity */
1833         {
1834           limit = ((direction > 0)
1835                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1836                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1837           limit = ((direction > 0)
1838                    ? min (limit + len_byte, lim_byte - 1)
1839                    : max (limit - len_byte, lim_byte));
1840           /* LIMIT is now the last value POS_BYTE can have
1841              and still be valid for a possible match.  */
1842           while (1)
1843             {
1844               /* This loop can be coded for space rather than */
1845               /* speed because it will usually run only once. */
1846               /* (the reach is at most len + 21, and typically */
1847               /* does not exceed len) */
1848               while ((limit - pos_byte) * direction >= 0)
1849                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1850               /* now run the same tests to distinguish going off the */
1851               /* end, a match or a phony match. */
1852               if ((pos_byte - limit) * direction <= len_byte)
1853                 break;  /* ran off the end */
1854               /* Found what might be a match.
1855                  Set POS_BYTE back to last (first if reverse) pos.  */
1856               pos_byte -= infinity;
1857               i = dirlen - direction;
1858               while ((i -= direction) + direction != 0)
1859                 {
1860                   int ch;
1861                   unsigned char *ptr;
1862                   pos_byte -= direction;
1863                   ptr = BYTE_POS_ADDR (pos_byte);
1864                   /* Translate only the last byte of a character.  */
1865                   if (! multibyte
1866                       || ((ptr == tail_end_ptr
1867                            || CHAR_HEAD_P (ptr[1]))
1868                           && (CHAR_HEAD_P (ptr[0])
1869                               || (translate_prev_byte == ptr[-1]
1870                                   && (CHAR_HEAD_P (translate_prev_byte)
1871                                       || translate_anteprev_byte == ptr[-2])))))
1872                     ch = simple_translate[*ptr];
1873                   else
1874                     ch = *ptr;
1875                   if (pat[i] != ch)
1876                     break;
1877                 }
1878               /* Above loop has moved POS_BYTE part or all the way
1879                  back to the first pos (last pos if reverse).
1880                  Set it once again at the last (first if reverse) char.  */
1881               pos_byte += dirlen - i- direction;
1882               if (i + direction == 0)
1883                 {
1884                   int position;
1885                   pos_byte -= direction;
1886
1887                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1888
1889                   set_search_regs (position, len_byte);
1890
1891                   if ((n -= direction) != 0)
1892                     pos_byte += dirlen; /* to resume search */
1893                   else
1894                     return ((direction > 0)
1895                             ? search_regs.end[0] : search_regs.start[0]);
1896                 }
1897               else
1898                 pos_byte += stride_for_teases;
1899             }
1900           }
1901       /* We have done one clump.  Can we continue? */
1902       if ((lim_byte - pos_byte) * direction < 0)
1903         return ((0 - n) * direction);
1904     }
1905   return BYTE_TO_CHAR (pos_byte);
1906 }
1907
1908 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1909    for the overall match just found in the current buffer.
1910    Also clear out the match data for registers 1 and up.  */
1911
1912 static void
1913 set_search_regs (beg_byte, nbytes)
1914      int beg_byte, nbytes;
1915 {
1916   int i;
1917
1918   /* Make sure we have registers in which to store
1919      the match position.  */
1920   if (search_regs.num_regs == 0)
1921     {
1922       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1923       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1924       search_regs.num_regs = 2;
1925     }
1926
1927   /* Clear out the other registers.  */
1928   for (i = 1; i < search_regs.num_regs; i++)
1929     {
1930       search_regs.start[i] = -1;
1931       search_regs.end[i] = -1;
1932     }
1933
1934   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1935   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1936   XSETBUFFER (last_thing_searched, current_buffer);
1937 }
1938 \f
1939 /* Given a string of words separated by word delimiters,
1940   compute a regexp that matches those exact words
1941   separated by arbitrary punctuation.  */
1942
1943 static Lisp_Object
1944 wordify (string)
1945      Lisp_Object string;
1946 {
1947   register unsigned char *p, *o;
1948   register int i, i_byte, len, punct_count = 0, word_count = 0;
1949   Lisp_Object val;
1950   int prev_c = 0;
1951   int adjust;
1952
1953   CHECK_STRING (string);
1954   p = SDATA (string);
1955   len = SCHARS (string);
1956
1957   for (i = 0, i_byte = 0; i < len; )
1958     {
1959       int c;
1960
1961       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1962
1963       if (SYNTAX (c) != Sword)
1964         {
1965           punct_count++;
1966           if (i > 0 && SYNTAX (prev_c) == Sword)
1967             word_count++;
1968         }
1969
1970       prev_c = c;
1971     }
1972
1973   if (SYNTAX (prev_c) == Sword)
1974     word_count++;
1975   if (!word_count)
1976     return empty_string;
1977
1978   adjust = - punct_count + 5 * (word_count - 1) + 4;
1979   if (STRING_MULTIBYTE (string))
1980     val = make_uninit_multibyte_string (len + adjust,
1981                                         SBYTES (string)
1982                                         + adjust);
1983   else
1984     val = make_uninit_string (len + adjust);
1985
1986   o = SDATA (val);
1987   *o++ = '\\';
1988   *o++ = 'b';
1989   prev_c = 0;
1990
1991   for (i = 0, i_byte = 0; i < len; )
1992     {
1993       int c;
1994       int i_byte_orig = i_byte;
1995
1996       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1997
1998       if (SYNTAX (c) == Sword)
1999         {
2000           bcopy (SDATA (string) + i_byte_orig, o,
2001                  i_byte - i_byte_orig);
2002           o += i_byte - i_byte_orig;
2003         }
2004       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2005         {
2006           *o++ = '\\';
2007           *o++ = 'W';
2008           *o++ = '\\';
2009           *o++ = 'W';
2010           *o++ = '*';
2011         }
2012
2013       prev_c = c;
2014     }
2015
2016   *o++ = '\\';
2017   *o++ = 'b';
2018
2019   return val;
2020 }
2021 \f
2022 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2023        "MSearch backward: ",
2024        doc: /* Search backward from point for STRING.
2025 Set point to the beginning of the occurrence found, and return point.
2026 An optional second argument bounds the search; it is a buffer position.
2027 The match found must not extend before that position.
2028 Optional third argument, if t, means if fail just return nil (no error).
2029  If not nil and not t, position at limit of search and return nil.
2030 Optional fourth argument is repeat count--search for successive occurrences.
2031
2032 Search case-sensitivity is determined by the value of the variable
2033 `case-fold-search', which see.
2034
2035 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2036      (string, bound, noerror, count)
2037      Lisp_Object string, bound, noerror, count;
2038 {
2039   return search_command (string, bound, noerror, count, -1, 0, 0);
2040 }
2041
2042 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2043        doc: /* Search forward from point for STRING.
2044 Set point to the end of the occurrence found, and return point.
2045 An optional second argument bounds the search; it is a buffer position.
2046 The match found must not extend after that position.  nil is equivalent
2047   to (point-max).
2048 Optional third argument, if t, means if fail just return nil (no error).
2049   If not nil and not t, move to limit of search and return nil.
2050 Optional fourth argument is repeat count--search for successive occurrences.
2051
2052 Search case-sensitivity is determined by the value of the variable
2053 `case-fold-search', which see.
2054
2055 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2056      (string, bound, noerror, count)
2057      Lisp_Object string, bound, noerror, count;
2058 {
2059   return search_command (string, bound, noerror, count, 1, 0, 0);
2060 }
2061
2062 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2063        "sWord search backward: ",
2064        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2065 Set point to the beginning of the occurrence found, and return point.
2066 An optional second argument bounds the search; it is a buffer position.
2067 The match found must not extend before that position.
2068 Optional third argument, if t, means if fail just return nil (no error).
2069   If not nil and not t, move to limit of search and return nil.
2070 Optional fourth argument is repeat count--search for successive occurrences.  */)
2071      (string, bound, noerror, count)
2072      Lisp_Object string, bound, noerror, count;
2073 {
2074   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2075 }
2076
2077 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2078        "sWord search: ",
2079        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2080 Set point to the end of the occurrence found, and return point.
2081 An optional second argument bounds the search; it is a buffer position.
2082 The match found must not extend after that position.
2083 Optional third argument, if t, means if fail just return nil (no error).
2084   If not nil and not t, move to limit of search and return nil.
2085 Optional fourth argument is repeat count--search for successive occurrences.  */)
2086      (string, bound, noerror, count)
2087      Lisp_Object string, bound, noerror, count;
2088 {
2089   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2090 }
2091
2092 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2093        "sRE search backward: ",
2094        doc: /* Search backward from point for match for regular expression REGEXP.
2095 Set point to the beginning of the match, and return point.
2096 The match found is the one starting last in the buffer
2097 and yet ending before the origin of the search.
2098 An optional second argument bounds the search; it is a buffer position.
2099 The match found must start at or after that position.
2100 Optional third argument, if t, means if fail just return nil (no error).
2101   If not nil and not t, move to limit of search and return nil.
2102 Optional fourth argument is repeat count--search for successive occurrences.
2103 See also the functions `match-beginning', `match-end', `match-string',
2104 and `replace-match'.  */)
2105      (regexp, bound, noerror, count)
2106      Lisp_Object regexp, bound, noerror, count;
2107 {
2108   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2109 }
2110
2111 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2112        "sRE search: ",
2113        doc: /* Search forward from point for regular expression REGEXP.
2114 Set point to the end of the occurrence found, and return point.
2115 An optional second argument bounds the search; it is a buffer position.
2116 The match found must not extend after that position.
2117 Optional third argument, if t, means if fail just return nil (no error).
2118   If not nil and not t, move to limit of search and return nil.
2119 Optional fourth argument is repeat count--search for successive occurrences.
2120 See also the functions `match-beginning', `match-end', `match-string',
2121 and `replace-match'.  */)
2122      (regexp, bound, noerror, count)
2123      Lisp_Object regexp, bound, noerror, count;
2124 {
2125   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2126 }
2127
2128 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2129        "sPosix search backward: ",
2130        doc: /* Search backward from point for match for regular expression REGEXP.
2131 Find the longest match in accord with Posix regular expression rules.
2132 Set point to the beginning of the match, and return point.
2133 The match found is the one starting last in the buffer
2134 and yet ending before the origin of the search.
2135 An optional second argument bounds the search; it is a buffer position.
2136 The match found must start at or after that position.
2137 Optional third argument, if t, means if fail just return nil (no error).
2138   If not nil and not t, move to limit of search and return nil.
2139 Optional fourth argument is repeat count--search for successive occurrences.
2140 See also the functions `match-beginning', `match-end', `match-string',
2141 and `replace-match'.  */)
2142      (regexp, bound, noerror, count)
2143      Lisp_Object regexp, bound, noerror, count;
2144 {
2145   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2146 }
2147
2148 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2149        "sPosix search: ",
2150        doc: /* Search forward from point for regular expression REGEXP.
2151 Find the longest match in accord with Posix regular expression rules.
2152 Set point to the end of the occurrence found, and return point.
2153 An optional second argument bounds the search; it is a buffer position.
2154 The match found must not extend after that position.
2155 Optional third argument, if t, means if fail just return nil (no error).
2156   If not nil and not t, move to limit of search and return nil.
2157 Optional fourth argument is repeat count--search for successive occurrences.
2158 See also the functions `match-beginning', `match-end', `match-string',
2159 and `replace-match'.  */)
2160      (regexp, bound, noerror, count)
2161      Lisp_Object regexp, bound, noerror, count;
2162 {
2163   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2164 }
2165 \f
2166 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2167        doc: /* Replace text matched by last search with NEWTEXT.
2168 Leave point at the end of the replacement text.
2169
2170 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2171 Otherwise maybe capitalize the whole text, or maybe just word initials,
2172 based on the replaced text.
2173 If the replaced text has only capital letters
2174 and has at least one multiletter word, convert NEWTEXT to all caps.
2175 Otherwise if all words are capitalized in the replaced text,
2176 capitalize each word in NEWTEXT.
2177
2178 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2179 Otherwise treat `\\' as special:
2180   `\\&' in NEWTEXT means substitute original matched text.
2181   `\\N' means substitute what matched the Nth `\\(...\\)'.
2182        If Nth parens didn't match, substitute nothing.
2183   `\\\\' means insert one `\\'.
2184 Case conversion does not apply to these substitutions.
2185
2186 FIXEDCASE and LITERAL are optional arguments.
2187
2188 The optional fourth argument STRING can be a string to modify.
2189 This is meaningful when the previous match was done against STRING,
2190 using `string-match'.  When used this way, `replace-match'
2191 creates and returns a new string made by copying STRING and replacing
2192 the part of STRING that was matched.
2193
2194 The optional fifth argument SUBEXP specifies a subexpression;
2195 it says to replace just that subexpression with NEWTEXT,
2196 rather than replacing the entire matched text.
2197 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2198 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2199 NEWTEXT in place of subexp N.
2200 This is useful only after a regular expression search or match,
2201 since only regular expressions have distinguished subexpressions.  */)
2202      (newtext, fixedcase, literal, string, subexp)
2203      Lisp_Object newtext, fixedcase, literal, string, subexp;
2204 {
2205   enum { nochange, all_caps, cap_initial } case_action;
2206   register int pos, pos_byte;
2207   int some_multiletter_word;
2208   int some_lowercase;
2209   int some_uppercase;
2210   int some_nonuppercase_initial;
2211   register int c, prevc;
2212   int sub;
2213   int opoint, newpoint;
2214
2215   CHECK_STRING (newtext);
2216
2217   if (! NILP (string))
2218     CHECK_STRING (string);
2219
2220   case_action = nochange;       /* We tried an initialization */
2221                                 /* but some C compilers blew it */
2222
2223   if (search_regs.num_regs <= 0)
2224     error ("replace-match called before any match found");
2225
2226   if (NILP (subexp))
2227     sub = 0;
2228   else
2229     {
2230       CHECK_NUMBER (subexp);
2231       sub = XINT (subexp);
2232       if (sub < 0 || sub >= search_regs.num_regs)
2233         args_out_of_range (subexp, make_number (search_regs.num_regs));
2234     }
2235
2236   if (NILP (string))
2237     {
2238       if (search_regs.start[sub] < BEGV
2239           || search_regs.start[sub] > search_regs.end[sub]
2240           || search_regs.end[sub] > ZV)
2241         args_out_of_range (make_number (search_regs.start[sub]),
2242                            make_number (search_regs.end[sub]));
2243     }
2244   else
2245     {
2246       if (search_regs.start[sub] < 0
2247           || search_regs.start[sub] > search_regs.end[sub]
2248           || search_regs.end[sub] > SCHARS (string))
2249         args_out_of_range (make_number (search_regs.start[sub]),
2250                            make_number (search_regs.end[sub]));
2251     }
2252
2253   if (NILP (fixedcase))
2254     {
2255       /* Decide how to casify by examining the matched text. */
2256       int last;
2257
2258       pos = search_regs.start[sub];
2259       last = search_regs.end[sub];
2260
2261       if (NILP (string))
2262         pos_byte = CHAR_TO_BYTE (pos);
2263       else
2264         pos_byte = string_char_to_byte (string, pos);
2265
2266       prevc = '\n';
2267       case_action = all_caps;
2268
2269       /* some_multiletter_word is set nonzero if any original word
2270          is more than one letter long. */
2271       some_multiletter_word = 0;
2272       some_lowercase = 0;
2273       some_nonuppercase_initial = 0;
2274       some_uppercase = 0;
2275
2276       while (pos < last)
2277         {
2278           if (NILP (string))
2279             {
2280               c = FETCH_CHAR (pos_byte);
2281               INC_BOTH (pos, pos_byte);
2282             }
2283           else
2284             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2285
2286           if (LOWERCASEP (c))
2287             {
2288               /* Cannot be all caps if any original char is lower case */
2289
2290               some_lowercase = 1;
2291               if (SYNTAX (prevc) != Sword)
2292                 some_nonuppercase_initial = 1;
2293               else
2294                 some_multiletter_word = 1;
2295             }
2296           else if (!NOCASEP (c))
2297             {
2298               some_uppercase = 1;
2299               if (SYNTAX (prevc) != Sword)
2300                 ;
2301               else
2302                 some_multiletter_word = 1;
2303             }
2304           else
2305             {
2306               /* If the initial is a caseless word constituent,
2307                  treat that like a lowercase initial.  */
2308               if (SYNTAX (prevc) != Sword)
2309                 some_nonuppercase_initial = 1;
2310             }
2311
2312           prevc = c;
2313         }
2314
2315       /* Convert to all caps if the old text is all caps
2316          and has at least one multiletter word.  */
2317       if (! some_lowercase && some_multiletter_word)
2318         case_action = all_caps;
2319       /* Capitalize each word, if the old text has all capitalized words.  */
2320       else if (!some_nonuppercase_initial && some_multiletter_word)
2321         case_action = cap_initial;
2322       else if (!some_nonuppercase_initial && some_uppercase)
2323         /* Should x -> yz, operating on X, give Yz or YZ?
2324            We'll assume the latter.  */
2325         case_action = all_caps;
2326       else
2327         case_action = nochange;
2328     }
2329
2330   /* Do replacement in a string.  */
2331   if (!NILP (string))
2332     {
2333       Lisp_Object before, after;
2334
2335       before = Fsubstring (string, make_number (0),
2336                            make_number (search_regs.start[sub]));
2337       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2338
2339       /* Substitute parts of the match into NEWTEXT
2340          if desired.  */
2341       if (NILP (literal))
2342         {
2343           int lastpos = 0;
2344           int lastpos_byte = 0;
2345           /* We build up the substituted string in ACCUM.  */
2346           Lisp_Object accum;
2347           Lisp_Object middle;
2348           int length = SBYTES (newtext);
2349
2350           accum = Qnil;
2351
2352           for (pos_byte = 0, pos = 0; pos_byte < length;)
2353             {
2354               int substart = -1;
2355               int subend = 0;
2356               int delbackslash = 0;
2357
2358               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2359
2360               if (c == '\\')
2361                 {
2362                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2363
2364                   if (c == '&')
2365                     {
2366                       substart = search_regs.start[sub];
2367                       subend = search_regs.end[sub];
2368                     }
2369                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2370                     {
2371                       if (search_regs.start[c - '0'] >= 0)
2372                         {
2373                           substart = search_regs.start[c - '0'];
2374                           subend = search_regs.end[c - '0'];
2375                         }
2376                     }
2377                   else if (c == '\\')
2378                     delbackslash = 1;
2379                   else
2380                     error ("Invalid use of `\\' in replacement text");
2381                 }
2382               if (substart >= 0)
2383                 {
2384                   if (pos - 2 != lastpos)
2385                     middle = substring_both (newtext, lastpos,
2386                                              lastpos_byte,
2387                                              pos - 2, pos_byte - 2);
2388                   else
2389                     middle = Qnil;
2390                   accum = concat3 (accum, middle,
2391                                    Fsubstring (string,
2392                                                make_number (substart),
2393                                                make_number (subend)));
2394                   lastpos = pos;
2395                   lastpos_byte = pos_byte;
2396                 }
2397               else if (delbackslash)
2398                 {
2399                   middle = substring_both (newtext, lastpos,
2400                                            lastpos_byte,
2401                                            pos - 1, pos_byte - 1);
2402
2403                   accum = concat2 (accum, middle);
2404                   lastpos = pos;
2405                   lastpos_byte = pos_byte;
2406                 }
2407             }
2408
2409           if (pos != lastpos)
2410             middle = substring_both (newtext, lastpos,
2411                                      lastpos_byte,
2412                                      pos, pos_byte);
2413           else
2414             middle = Qnil;
2415
2416           newtext = concat2 (accum, middle);
2417         }
2418
2419       /* Do case substitution in NEWTEXT if desired.  */
2420       if (case_action == all_caps)
2421         newtext = Fupcase (newtext);
2422       else if (case_action == cap_initial)
2423         newtext = Fupcase_initials (newtext);
2424
2425       return concat3 (before, newtext, after);
2426     }
2427
2428   /* Record point, then move (quietly) to the start of the match.  */
2429   if (PT >= search_regs.end[sub])
2430     opoint = PT - ZV;
2431   else if (PT > search_regs.start[sub])
2432     opoint = search_regs.end[sub] - ZV;
2433   else
2434     opoint = PT;
2435
2436   /* If we want non-literal replacement,
2437      perform substitution on the replacement string.  */
2438   if (NILP (literal))
2439     {
2440       int length = SBYTES (newtext);
2441       unsigned char *substed;
2442       int substed_alloc_size, substed_len;
2443       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2444       int str_multibyte = STRING_MULTIBYTE (newtext);
2445       Lisp_Object rev_tbl;
2446       int really_changed = 0;
2447
2448       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2449                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2450                                           make_number (0))
2451                 : Qnil);
2452
2453       substed_alloc_size = length * 2 + 100;
2454       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2455       substed_len = 0;
2456
2457       /* Go thru NEWTEXT, producing the actual text to insert in
2458          SUBSTED while adjusting multibyteness to that of the current
2459          buffer.  */
2460
2461       for (pos_byte = 0, pos = 0; pos_byte < length;)
2462         {
2463           unsigned char str[MAX_MULTIBYTE_LENGTH];
2464           unsigned char *add_stuff = NULL;
2465           int add_len = 0;
2466           int idx = -1;
2467
2468           if (str_multibyte)
2469             {
2470               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2471               if (!buf_multibyte)
2472                 c = multibyte_char_to_unibyte (c, rev_tbl);
2473             }
2474           else
2475             {
2476               /* Note that we don't have to increment POS.  */
2477               c = SREF (newtext, pos_byte++);
2478               if (buf_multibyte)
2479                 c = unibyte_char_to_multibyte (c);
2480             }
2481
2482           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2483              or set IDX to a match index, which means put that part
2484              of the buffer text into SUBSTED.  */
2485
2486           if (c == '\\')
2487             {
2488               really_changed = 1;
2489
2490               if (str_multibyte)
2491                 {
2492                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2493                                                       pos, pos_byte);
2494                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2495                     c = multibyte_char_to_unibyte (c, rev_tbl);
2496                 }
2497               else
2498                 {
2499                   c = SREF (newtext, pos_byte++);
2500                   if (buf_multibyte)
2501                     c = unibyte_char_to_multibyte (c);
2502                 }
2503
2504               if (c == '&')
2505                 idx = sub;
2506               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2507                 {
2508                   if (search_regs.start[c - '0'] >= 1)
2509                     idx = c - '0';
2510                 }
2511               else if (c == '\\')
2512                 add_len = 1, add_stuff = "\\";
2513               else
2514                 {
2515                   xfree (substed);
2516                   error ("Invalid use of `\\' in replacement text");
2517                 }
2518             }
2519           else
2520             {
2521               add_len = CHAR_STRING (c, str);
2522               add_stuff = str;
2523             }
2524
2525           /* If we want to copy part of a previous match,
2526              set up ADD_STUFF and ADD_LEN to point to it.  */
2527           if (idx >= 0)
2528             {
2529               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2530               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2531               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2532                 move_gap (search_regs.start[idx]);
2533               add_stuff = BYTE_POS_ADDR (begbyte);
2534             }
2535
2536           /* Now the stuff we want to add to SUBSTED
2537              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2538
2539           /* Make sure SUBSTED is big enough.  */
2540           if (substed_len + add_len >= substed_alloc_size)
2541             {
2542               substed_alloc_size = substed_len + add_len + 500;
2543               substed = (unsigned char *) xrealloc (substed,
2544                                                     substed_alloc_size + 1);
2545             }
2546
2547           /* Now add to the end of SUBSTED.  */
2548           if (add_stuff)
2549             {
2550               bcopy (add_stuff, substed + substed_len, add_len);
2551               substed_len += add_len;
2552             }
2553         }
2554
2555       if (really_changed)
2556         newtext = make_string (substed, substed_len);
2557
2558       xfree (substed);
2559     }
2560
2561   /* Replace the old text with the new in the cleanest possible way.  */
2562   replace_range (search_regs.start[sub], search_regs.end[sub],
2563                  newtext, 1, 0, 1);
2564   newpoint = search_regs.start[sub] + SCHARS (newtext);
2565
2566   if (case_action == all_caps)
2567     Fupcase_region (make_number (search_regs.start[sub]),
2568                     make_number (newpoint));
2569   else if (case_action == cap_initial)
2570     Fupcase_initials_region (make_number (search_regs.start[sub]),
2571                              make_number (newpoint));
2572
2573   /* Adjust search data for this change.  */
2574   {
2575     int oldend = search_regs.end[sub];
2576     int change = newpoint - search_regs.end[sub];
2577     int i;
2578
2579     for (i = 0; i < search_regs.num_regs; i++)
2580       {
2581         if (search_regs.start[i] > oldend)
2582           search_regs.start[i] += change;
2583         if (search_regs.end[i] > oldend)
2584           search_regs.end[i] += change;
2585       }
2586   }
2587
2588   /* Put point back where it was in the text.  */
2589   if (opoint <= 0)
2590     TEMP_SET_PT (opoint + ZV);
2591   else
2592     TEMP_SET_PT (opoint);
2593
2594   /* Now move point "officially" to the start of the inserted replacement.  */
2595   move_if_not_intangible (newpoint);
2596
2597   return Qnil;
2598 }
2599 \f
2600 static Lisp_Object
2601 match_limit (num, beginningp)
2602      Lisp_Object num;
2603      int beginningp;
2604 {
2605   register int n;
2606
2607   CHECK_NUMBER (num);
2608   n = XINT (num);
2609   if (n < 0 || n >= search_regs.num_regs)
2610     args_out_of_range (num, make_number (search_regs.num_regs));
2611   if (search_regs.num_regs <= 0
2612       || search_regs.start[n] < 0)
2613     return Qnil;
2614   return (make_number ((beginningp) ? search_regs.start[n]
2615                                     : search_regs.end[n]));
2616 }
2617
2618 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2619        doc: /* Return position of start of text matched by last search.
2620 SUBEXP, a number, specifies which parenthesized expression in the last
2621   regexp.
2622 Value is nil if SUBEXPth pair didn't match, or there were less than
2623   SUBEXP pairs.
2624 Zero means the entire text matched by the whole regexp or whole string.  */)
2625      (subexp)
2626      Lisp_Object subexp;
2627 {
2628   return match_limit (subexp, 1);
2629 }
2630
2631 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2632        doc: /* Return position of end of text matched by last search.
2633 SUBEXP, a number, specifies which parenthesized expression in the last
2634   regexp.
2635 Value is nil if SUBEXPth pair didn't match, or there were less than
2636   SUBEXP pairs.
2637 Zero means the entire text matched by the whole regexp or whole string.  */)
2638      (subexp)
2639      Lisp_Object subexp;
2640 {
2641   return match_limit (subexp, 0);
2642 }
2643
2644 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2645        doc: /* Return a list containing all info on what the last search matched.
2646 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2647 All the elements are markers or nil (nil if the Nth pair didn't match)
2648 if the last match was on a buffer; integers or nil if a string was matched.
2649 Use `store-match-data' to reinstate the data in this list.
2650
2651 If INTEGERS (the optional first argument) is non-nil, always use integers
2652 \(rather than markers) to represent buffer positions.
2653 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2654 to hold all the values, and if INTEGERS is non-nil, no consing is done.  */)
2655      (integers, reuse)
2656      Lisp_Object integers, reuse;
2657 {
2658   Lisp_Object tail, prev;
2659   Lisp_Object *data;
2660   int i, len;
2661
2662   if (NILP (last_thing_searched))
2663     return Qnil;
2664
2665   prev = Qnil;
2666
2667   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2668                                  * sizeof (Lisp_Object));
2669
2670   len = -1;
2671   for (i = 0; i < search_regs.num_regs; i++)
2672     {
2673       int start = search_regs.start[i];
2674       if (start >= 0)
2675         {
2676           if (EQ (last_thing_searched, Qt)
2677               || ! NILP (integers))
2678             {
2679               XSETFASTINT (data[2 * i], start);
2680               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2681             }
2682           else if (BUFFERP (last_thing_searched))
2683             {
2684               data[2 * i] = Fmake_marker ();
2685               Fset_marker (data[2 * i],
2686                            make_number (start),
2687                            last_thing_searched);
2688               data[2 * i + 1] = Fmake_marker ();
2689               Fset_marker (data[2 * i + 1],
2690                            make_number (search_regs.end[i]),
2691                            last_thing_searched);
2692             }
2693           else
2694             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2695             abort ();
2696
2697           len = i;
2698         }
2699       else
2700         data[2 * i] = data [2 * i + 1] = Qnil;
2701     }
2702
2703   /* If REUSE is not usable, cons up the values and return them.  */
2704   if (! CONSP (reuse))
2705     return Flist (2 * len + 2, data);
2706
2707   /* If REUSE is a list, store as many value elements as will fit
2708      into the elements of REUSE.  */
2709   for (i = 0, tail = reuse; CONSP (tail);
2710        i++, tail = XCDR (tail))
2711     {
2712       if (i < 2 * len + 2)
2713         XSETCAR (tail, data[i]);
2714       else
2715         XSETCAR (tail, Qnil);
2716       prev = tail;
2717     }
2718
2719   /* If we couldn't fit all value elements into REUSE,
2720      cons up the rest of them and add them to the end of REUSE.  */
2721   if (i < 2 * len + 2)
2722     XSETCDR (prev, Flist (2 * len + 2 - i, data + i));
2723
2724   return reuse;
2725 }
2726
2727
2728 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2729        doc: /* Set internal data on last search match from elements of LIST.
2730 LIST should have been created by calling `match-data' previously.  */)
2731      (list)
2732      register Lisp_Object list;
2733 {
2734   register int i;
2735   register Lisp_Object marker;
2736
2737   if (running_asynch_code)
2738     save_search_regs ();
2739
2740   if (!CONSP (list) && !NILP (list))
2741     list = wrong_type_argument (Qconsp, list);
2742
2743   /* Unless we find a marker with a buffer in LIST, assume that this
2744      match data came from a string.  */
2745   last_thing_searched = Qt;
2746
2747   /* Allocate registers if they don't already exist.  */
2748   {
2749     int length = XFASTINT (Flength (list)) / 2;
2750
2751     if (length > search_regs.num_regs)
2752       {
2753         if (search_regs.num_regs == 0)
2754           {
2755             search_regs.start
2756               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2757             search_regs.end
2758               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2759           }
2760         else
2761           {
2762             search_regs.start
2763               = (regoff_t *) xrealloc (search_regs.start,
2764                                        length * sizeof (regoff_t));
2765             search_regs.end
2766               = (regoff_t *) xrealloc (search_regs.end,
2767                                        length * sizeof (regoff_t));
2768           }
2769
2770         for (i = search_regs.num_regs; i < length; i++)
2771           search_regs.start[i] = -1;
2772
2773         search_regs.num_regs = length;
2774       }
2775   }
2776
2777   for (i = 0; i < search_regs.num_regs; i++)
2778     {
2779       marker = Fcar (list);
2780       if (NILP (marker))
2781         {
2782           search_regs.start[i] = -1;
2783           list = Fcdr (list);
2784         }
2785       else
2786         {
2787           int from;
2788
2789           if (MARKERP (marker))
2790             {
2791               if (XMARKER (marker)->buffer == 0)
2792                 XSETFASTINT (marker, 0);
2793               else
2794                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2795             }
2796
2797           CHECK_NUMBER_COERCE_MARKER (marker);
2798           from = XINT (marker);
2799           list = Fcdr (list);
2800
2801           marker = Fcar (list);
2802           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2803             XSETFASTINT (marker, 0);
2804
2805           CHECK_NUMBER_COERCE_MARKER (marker);
2806           search_regs.start[i] = from;
2807           search_regs.end[i] = XINT (marker);
2808         }
2809       list = Fcdr (list);
2810     }
2811
2812   return Qnil;
2813 }
2814
2815 /* If non-zero the match data have been saved in saved_search_regs
2816    during the execution of a sentinel or filter. */
2817 static int search_regs_saved;
2818 static struct re_registers saved_search_regs;
2819
2820 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2821    if asynchronous code (filter or sentinel) is running. */
2822 static void
2823 save_search_regs ()
2824 {
2825   if (!search_regs_saved)
2826     {
2827       saved_search_regs.num_regs = search_regs.num_regs;
2828       saved_search_regs.start = search_regs.start;
2829       saved_search_regs.end = search_regs.end;
2830       search_regs.num_regs = 0;
2831       search_regs.start = 0;
2832       search_regs.end = 0;
2833
2834       search_regs_saved = 1;
2835     }
2836 }
2837
2838 /* Called upon exit from filters and sentinels. */
2839 void
2840 restore_match_data ()
2841 {
2842   if (search_regs_saved)
2843     {
2844       if (search_regs.num_regs > 0)
2845         {
2846           xfree (search_regs.start);
2847           xfree (search_regs.end);
2848         }
2849       search_regs.num_regs = saved_search_regs.num_regs;
2850       search_regs.start = saved_search_regs.start;
2851       search_regs.end = saved_search_regs.end;
2852
2853       search_regs_saved = 0;
2854     }
2855 }
2856
2857 /* Quote a string to inactivate reg-expr chars */
2858
2859 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2860        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2861      (string)
2862      Lisp_Object string;
2863 {
2864   register unsigned char *in, *out, *end;
2865   register unsigned char *temp;
2866   int backslashes_added = 0;
2867
2868   CHECK_STRING (string);
2869
2870   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2871
2872   /* Now copy the data into the new string, inserting escapes. */
2873
2874   in = SDATA (string);
2875   end = in + SBYTES (string);
2876   out = temp;
2877
2878   for (; in != end; in++)
2879     {
2880       if (*in == '[' || *in == ']'
2881           || *in == '*' || *in == '.' || *in == '\\'
2882           || *in == '?' || *in == '+'
2883           || *in == '^' || *in == '$')
2884         *out++ = '\\', backslashes_added++;
2885       *out++ = *in;
2886     }
2887
2888   return make_specified_string (temp,
2889                                 SCHARS (string) + backslashes_added,
2890                                 out - temp,
2891                                 STRING_MULTIBYTE (string));
2892 }
2893 \f
2894 void
2895 syms_of_search ()
2896 {
2897   register int i;
2898
2899   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2900     {
2901       searchbufs[i].buf.allocated = 100;
2902       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2903       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2904       searchbufs[i].regexp = Qnil;
2905       staticpro (&searchbufs[i].regexp);
2906       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2907     }
2908   searchbuf_head = &searchbufs[0];
2909
2910   Qsearch_failed = intern ("search-failed");
2911   staticpro (&Qsearch_failed);
2912   Qinvalid_regexp = intern ("invalid-regexp");
2913   staticpro (&Qinvalid_regexp);
2914
2915   Fput (Qsearch_failed, Qerror_conditions,
2916         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2917   Fput (Qsearch_failed, Qerror_message,
2918         build_string ("Search failed"));
2919
2920   Fput (Qinvalid_regexp, Qerror_conditions,
2921         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2922   Fput (Qinvalid_regexp, Qerror_message,
2923         build_string ("Invalid regexp"));
2924
2925   last_thing_searched = Qnil;
2926   staticpro (&last_thing_searched);
2927
2928   defsubr (&Slooking_at);
2929   defsubr (&Sposix_looking_at);
2930   defsubr (&Sstring_match);
2931   defsubr (&Sposix_string_match);
2932   defsubr (&Ssearch_forward);
2933   defsubr (&Ssearch_backward);
2934   defsubr (&Sword_search_forward);
2935   defsubr (&Sword_search_backward);
2936   defsubr (&Sre_search_forward);
2937   defsubr (&Sre_search_backward);
2938   defsubr (&Sposix_search_forward);
2939   defsubr (&Sposix_search_backward);
2940   defsubr (&Sreplace_match);
2941   defsubr (&Smatch_beginning);
2942   defsubr (&Smatch_end);
2943   defsubr (&Smatch_data);
2944   defsubr (&Sset_match_data);
2945   defsubr (&Sregexp_quote);
2946 }