src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
  36 #define REGEXP_CACHE_SIZE 20
  37
  38 /* If the regexp is non-nil, then the buffer contains the compiled form
  39    of that regexp, suitable for searching.  */
  40 struct regexp_cache
  41 {
  42   struct regexp_cache *next;
  43   Lisp_Object regexp;
  44   struct re_pattern_buffer buf;
  45   char fastmap[0400];
  46   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  47   char posix;
  48 };
  49
  50 /* The instances of that struct.  */
  51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  52
  53 /* The head of the linked list; points to the most recently used buffer.  */
  54 struct regexp_cache *searchbuf_head;
  55
  56
  57 /* Every call to re_match, etc., must pass &search_regs as the regs
  58    argument unless you can show it is unnecessary (i.e., if re_match
  59    is certainly going to be called again before region-around-match
  60    can be called).
  61
  62    Since the registers are now dynamically allocated, we need to make
  63    sure not to refer to the Nth register before checking that it has
  64    been allocated by checking search_regs.num_regs.
  65
  66    The regex code keeps track of whether it has allocated the search
  67    buffer using bits in the re_pattern_buffer.  This means that whenever
  68    you compile a new pattern, it completely forgets whether it has
  69    allocated any registers, and will allocate new registers the next
  70    time you call a searching or matching function.  Therefore, we need
  71    to call re_set_registers after compiling a new pattern or after
  72    setting the match registers, so that the regex functions will be
  73    able to free or re-allocate it properly.  */
  74 static struct re_registers search_regs;
  75
  76 /* The buffer in which the last search was performed, or
  77    Qt if the last search was done in a string;
  78    Qnil if no searching has been done yet.  */
  79 static Lisp_Object last_thing_searched;
  80
  81 /* error condition signaled when regexp compile_pattern fails */
  82
  83 Lisp_Object Qinvalid_regexp;
  84
  85 static void set_search_regs ();
  86 static void save_search_regs ();
  87 static int simple_search ();
  88 static int boyer_moore ();
  89 static int search_buffer ();
  90
  91 static void
  92 matcher_overflow ()
  93 {
  94   error ("Stack overflow in regexp matcher");
  95 }
  96
  97 /* Compile a regexp and signal a Lisp error if anything goes wrong.
  98    PATTERN is the pattern to compile.
  99    CP is the place to put the result.
 100    TRANSLATE is a translation table for ignoring case, or nil for none.
 101    REGP is the structure that says where to store the "register"
 102    values that will result from matching this pattern.
 103    If it is 0, we should compile the pattern not to record any
 104    subexpression bounds.
 105    POSIX is nonzero if we want full backtracking (POSIX style)
 106    for this pattern.  0 means backtrack only enough to get a valid match.
 107    MULTIBYTE is nonzero iff a target of match is a multibyte buffer or
 108    string.  */
 109
 110 static void
 111 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 112      struct regexp_cache *cp;
 113      Lisp_Object pattern;
 114      Lisp_Object translate;
 115      struct re_registers *regp;
 116      int posix;
 117      int multibyte;
 118 {
 119   char *val;
 120   reg_syntax_t old;
 121
 122   cp->regexp = Qnil;
 123   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 124   cp->posix = posix;
 125   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 126   cp->buf.target_multibyte = multibyte;
 127   BLOCK_INPUT;
 128   old = re_set_syntax (RE_SYNTAX_EMACS
 129                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 130   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 131                                      SBYTES (pattern), &cp->buf);
 132   re_set_syntax (old);
 133   UNBLOCK_INPUT;
 134   if (val)
 135     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 136
 137   cp->regexp = Fcopy_sequence (pattern);
 138 }
 139
 140 /* Shrink each compiled regexp buffer in the cache
 141    to the size actually used right now.
 142    This is called from garbage collection.  */
 143
 144 void
 145 shrink_regexp_cache ()
 146 {
 147   struct regexp_cache *cp;
 148
 149   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 150     {
 151       cp->buf.allocated = cp->buf.used;
 152       cp->buf.buffer
 153         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 154     }
 155 }
 156
 157 /* Compile a regexp if necessary, but first check to see if there's one in
 158    the cache.
 159    PATTERN is the pattern to compile.
 160    TRANSLATE is a translation table for ignoring case, or nil for none.
 161    REGP is the structure that says where to store the "register"
 162    values that will result from matching this pattern.
 163    If it is 0, we should compile the pattern not to record any
 164    subexpression bounds.
 165    POSIX is nonzero if we want full backtracking (POSIX style)
 166    for this pattern.  0 means backtrack only enough to get a valid match.  */
 167
 168 struct re_pattern_buffer *
 169 compile_pattern (pattern, regp, translate, posix, multibyte)
 170      Lisp_Object pattern;
 171      struct re_registers *regp;
 172      Lisp_Object translate;
 173      int posix, multibyte;
 174 {
 175   struct regexp_cache *cp, **cpp;
 176
 177   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 178     {
 179       cp = *cpp;
 180       /* Entries are initialized to nil, and may be set to nil by
 181          compile_pattern_1 if the pattern isn't valid.  Don't apply
 182          string accessors in those cases.  However, compile_pattern_1
 183          is only applied to the cache entry we pick here to reuse.  So
 184          nil should never appear before a non-nil entry.  */
 185       if (NILP (cp->regexp))
 186         goto compile_it;
 187       if (SCHARS (cp->regexp) == SCHARS (pattern)
 188           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 189           && !NILP (Fstring_equal (cp->regexp, pattern))
 190           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 191           && cp->posix == posix
 192           && cp->buf.target_multibyte == multibyte)
 193         break;
 194
 195       /* If we're at the end of the cache, compile into the nil cell
 196          we found, or the last (least recently used) cell with a
 197          string value.  */
 198       if (cp->next == 0)
 199         {
 200         compile_it:
 201           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 202           break;
 203         }
 204     }
 205
 206   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 207      either because we found it in the cache or because we just compiled it.
 208      Move it to the front of the queue to mark it as most recently used.  */
 209   *cpp = cp->next;
 210   cp->next = searchbuf_head;
 211   searchbuf_head = cp;
 212
 213   /* Advise the searching functions about the space we have allocated
 214      for register data.  */
 215   if (regp)
 216     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 217
 218   return &cp->buf;
 219 }
 220
 221 /* Error condition used for failing searches */
 222 Lisp_Object Qsearch_failed;
 223
 224 Lisp_Object
 225 signal_failure (arg)
 226      Lisp_Object arg;
 227 {
 228   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 229   return Qnil;
 230 }
 231 \f
 232 static Lisp_Object
 233 looking_at_1 (string, posix)
 234      Lisp_Object string;
 235      int posix;
 236 {
 237   Lisp_Object val;
 238   unsigned char *p1, *p2;
 239   int s1, s2;
 240   register int i;
 241   struct re_pattern_buffer *bufp;
 242
 243   if (running_asynch_code)
 244     save_search_regs ();
 245
 246   CHECK_STRING (string);
 247   bufp = compile_pattern (string, &search_regs,
 248                           (!NILP (current_buffer->case_fold_search)
 249                            ? DOWNCASE_TABLE : Qnil),
 250                           posix,
 251                           !NILP (current_buffer->enable_multibyte_characters));
 252
 253   immediate_quit = 1;
 254   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 255
 256   /* Get pointers and sizes of the two strings
 257      that make up the visible portion of the buffer. */
 258
 259   p1 = BEGV_ADDR;
 260   s1 = GPT_BYTE - BEGV_BYTE;
 261   p2 = GAP_END_ADDR;
 262   s2 = ZV_BYTE - GPT_BYTE;
 263   if (s1 < 0)
 264     {
 265       p2 = p1;
 266       s2 = ZV_BYTE - BEGV_BYTE;
 267       s1 = 0;
 268     }
 269   if (s2 < 0)
 270     {
 271       s1 = ZV_BYTE - BEGV_BYTE;
 272       s2 = 0;
 273     }
 274
 275   re_match_object = Qnil;
 276
 277   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 278                   PT_BYTE - BEGV_BYTE, &search_regs,
 279                   ZV_BYTE - BEGV_BYTE);
 280   immediate_quit = 0;
 281
 282   if (i == -2)
 283     matcher_overflow ();
 284
 285   val = (0 <= i ? Qt : Qnil);
 286   if (i >= 0)
 287     for (i = 0; i < search_regs.num_regs; i++)
 288       if (search_regs.start[i] >= 0)
 289         {
 290           search_regs.start[i]
 291             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 292           search_regs.end[i]
 293             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 294         }
 295   XSETBUFFER (last_thing_searched, current_buffer);
 296   return val;
 297 }
 298
 299 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 300        doc: /* Return t if text after point matches regular expression REGEXP.
 301 This function modifies the match data that `match-beginning',
 302 `match-end' and `match-data' access; save and restore the match
 303 data if you want to preserve them.  */)
 304      (regexp)
 305      Lisp_Object regexp;
 306 {
 307   return looking_at_1 (regexp, 0);
 308 }
 309
 310 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 311        doc: /* Return t if text after point matches regular expression REGEXP.
 312 Find the longest match, in accord with Posix regular expression rules.
 313 This function modifies the match data that `match-beginning',
 314 `match-end' and `match-data' access; save and restore the match
 315 data if you want to preserve them.  */)
 316      (regexp)
 317      Lisp_Object regexp;
 318 {
 319   return looking_at_1 (regexp, 1);
 320 }
 321 \f
 322 static Lisp_Object
 323 string_match_1 (regexp, string, start, posix)
 324      Lisp_Object regexp, string, start;
 325      int posix;
 326 {
 327   int val;
 328   struct re_pattern_buffer *bufp;
 329   int pos, pos_byte;
 330   int i;
 331
 332   if (running_asynch_code)
 333     save_search_regs ();
 334
 335   CHECK_STRING (regexp);
 336   CHECK_STRING (string);
 337
 338   if (NILP (start))
 339     pos = 0, pos_byte = 0;
 340   else
 341     {
 342       int len = SCHARS (string);
 343
 344       CHECK_NUMBER (start);
 345       pos = XINT (start);
 346       if (pos < 0 && -pos <= len)
 347         pos = len + pos;
 348       else if (0 > pos || pos > len)
 349         args_out_of_range (string, start);
 350       pos_byte = string_char_to_byte (string, pos);
 351     }
 352
 353   bufp = compile_pattern (regexp, &search_regs,
 354                           (!NILP (current_buffer->case_fold_search)
 355                            ? DOWNCASE_TABLE : Qnil),
 356                           posix,
 357                           STRING_MULTIBYTE (string));
 358   immediate_quit = 1;
 359   re_match_object = string;
 360
 361   val = re_search (bufp, (char *) SDATA (string),
 362                    SBYTES (string), pos_byte,
 363                    SBYTES (string) - pos_byte,
 364                    &search_regs);
 365   immediate_quit = 0;
 366   last_thing_searched = Qt;
 367   if (val == -2)
 368     matcher_overflow ();
 369   if (val < 0) return Qnil;
 370
 371   for (i = 0; i < search_regs.num_regs; i++)
 372     if (search_regs.start[i] >= 0)
 373       {
 374         search_regs.start[i]
 375           = string_byte_to_char (string, search_regs.start[i]);
 376         search_regs.end[i]
 377           = string_byte_to_char (string, search_regs.end[i]);
 378       }
 379
 380   return make_number (string_byte_to_char (string, val));
 381 }
 382
 383 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 384        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 385 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 386 If third arg START is non-nil, start search at that index in STRING.
 387 For index of first char beyond the match, do (match-end 0).
 388 `match-end' and `match-beginning' also give indices of substrings
 389 matched by parenthesis constructs in the pattern.
 390
 391 You can use the function `match-string' to extract the substrings
 392 matched by the parenthesis constructions in REGEXP. */)
 393      (regexp, string, start)
 394      Lisp_Object regexp, string, start;
 395 {
 396   return string_match_1 (regexp, string, start, 0);
 397 }
 398
 399 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 400        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 401 Find the longest match, in accord with Posix regular expression rules.
 402 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 403 If third arg START is non-nil, start search at that index in STRING.
 404 For index of first char beyond the match, do (match-end 0).
 405 `match-end' and `match-beginning' also give indices of substrings
 406 matched by parenthesis constructs in the pattern.  */)
 407      (regexp, string, start)
 408      Lisp_Object regexp, string, start;
 409 {
 410   return string_match_1 (regexp, string, start, 1);
 411 }
 412
 413 /* Match REGEXP against STRING, searching all of STRING,
 414    and return the index of the match, or negative on failure.
 415    This does not clobber the match data.  */
 416
 417 int
 418 fast_string_match (regexp, string)
 419      Lisp_Object regexp, string;
 420 {
 421   int val;
 422   struct re_pattern_buffer *bufp;
 423
 424   bufp = compile_pattern (regexp, 0, Qnil,
 425                           0, STRING_MULTIBYTE (string));
 426   immediate_quit = 1;
 427   re_match_object = string;
 428
 429   val = re_search (bufp, (char *) SDATA (string),
 430                    SBYTES (string), 0,
 431                    SBYTES (string), 0);
 432   immediate_quit = 0;
 433   return val;
 434 }
 435
 436 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 437    and return the index of the match, or negative on failure.
 438    This does not clobber the match data.
 439    We assume that STRING contains single-byte characters.  */
 440
 441 extern Lisp_Object Vascii_downcase_table;
 442
 443 int
 444 fast_c_string_match_ignore_case (regexp, string)
 445      Lisp_Object regexp;
 446      const char *string;
 447 {
 448   int val;
 449   struct re_pattern_buffer *bufp;
 450   int len = strlen (string);
 451
 452   regexp = string_make_unibyte (regexp);
 453   re_match_object = Qt;
 454   bufp = compile_pattern (regexp, 0,
 455                           Vascii_downcase_table, 0,
 456                           0);
 457   immediate_quit = 1;
 458   val = re_search (bufp, string, len, 0, len, 0);
 459   immediate_quit = 0;
 460   return val;
 461 }
 462 \f
 463 /* The newline cache: remembering which sections of text have no newlines.  */
 464
 465 /* If the user has requested newline caching, make sure it's on.
 466    Otherwise, make sure it's off.
 467    This is our cheezy way of associating an action with the change of
 468    state of a buffer-local variable.  */
 469 static void
 470 newline_cache_on_off (buf)
 471      struct buffer *buf;
 472 {
 473   if (NILP (buf->cache_long_line_scans))
 474     {
 475       /* It should be off.  */
 476       if (buf->newline_cache)
 477         {
 478           free_region_cache (buf->newline_cache);
 479           buf->newline_cache = 0;
 480         }
 481     }
 482   else
 483     {
 484       /* It should be on.  */
 485       if (buf->newline_cache == 0)
 486         buf->newline_cache = new_region_cache ();
 487     }
 488 }
 489
 490 \f
 491 /* Search for COUNT instances of the character TARGET between START and END.
 492
 493    If COUNT is positive, search forwards; END must be >= START.
 494    If COUNT is negative, search backwards for the -COUNTth instance;
 495       END must be <= START.
 496    If COUNT is zero, do anything you please; run rogue, for all I care.
 497
 498    If END is zero, use BEGV or ZV instead, as appropriate for the
 499    direction indicated by COUNT.
 500
 501    If we find COUNT instances, set *SHORTAGE to zero, and return the
 502    position after the COUNTth match.  Note that for reverse motion
 503    this is not the same as the usual convention for Emacs motion commands.
 504
 505    If we don't find COUNT instances before reaching END, set *SHORTAGE
 506    to the number of TARGETs left unfound, and return END.
 507
 508    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 509    except when inside redisplay.  */
 510
 511 int
 512 scan_buffer (target, start, end, count, shortage, allow_quit)
 513      register int target;
 514      int start, end;
 515      int count;
 516      int *shortage;
 517      int allow_quit;
 518 {
 519   struct region_cache *newline_cache;
 520   int direction;
 521
 522   if (count > 0)
 523     {
 524       direction = 1;
 525       if (! end) end = ZV;
 526     }
 527   else
 528     {
 529       direction = -1;
 530       if (! end) end = BEGV;
 531     }
 532
 533   newline_cache_on_off (current_buffer);
 534   newline_cache = current_buffer->newline_cache;
 535
 536   if (shortage != 0)
 537     *shortage = 0;
 538
 539   immediate_quit = allow_quit;
 540
 541   if (count > 0)
 542     while (start != end)
 543       {
 544         /* Our innermost scanning loop is very simple; it doesn't know
 545            about gaps, buffer ends, or the newline cache.  ceiling is
 546            the position of the last character before the next such
 547            obstacle --- the last character the dumb search loop should
 548            examine.  */
 549         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 550         int start_byte = CHAR_TO_BYTE (start);
 551         int tem;
 552
 553         /* If we're looking for a newline, consult the newline cache
 554            to see where we can avoid some scanning.  */
 555         if (target == '\n' && newline_cache)
 556           {
 557             int next_change;
 558             immediate_quit = 0;
 559             while (region_cache_forward
 560                    (current_buffer, newline_cache, start_byte, &next_change))
 561               start_byte = next_change;
 562             immediate_quit = allow_quit;
 563
 564             /* START should never be after END.  */
 565             if (start_byte > ceiling_byte)
 566               start_byte = ceiling_byte;
 567
 568             /* Now the text after start is an unknown region, and
 569                next_change is the position of the next known region. */
 570             ceiling_byte = min (next_change - 1, ceiling_byte);
 571           }
 572
 573         /* The dumb loop can only scan text stored in contiguous
 574            bytes. BUFFER_CEILING_OF returns the last character
 575            position that is contiguous, so the ceiling is the
 576            position after that.  */
 577         tem = BUFFER_CEILING_OF (start_byte);
 578         ceiling_byte = min (tem, ceiling_byte);
 579
 580         {
 581           /* The termination address of the dumb loop.  */
 582           register unsigned char *ceiling_addr
 583             = BYTE_POS_ADDR (ceiling_byte) + 1;
 584           register unsigned char *cursor
 585             = BYTE_POS_ADDR (start_byte);
 586           unsigned char *base = cursor;
 587
 588           while (cursor < ceiling_addr)
 589             {
 590               unsigned char *scan_start = cursor;
 591
 592               /* The dumb loop.  */
 593               while (*cursor != target && ++cursor < ceiling_addr)
 594                 ;
 595
 596               /* If we're looking for newlines, cache the fact that
 597                  the region from start to cursor is free of them. */
 598               if (target == '\n' && newline_cache)
 599                 know_region_cache (current_buffer, newline_cache,
 600                                    start_byte + scan_start - base,
 601                                    start_byte + cursor - base);
 602
 603               /* Did we find the target character?  */
 604               if (cursor < ceiling_addr)
 605                 {
 606                   if (--count == 0)
 607                     {
 608                       immediate_quit = 0;
 609                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 610                     }
 611                   cursor++;
 612                 }
 613             }
 614
 615           start = BYTE_TO_CHAR (start_byte + cursor - base);
 616         }
 617       }
 618   else
 619     while (start > end)
 620       {
 621         /* The last character to check before the next obstacle.  */
 622         int ceiling_byte = CHAR_TO_BYTE (end);
 623         int start_byte = CHAR_TO_BYTE (start);
 624         int tem;
 625
 626         /* Consult the newline cache, if appropriate.  */
 627         if (target == '\n' && newline_cache)
 628           {
 629             int next_change;
 630             immediate_quit = 0;
 631             while (region_cache_backward
 632                    (current_buffer, newline_cache, start_byte, &next_change))
 633               start_byte = next_change;
 634             immediate_quit = allow_quit;
 635
 636             /* Start should never be at or before end.  */
 637             if (start_byte <= ceiling_byte)
 638               start_byte = ceiling_byte + 1;
 639
 640             /* Now the text before start is an unknown region, and
 641                next_change is the position of the next known region. */
 642             ceiling_byte = max (next_change, ceiling_byte);
 643           }
 644
 645         /* Stop scanning before the gap.  */
 646         tem = BUFFER_FLOOR_OF (start_byte - 1);
 647         ceiling_byte = max (tem, ceiling_byte);
 648
 649         {
 650           /* The termination address of the dumb loop.  */
 651           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 652           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 653           unsigned char *base = cursor;
 654
 655           while (cursor >= ceiling_addr)
 656             {
 657               unsigned char *scan_start = cursor;
 658
 659               while (*cursor != target && --cursor >= ceiling_addr)
 660                 ;
 661
 662               /* If we're looking for newlines, cache the fact that
 663                  the region from after the cursor to start is free of them.  */
 664               if (target == '\n' && newline_cache)
 665                 know_region_cache (current_buffer, newline_cache,
 666                                    start_byte + cursor - base,
 667                                    start_byte + scan_start - base);
 668
 669               /* Did we find the target character?  */
 670               if (cursor >= ceiling_addr)
 671                 {
 672                   if (++count >= 0)
 673                     {
 674                       immediate_quit = 0;
 675                       return BYTE_TO_CHAR (start_byte + cursor - base);
 676                     }
 677                   cursor--;
 678                 }
 679             }
 680
 681           start = BYTE_TO_CHAR (start_byte + cursor - base);
 682         }
 683       }
 684
 685   immediate_quit = 0;
 686   if (shortage != 0)
 687     *shortage = count * direction;
 688   return start;
 689 }
 690 \f
 691 /* Search for COUNT instances of a line boundary, which means either a
 692    newline or (if selective display enabled) a carriage return.
 693    Start at START.  If COUNT is negative, search backwards.
 694
 695    We report the resulting position by calling TEMP_SET_PT_BOTH.
 696
 697    If we find COUNT instances. we position after (always after,
 698    even if scanning backwards) the COUNTth match, and return 0.
 699
 700    If we don't find COUNT instances before reaching the end of the
 701    buffer (or the beginning, if scanning backwards), we return
 702    the number of line boundaries left unfound, and position at
 703    the limit we bumped up against.
 704
 705    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 706    except in special cases.  */
 707
 708 int
 709 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 710      int start, start_byte;
 711      int limit, limit_byte;
 712      register int count;
 713      int allow_quit;
 714 {
 715   int direction = ((count > 0) ? 1 : -1);
 716
 717   register unsigned char *cursor;
 718   unsigned char *base;
 719
 720   register int ceiling;
 721   register unsigned char *ceiling_addr;
 722
 723   int old_immediate_quit = immediate_quit;
 724
 725   /* The code that follows is like scan_buffer
 726      but checks for either newline or carriage return.  */
 727
 728   if (allow_quit)
 729     immediate_quit++;
 730
 731   start_byte = CHAR_TO_BYTE (start);
 732
 733   if (count > 0)
 734     {
 735       while (start_byte < limit_byte)
 736         {
 737           ceiling =  BUFFER_CEILING_OF (start_byte);
 738           ceiling = min (limit_byte - 1, ceiling);
 739           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 740           base = (cursor = BYTE_POS_ADDR (start_byte));
 741           while (1)
 742             {
 743               while (*cursor != '\n' && ++cursor != ceiling_addr)
 744                 ;
 745
 746               if (cursor != ceiling_addr)
 747                 {
 748                   if (--count == 0)
 749                     {
 750                       immediate_quit = old_immediate_quit;
 751                       start_byte = start_byte + cursor - base + 1;
 752                       start = BYTE_TO_CHAR (start_byte);
 753                       TEMP_SET_PT_BOTH (start, start_byte);
 754                       return 0;
 755                     }
 756                   else
 757                     if (++cursor == ceiling_addr)
 758                       break;
 759                 }
 760               else
 761                 break;
 762             }
 763           start_byte += cursor - base;
 764         }
 765     }
 766   else
 767     {
 768       while (start_byte > limit_byte)
 769         {
 770           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 771           ceiling = max (limit_byte, ceiling);
 772           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 773           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 774           while (1)
 775             {
 776               while (--cursor != ceiling_addr && *cursor != '\n')
 777                 ;
 778
 779               if (cursor != ceiling_addr)
 780                 {
 781                   if (++count == 0)
 782                     {
 783                       immediate_quit = old_immediate_quit;
 784                       /* Return the position AFTER the match we found.  */
 785                       start_byte = start_byte + cursor - base + 1;
 786                       start = BYTE_TO_CHAR (start_byte);
 787                       TEMP_SET_PT_BOTH (start, start_byte);
 788                       return 0;
 789                     }
 790                 }
 791               else
 792                 break;
 793             }
 794           /* Here we add 1 to compensate for the last decrement
 795              of CURSOR, which took it past the valid range.  */
 796           start_byte += cursor - base + 1;
 797         }
 798     }
 799
 800   TEMP_SET_PT_BOTH (limit, limit_byte);
 801   immediate_quit = old_immediate_quit;
 802
 803   return count * direction;
 804 }
 805
 806 int
 807 find_next_newline_no_quit (from, cnt)
 808      register int from, cnt;
 809 {
 810   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 811 }
 812
 813 /* Like find_next_newline, but returns position before the newline,
 814    not after, and only search up to TO.  This isn't just
 815    find_next_newline (...)-1, because you might hit TO.  */
 816
 817 int
 818 find_before_next_newline (from, to, cnt)
 819      int from, to, cnt;
 820 {
 821   int shortage;
 822   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 823
 824   if (shortage == 0)
 825     pos--;
 826
 827   return pos;
 828 }
 829 \f
 830 /* Subroutines of Lisp buffer search functions. */
 831
 832 static Lisp_Object
 833 search_command (string, bound, noerror, count, direction, RE, posix)
 834      Lisp_Object string, bound, noerror, count;
 835      int direction;
 836      int RE;
 837      int posix;
 838 {
 839   register int np;
 840   int lim, lim_byte;
 841   int n = direction;
 842
 843   if (!NILP (count))
 844     {
 845       CHECK_NUMBER (count);
 846       n *= XINT (count);
 847     }
 848
 849   CHECK_STRING (string);
 850   if (NILP (bound))
 851     {
 852       if (n > 0)
 853         lim = ZV, lim_byte = ZV_BYTE;
 854       else
 855         lim = BEGV, lim_byte = BEGV_BYTE;
 856     }
 857   else
 858     {
 859       CHECK_NUMBER_COERCE_MARKER (bound);
 860       lim = XINT (bound);
 861       if (n > 0 ? lim < PT : lim > PT)
 862         error ("Invalid search bound (wrong side of point)");
 863       if (lim > ZV)
 864         lim = ZV, lim_byte = ZV_BYTE;
 865       else if (lim < BEGV)
 866         lim = BEGV, lim_byte = BEGV_BYTE;
 867       else
 868         lim_byte = CHAR_TO_BYTE (lim);
 869     }
 870
 871   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 872                       (!NILP (current_buffer->case_fold_search)
 873                        ? current_buffer->case_canon_table
 874                        : Qnil),
 875                       (!NILP (current_buffer->case_fold_search)
 876                        ? current_buffer->case_eqv_table
 877                        : Qnil),
 878                       posix);
 879   if (np <= 0)
 880     {
 881       if (NILP (noerror))
 882         return signal_failure (string);
 883       if (!EQ (noerror, Qt))
 884         {
 885           if (lim < BEGV || lim > ZV)
 886             abort ();
 887           SET_PT_BOTH (lim, lim_byte);
 888           return Qnil;
 889 #if 0 /* This would be clean, but maybe programs depend on
 890          a value of nil here.  */
 891           np = lim;
 892 #endif
 893         }
 894       else
 895         return Qnil;
 896     }
 897
 898   if (np < BEGV || np > ZV)
 899     abort ();
 900
 901   SET_PT (np);
 902
 903   return make_number (np);
 904 }
 905 \f
 906 /* Return 1 if REGEXP it matches just one constant string.  */
 907
 908 static int
 909 trivial_regexp_p (regexp)
 910      Lisp_Object regexp;
 911 {
 912   int len = SBYTES (regexp);
 913   unsigned char *s = SDATA (regexp);
 914   while (--len >= 0)
 915     {
 916       switch (*s++)
 917         {
 918         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 919           return 0;
 920         case '\\':
 921           if (--len < 0)
 922             return 0;
 923           switch (*s++)
 924             {
 925             case '|': case '(': case ')': case '`': case '\'': case 'b':
 926             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 927             case 'S': case '=': case '{': case '}':
 928             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 929             case '1': case '2': case '3': case '4': case '5':
 930             case '6': case '7': case '8': case '9':
 931               return 0;
 932             }
 933         }
 934     }
 935   return 1;
 936 }
 937
 938 /* Search for the n'th occurrence of STRING in the current buffer,
 939    starting at position POS and stopping at position LIM,
 940    treating STRING as a literal string if RE is false or as
 941    a regular expression if RE is true.
 942
 943    If N is positive, searching is forward and LIM must be greater than POS.
 944    If N is negative, searching is backward and LIM must be less than POS.
 945
 946    Returns -x if x occurrences remain to be found (x > 0),
 947    or else the position at the beginning of the Nth occurrence
 948    (if searching backward) or the end (if searching forward).
 949
 950    POSIX is nonzero if we want full backtracking (POSIX style)
 951    for this pattern.  0 means backtrack only enough to get a valid match.  */
 952
 953 #define TRANSLATE(out, trt, d)                  \
 954 do                                              \
 955   {                                             \
 956     if (! NILP (trt))                           \
 957       {                                         \
 958         Lisp_Object temp;                       \
 959         temp = Faref (trt, make_number (d));    \
 960         if (INTEGERP (temp))                    \
 961           out = XINT (temp);                    \
 962         else                                    \
 963           out = d;                              \
 964       }                                         \
 965     else                                        \
 966       out = d;                                  \
 967   }                                             \
 968 while (0)
 969
 970 static int
 971 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
 972                RE, trt, inverse_trt, posix)
 973      Lisp_Object string;
 974      int pos;
 975      int pos_byte;
 976      int lim;
 977      int lim_byte;
 978      int n;
 979      int RE;
 980      Lisp_Object trt;
 981      Lisp_Object inverse_trt;
 982      int posix;
 983 {
 984   int len = SCHARS (string);
 985   int len_byte = SBYTES (string);
 986   register int i;
 987
 988   if (running_asynch_code)
 989     save_search_regs ();
 990
 991   /* Searching 0 times means don't move.  */
 992   /* Null string is found at starting position.  */
 993   if (len == 0 || n == 0)
 994     {
 995       set_search_regs (pos_byte, 0);
 996       return pos;
 997     }
 998
 999   if (RE && !trivial_regexp_p (string))
1000     {
1001       unsigned char *p1, *p2;
1002       int s1, s2;
1003       struct re_pattern_buffer *bufp;
1004
1005       bufp = compile_pattern (string, &search_regs, trt, posix,
1006                               !NILP (current_buffer->enable_multibyte_characters));
1007
1008       immediate_quit = 1;       /* Quit immediately if user types ^G,
1009                                    because letting this function finish
1010                                    can take too long. */
1011       QUIT;                     /* Do a pending quit right away,
1012                                    to avoid paradoxical behavior */
1013       /* Get pointers and sizes of the two strings
1014          that make up the visible portion of the buffer. */
1015
1016       p1 = BEGV_ADDR;
1017       s1 = GPT_BYTE - BEGV_BYTE;
1018       p2 = GAP_END_ADDR;
1019       s2 = ZV_BYTE - GPT_BYTE;
1020       if (s1 < 0)
1021         {
1022           p2 = p1;
1023           s2 = ZV_BYTE - BEGV_BYTE;
1024           s1 = 0;
1025         }
1026       if (s2 < 0)
1027         {
1028           s1 = ZV_BYTE - BEGV_BYTE;
1029           s2 = 0;
1030         }
1031       re_match_object = Qnil;
1032
1033       while (n < 0)
1034         {
1035           int val;
1036           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1037                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1038                              &search_regs,
1039                              /* Don't allow match past current point */
1040                              pos_byte - BEGV_BYTE);
1041           if (val == -2)
1042             {
1043               matcher_overflow ();
1044             }
1045           if (val >= 0)
1046             {
1047               pos_byte = search_regs.start[0] + BEGV_BYTE;
1048               for (i = 0; i < search_regs.num_regs; i++)
1049                 if (search_regs.start[i] >= 0)
1050                   {
1051                     search_regs.start[i]
1052                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1053                     search_regs.end[i]
1054                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1055                   }
1056               XSETBUFFER (last_thing_searched, current_buffer);
1057               /* Set pos to the new position. */
1058               pos = search_regs.start[0];
1059             }
1060           else
1061             {
1062               immediate_quit = 0;
1063               return (n);
1064             }
1065           n++;
1066         }
1067       while (n > 0)
1068         {
1069           int val;
1070           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1071                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1072                              &search_regs,
1073                              lim_byte - BEGV_BYTE);
1074           if (val == -2)
1075             {
1076               matcher_overflow ();
1077             }
1078           if (val >= 0)
1079             {
1080               pos_byte = search_regs.end[0] + BEGV_BYTE;
1081               for (i = 0; i < search_regs.num_regs; i++)
1082                 if (search_regs.start[i] >= 0)
1083                   {
1084                     search_regs.start[i]
1085                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1086                     search_regs.end[i]
1087                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1088                   }
1089               XSETBUFFER (last_thing_searched, current_buffer);
1090               pos = search_regs.end[0];
1091             }
1092           else
1093             {
1094               immediate_quit = 0;
1095               return (0 - n);
1096             }
1097           n--;
1098         }
1099       immediate_quit = 0;
1100       return (pos);
1101     }
1102   else                          /* non-RE case */
1103     {
1104       unsigned char *raw_pattern, *pat;
1105       int raw_pattern_size;
1106       int raw_pattern_size_byte;
1107       unsigned char *patbuf;
1108       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1109       unsigned char *base_pat = SDATA (string);
1110       /* High bits of char; 0 for ASCII characters, (CHAR & ~0x3F)
1111          otherwise.  Characters of the same high bits have the same
1112          sequence of bytes but last.  To do the BM search, all
1113          characters in STRING must have the same high bits (including
1114          their case translations).  */
1115       int char_high_bits = -1;
1116       int boyer_moore_ok = 1;
1117
1118       /* MULTIBYTE says whether the text to be searched is multibyte.
1119          We must convert PATTERN to match that, or we will not really
1120          find things right.  */
1121
1122       if (multibyte == STRING_MULTIBYTE (string))
1123         {
1124           raw_pattern = (unsigned char *) SDATA (string);
1125           raw_pattern_size = SCHARS (string);
1126           raw_pattern_size_byte = SBYTES (string);
1127         }
1128       else if (multibyte)
1129         {
1130           raw_pattern_size = SCHARS (string);
1131           raw_pattern_size_byte
1132             = count_size_as_multibyte (SDATA (string),
1133                                        raw_pattern_size);
1134           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1135           copy_text (SDATA (string), raw_pattern,
1136                      SCHARS (string), 0, 1);
1137         }
1138       else
1139         {
1140           /* Converting multibyte to single-byte.
1141
1142              ??? Perhaps this conversion should be done in a special way
1143              by subtracting nonascii-insert-offset from each non-ASCII char,
1144              so that only the multibyte chars which really correspond to
1145              the chosen single-byte character set can possibly match.  */
1146           raw_pattern_size = SCHARS (string);
1147           raw_pattern_size_byte = SCHARS (string);
1148           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1149           copy_text (SDATA (string), raw_pattern,
1150                      SBYTES (string), 1, 0);
1151         }
1152
1153       /* Copy and optionally translate the pattern.  */
1154       len = raw_pattern_size;
1155       len_byte = raw_pattern_size_byte;
1156       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1157       pat = patbuf;
1158       base_pat = raw_pattern;
1159       if (multibyte)
1160         {
1161           while (--len >= 0)
1162             {
1163               int c, translated, inverse;
1164               int in_charlen;
1165
1166               /* If we got here and the RE flag is set, it's because we're
1167                  dealing with a regexp known to be trivial, so the backslash
1168                  just quotes the next character.  */
1169               if (RE && *base_pat == '\\')
1170                 {
1171                   len--;
1172                   len_byte--;
1173                   base_pat++;
1174                 }
1175
1176               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1177
1178               /* Translate the character, if requested.  */
1179               TRANSLATE (translated, trt, c);
1180               TRANSLATE (inverse, inverse_trt, c);
1181
1182               /* Did this char actually get translated?
1183                  Would any other char get translated into it?  */
1184               if (translated != c || inverse != c)
1185                 {
1186                   /* Keep track of which character set row
1187                      contains the characters that need translation.  */
1188                   int this_high_bit = ASCII_CHAR_P (c) ? 0 : (c & ~0x3F);
1189                   int c1 = inverse != c ? inverse : translated;
1190                   int trt_high_bit = ASCII_CHAR_P (c1) ? 0 : (c1 & ~0x3F);
1191
1192                   if (this_high_bit != trt_high_bit)
1193                     boyer_moore_ok = 0;
1194                   else if (char_high_bits == -1)
1195                     char_high_bits = this_high_bit;
1196                   else if (char_high_bits != this_high_bit)
1197                     /* If two different rows appear, needing translation,
1198                        then we cannot use boyer_moore search.  */
1199                     boyer_moore_ok = 0;
1200                 }
1201
1202               /* Store this character into the translated pattern.  */
1203               CHAR_STRING_ADVANCE (translated, pat);
1204               base_pat += in_charlen;
1205               len_byte -= in_charlen;
1206             }
1207         }
1208       else
1209         {
1210           /* Unibyte buffer.  */
1211           char_high_bits = 0;
1212           while (--len >= 0)
1213             {
1214               int c, translated;
1215
1216               /* If we got here and the RE flag is set, it's because we're
1217                  dealing with a regexp known to be trivial, so the backslash
1218                  just quotes the next character.  */
1219               if (RE && *base_pat == '\\')
1220                 {
1221                   len--;
1222                   base_pat++;
1223                 }
1224               c = *base_pat++;
1225               TRANSLATE (translated, trt, c);
1226               *pat++ = translated;
1227             }
1228         }
1229
1230       len_byte = pat - patbuf;
1231       len = raw_pattern_size;
1232       pat = base_pat = patbuf;
1233
1234       if (boyer_moore_ok)
1235         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1236                             pos, pos_byte, lim, lim_byte,
1237                             char_high_bits);
1238       else
1239         return simple_search (n, pat, len, len_byte, trt,
1240                               pos, pos_byte, lim, lim_byte);
1241     }
1242 }
1243 \f
1244 /* Do a simple string search N times for the string PAT,
1245    whose length is LEN/LEN_BYTE,
1246    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1247    TRT is the translation table.
1248
1249    Return the character position where the match is found.
1250    Otherwise, if M matches remained to be found, return -M.
1251
1252    This kind of search works regardless of what is in PAT and
1253    regardless of what is in TRT.  It is used in cases where
1254    boyer_moore cannot work.  */
1255
1256 static int
1257 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1258      int n;
1259      unsigned char *pat;
1260      int len, len_byte;
1261      Lisp_Object trt;
1262      int pos, pos_byte;
1263      int lim, lim_byte;
1264 {
1265   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1266   int forward = n > 0;
1267
1268   if (lim > pos && multibyte)
1269     while (n > 0)
1270       {
1271         while (1)
1272           {
1273             /* Try matching at position POS.  */
1274             int this_pos = pos;
1275             int this_pos_byte = pos_byte;
1276             int this_len = len;
1277             int this_len_byte = len_byte;
1278             unsigned char *p = pat;
1279             if (pos + len > lim)
1280               goto stop;
1281
1282             while (this_len > 0)
1283               {
1284                 int charlen, buf_charlen;
1285                 int pat_ch, buf_ch;
1286
1287                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1288                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1289                                                  ZV_BYTE - this_pos_byte,
1290                                                  buf_charlen);
1291                 TRANSLATE (buf_ch, trt, buf_ch);
1292
1293                 if (buf_ch != pat_ch)
1294                   break;
1295
1296                 this_len_byte -= charlen;
1297                 this_len--;
1298                 p += charlen;
1299
1300                 this_pos_byte += buf_charlen;
1301                 this_pos++;
1302               }
1303
1304             if (this_len == 0)
1305               {
1306                 pos += len;
1307                 pos_byte += len_byte;
1308                 break;
1309               }
1310
1311             INC_BOTH (pos, pos_byte);
1312           }
1313
1314         n--;
1315       }
1316   else if (lim > pos)
1317     while (n > 0)
1318       {
1319         while (1)
1320           {
1321             /* Try matching at position POS.  */
1322             int this_pos = pos;
1323             int this_len = len;
1324             unsigned char *p = pat;
1325
1326             if (pos + len > lim)
1327               goto stop;
1328
1329             while (this_len > 0)
1330               {
1331                 int pat_ch = *p++;
1332                 int buf_ch = FETCH_BYTE (this_pos);
1333                 TRANSLATE (buf_ch, trt, buf_ch);
1334
1335                 if (buf_ch != pat_ch)
1336                   break;
1337
1338                 this_len--;
1339                 this_pos++;
1340               }
1341
1342             if (this_len == 0)
1343               {
1344                 pos += len;
1345                 break;
1346               }
1347
1348             pos++;
1349           }
1350
1351         n--;
1352       }
1353   /* Backwards search.  */
1354   else if (lim < pos && multibyte)
1355     while (n < 0)
1356       {
1357         while (1)
1358           {
1359             /* Try matching at position POS.  */
1360             int this_pos = pos - len;
1361             int this_pos_byte = pos_byte - len_byte;
1362             int this_len = len;
1363             int this_len_byte = len_byte;
1364             unsigned char *p = pat;
1365
1366             if (pos - len < lim)
1367               goto stop;
1368
1369             while (this_len > 0)
1370               {
1371                 int charlen, buf_charlen;
1372                 int pat_ch, buf_ch;
1373
1374                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1375                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1376                                                  ZV_BYTE - this_pos_byte,
1377                                                  buf_charlen);
1378                 TRANSLATE (buf_ch, trt, buf_ch);
1379
1380                 if (buf_ch != pat_ch)
1381                   break;
1382
1383                 this_len_byte -= charlen;
1384                 this_len--;
1385                 p += charlen;
1386                 this_pos_byte += buf_charlen;
1387                 this_pos++;
1388               }
1389
1390             if (this_len == 0)
1391               {
1392                 pos -= len;
1393                 pos_byte -= len_byte;
1394                 break;
1395               }
1396
1397             DEC_BOTH (pos, pos_byte);
1398           }
1399
1400         n++;
1401       }
1402   else if (lim < pos)
1403     while (n < 0)
1404       {
1405         while (1)
1406           {
1407             /* Try matching at position POS.  */
1408             int this_pos = pos - len;
1409             int this_len = len;
1410             unsigned char *p = pat;
1411
1412             if (pos - len < lim)
1413               goto stop;
1414
1415             while (this_len > 0)
1416               {
1417                 int pat_ch = *p++;
1418                 int buf_ch = FETCH_BYTE (this_pos);
1419                 TRANSLATE (buf_ch, trt, buf_ch);
1420
1421                 if (buf_ch != pat_ch)
1422                   break;
1423                 this_len--;
1424                 this_pos++;
1425               }
1426
1427             if (this_len == 0)
1428               {
1429                 pos -= len;
1430                 break;
1431               }
1432
1433             pos--;
1434           }
1435
1436         n++;
1437       }
1438
1439  stop:
1440   if (n == 0)
1441     {
1442       if (forward)
1443         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1444       else
1445         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1446
1447       return pos;
1448     }
1449   else if (n > 0)
1450     return -n;
1451   else
1452     return n;
1453 }
1454 \f
/* Do Boyer-Moore search N times for the string BASE_PAT,
   whose length is LEN/LEN_BYTE,
   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
   The sign of N says which direction we search in.
   TRT and INVERSE_TRT are translation tables.

   This kind of search works if all the characters in BASE_PAT that
   have nontrivial translation are the same aside from the last byte.
   This makes it possible to translate just the last byte of a
   character, and do so after just a simple test of the context.

   If that criterion is not satisfied, do not call this function.  */
1467
1468 static int
1469 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1470              pos, pos_byte, lim, lim_byte, char_high_bits)
1471      int n;
1472      unsigned char *base_pat;
1473      int len, len_byte;
1474      Lisp_Object trt;
1475      Lisp_Object inverse_trt;
1476      int pos, pos_byte;
1477      int lim, lim_byte;
1478      int char_high_bits;
1479 {
1480   int direction = ((n > 0) ? 1 : -1);
1481   register int dirlen;
1482   int infinity, limit, stride_for_teases = 0;
1483   register int *BM_tab;
1484   int *BM_tab_base;
1485   register unsigned char *cursor, *p_limit;
1486   register int i, j;
1487   unsigned char *pat, *pat_end;
1488   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1489
1490   unsigned char simple_translate[0400];
1491   int translate_prev_byte = 0;
1492   int translate_anteprev_byte = 0;
1493
1494 #ifdef C_ALLOCA
1495   int BM_tab_space[0400];
1496   BM_tab = &BM_tab_space[0];
1497 #else
1498   BM_tab = (int *) alloca (0400 * sizeof (int));
1499 #endif
1500   /* The general approach is that we are going to maintain that we know */
1501   /* the first (closest to the present position, in whatever direction */
1502   /* we're searching) character that could possibly be the last */
1503   /* (furthest from present position) character of a valid match.  We */
1504   /* advance the state of our knowledge by looking at that character */
1505   /* and seeing whether it indeed matches the last character of the */
1506   /* pattern.  If it does, we take a closer look.  If it does not, we */
1507   /* move our pointer (to putative last characters) as far as is */
1508   /* logically possible.  This amount of movement, which I call a */
1509   /* stride, will be the length of the pattern if the actual character */
1510   /* appears nowhere in the pattern, otherwise it will be the distance */
1511   /* from the last occurrence of that character to the end of the */
1512   /* pattern. */
1513   /* As a coding trick, an enormous stride is coded into the table for */
1514   /* characters that match the last character.  This allows use of only */
1515   /* a single test, a test for having gone past the end of the */
1516   /* permissible match region, to test for both possible matches (when */
1517   /* the stride goes past the end immediately) and failure to */
1518   /* match (where you get nudged past the end one stride at a time). */
1519
1520   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1521   /* is determined only by the last character of the putative match. */
1522   /* If that character does not match, we will stride the proper */
1523   /* distance to propose a match that superimposes it on the last */
1524   /* instance of a character that matches it (per trt), or misses */
1525   /* it entirely if there is none. */
1526
1527   dirlen = len_byte * direction;
1528   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1529
1530   /* Record position after the end of the pattern.  */
1531   pat_end = base_pat + len_byte;
1532   /* BASE_PAT points to a character that we start scanning from.
1533      It is the first character in a forward search,
1534      the last character in a backward search.  */
1535   if (direction < 0)
1536     base_pat = pat_end - 1;
1537
1538   BM_tab_base = BM_tab;
1539   BM_tab += 0400;
1540   j = dirlen;           /* to get it in a register */
1541   /* A character that does not appear in the pattern induces a */
1542   /* stride equal to the pattern length. */
1543   while (BM_tab_base != BM_tab)
1544     {
1545       *--BM_tab = j;
1546       *--BM_tab = j;
1547       *--BM_tab = j;
1548       *--BM_tab = j;
1549     }
1550
1551   /* We use this for translation, instead of TRT itself.
1552      We fill this in to handle the characters that actually
1553      occur in the pattern.  Others don't matter anyway!  */
1554   bzero (simple_translate, sizeof simple_translate);
1555   for (i = 0; i < 0400; i++)
1556     simple_translate[i] = i;
1557
1558   i = 0;
1559   while (i != infinity)
1560     {
1561       unsigned char *ptr = base_pat + i;
1562       i += direction;
1563       if (i == dirlen)
1564         i = infinity;
1565       if (! NILP (trt))
1566         {
1567           int ch;
1568           int untranslated;
1569           int this_translated = 1;
1570
1571           if (multibyte
1572               /* Is *PTR the last byte of a character?  */
1573               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1574             {
1575               unsigned char *charstart = ptr;
1576               while (! CHAR_HEAD_P (*charstart))
1577                 charstart--;
1578               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1579               if (char_high_bits
1580                   == (ASCII_CHAR_P (untranslated) ? 0 : untranslated & ~0x3F))
1581                 {
1582                   TRANSLATE (ch, trt, untranslated);
1583                   if (! CHAR_HEAD_P (*ptr))
1584                     {
1585                       translate_prev_byte = ptr[-1];
1586                       if (! CHAR_HEAD_P (translate_prev_byte))
1587                         translate_anteprev_byte = ptr[-2];
1588                     }
1589                 }
1590               else
1591                 {
1592                   this_translated = 0;
1593                   ch = *ptr;
1594                 }
1595             }
1596           else if (!multibyte)
1597             TRANSLATE (ch, trt, *ptr);
1598           else
1599             {
1600               ch = *ptr;
1601               this_translated = 0;
1602             }
1603
1604           if (this_translated
1605               && ch >= 0200)
1606             j = (ch & 0x3F) | 0200;
1607           else
1608             j = (unsigned char) ch;
1609
1610           if (i == infinity)
1611             stride_for_teases = BM_tab[j];
1612
1613           BM_tab[j] = dirlen - i;
1614           /* A translation table is accompanied by its inverse -- see */
1615           /* comment following downcase_table for details */
1616           if (this_translated)
1617             {
1618               int starting_ch = ch;
1619               int starting_j = j;
1620               while (1)
1621                 {
1622                   TRANSLATE (ch, inverse_trt, ch);
1623                   if (ch > 0200)
1624                     j = (ch & 0x3F) | 0200;
1625                   else
1626                     j = (unsigned char) ch;
1627
1628                   /* For all the characters that map into CH,
1629                      set up simple_translate to map the last byte
1630                      into STARTING_J.  */
1631                   simple_translate[j] = starting_j;
1632                   if (ch == starting_ch)
1633                     break;
1634                   BM_tab[j] = dirlen - i;
1635                 }
1636             }
1637         }
1638       else
1639         {
1640           j = *ptr;
1641
1642           if (i == infinity)
1643             stride_for_teases = BM_tab[j];
1644           BM_tab[j] = dirlen - i;
1645         }
1646       /* stride_for_teases tells how much to stride if we get a */
1647       /* match on the far character but are subsequently */
1648       /* disappointed, by recording what the stride would have been */
1649       /* for that character if the last character had been */
1650       /* different. */
1651     }
1652   infinity = dirlen - infinity;
1653   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1654   /* loop invariant - POS_BYTE points at where last char (first
1655      char if reverse) of pattern would align in a possible match.  */
1656   while (n != 0)
1657     {
1658       int tail_end;
1659       unsigned char *tail_end_ptr;
1660
1661       /* It's been reported that some (broken) compiler thinks that
1662          Boolean expressions in an arithmetic context are unsigned.
1663          Using an explicit ?1:0 prevents this.  */
1664       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1665           < 0)
1666         return (n * (0 - direction));
1667       /* First we do the part we can by pointers (maybe nothing) */
1668       QUIT;
1669       pat = base_pat;
1670       limit = pos_byte - dirlen + direction;
1671       if (direction > 0)
1672         {
1673           limit = BUFFER_CEILING_OF (limit);
1674           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1675              can take on without hitting edge of buffer or the gap.  */
1676           limit = min (limit, pos_byte + 20000);
1677           limit = min (limit, lim_byte - 1);
1678         }
1679       else
1680         {
1681           limit = BUFFER_FLOOR_OF (limit);
1682           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1683              can take on without hitting edge of buffer or the gap.  */
1684           limit = max (limit, pos_byte - 20000);
1685           limit = max (limit, lim_byte);
1686         }
1687       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1688       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1689
1690       if ((limit - pos_byte) * direction > 20)
1691         {
1692           unsigned char *p2;
1693
1694           p_limit = BYTE_POS_ADDR (limit);
1695           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1696           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1697           while (1)             /* use one cursor setting as long as i can */
1698             {
1699               if (direction > 0) /* worth duplicating */
1700                 {
1701                   /* Use signed comparison if appropriate
1702                      to make cursor+infinity sure to be > p_limit.
1703                      Assuming that the buffer lies in a range of addresses
1704                      that are all "positive" (as ints) or all "negative",
1705                      either kind of comparison will work as long
1706                      as we don't step by infinity.  So pick the kind
1707                      that works when we do step by infinity.  */
1708                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1709                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1710                       cursor += BM_tab[*cursor];
1711                   else
1712                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1713                       cursor += BM_tab[*cursor];
1714                 }
1715               else
1716                 {
1717                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1718                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1719                       cursor += BM_tab[*cursor];
1720                   else
1721                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1722                       cursor += BM_tab[*cursor];
1723                 }
1724 /* If you are here, cursor is beyond the end of the searched region. */
1725 /* This can happen if you match on the far character of the pattern, */
1726 /* because the "stride" of that character is infinity, a number able */
1727 /* to throw you well beyond the end of the search.  It can also */
1728 /* happen if you fail to match within the permitted region and would */
1729 /* otherwise try a character beyond that region */
1730               if ((cursor - p_limit) * direction <= len_byte)
1731                 break;  /* a small overrun is genuine */
1732               cursor -= infinity; /* large overrun = hit */
1733               i = dirlen - direction;
1734               if (! NILP (trt))
1735                 {
1736                   while ((i -= direction) + direction != 0)
1737                     {
1738                       int ch;
1739                       cursor -= direction;
1740                       /* Translate only the last byte of a character.  */
1741                       if (! multibyte
1742                           || ((cursor == tail_end_ptr
1743                                || CHAR_HEAD_P (cursor[1]))
1744                               && (CHAR_HEAD_P (cursor[0])
1745                                   || (translate_prev_byte == cursor[-1]
1746                                       && (CHAR_HEAD_P (translate_prev_byte)
1747                                           || translate_anteprev_byte == cursor[-2])))))
1748                         ch = simple_translate[*cursor];
1749                       else
1750                         ch = *cursor;
1751                       if (pat[i] != ch)
1752                         break;
1753                     }
1754                 }
1755               else
1756                 {
1757                   while ((i -= direction) + direction != 0)
1758                     {
1759                       cursor -= direction;
1760                       if (pat[i] != *cursor)
1761                         break;
1762                     }
1763                 }
1764               cursor += dirlen - i - direction; /* fix cursor */
1765               if (i + direction == 0)
1766                 {
1767                   int position;
1768
1769                   cursor -= direction;
1770
1771                   position = pos_byte + cursor - p2 + ((direction > 0)
1772                                                        ? 1 - len_byte : 0);
1773                   set_search_regs (position, len_byte);
1774
1775                   if ((n -= direction) != 0)
1776                     cursor += dirlen; /* to resume search */
1777                   else
1778                     return ((direction > 0)
1779                             ? search_regs.end[0] : search_regs.start[0]);
1780                 }
1781               else
1782                 cursor += stride_for_teases; /* <sigh> we lose -  */
1783             }
1784           pos_byte += cursor - p2;
1785         }
1786       else
1787         /* Now we'll pick up a clump that has to be done the hard */
1788         /* way because it covers a discontinuity */
1789         {
1790           limit = ((direction > 0)
1791                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1792                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1793           limit = ((direction > 0)
1794                    ? min (limit + len_byte, lim_byte - 1)
1795                    : max (limit - len_byte, lim_byte));
1796           /* LIMIT is now the last value POS_BYTE can have
1797              and still be valid for a possible match.  */
1798           while (1)
1799             {
1800               /* This loop can be coded for space rather than */
1801               /* speed because it will usually run only once. */
1802               /* (the reach is at most len + 21, and typically */
1803               /* does not exceed len) */
1804               while ((limit - pos_byte) * direction >= 0)
1805                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1806               /* now run the same tests to distinguish going off the */
1807               /* end, a match or a phony match. */
1808               if ((pos_byte - limit) * direction <= len_byte)
1809                 break;  /* ran off the end */
1810               /* Found what might be a match.
1811                  Set POS_BYTE back to last (first if reverse) pos.  */
1812               pos_byte -= infinity;
1813               i = dirlen - direction;
1814               while ((i -= direction) + direction != 0)
1815                 {
1816                   int ch;
1817                   unsigned char *ptr;
1818                   pos_byte -= direction;
1819                   ptr = BYTE_POS_ADDR (pos_byte);
1820                   /* Translate only the last byte of a character.  */
1821                   if (! multibyte
1822                       || ((ptr == tail_end_ptr
1823                            || CHAR_HEAD_P (ptr[1]))
1824                           && (CHAR_HEAD_P (ptr[0])
1825                               || (translate_prev_byte == ptr[-1]
1826                                   && (CHAR_HEAD_P (translate_prev_byte)
1827                                       || translate_anteprev_byte == ptr[-2])))))
1828                     ch = simple_translate[*ptr];
1829                   else
1830                     ch = *ptr;
1831                   if (pat[i] != ch)
1832                     break;
1833                 }
1834               /* Above loop has moved POS_BYTE part or all the way
1835                  back to the first pos (last pos if reverse).
1836                  Set it once again at the last (first if reverse) char.  */
1837               pos_byte += dirlen - i- direction;
1838               if (i + direction == 0)
1839                 {
1840                   int position;
1841                   pos_byte -= direction;
1842
1843                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1844
1845                   set_search_regs (position, len_byte);
1846
1847                   if ((n -= direction) != 0)
1848                     pos_byte += dirlen; /* to resume search */
1849                   else
1850                     return ((direction > 0)
1851                             ? search_regs.end[0] : search_regs.start[0]);
1852                 }
1853               else
1854                 pos_byte += stride_for_teases;
1855             }
1856           }
1857       /* We have done one clump.  Can we continue? */
1858       if ((lim_byte - pos_byte) * direction < 0)
1859         return ((0 - n) * direction);
1860     }
1861   return BYTE_TO_CHAR (pos_byte);
1862 }
1863
1864 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1865    for the overall match just found in the current buffer.
1866    Also clear out the match data for registers 1 and up.
     
        BEG_BYTE and NBYTES are given in bytes; the bounds are stored in
        register 0 of search_regs as character positions (converted with
        BYTE_TO_CHAR).  If search_regs has no registers yet, room for two
        is allocated first.  The current buffer is recorded in
        last_thing_searched.  */
1867
1868 static void
1869 set_search_regs (beg_byte, nbytes)
1870      int beg_byte, nbytes;
1871 {
1872   int i;
1873
1874   /* Make sure we have registers in which to store
1875      the match position.  */
1876   if (search_regs.num_regs == 0)
1877     {
1878       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1879       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1880       search_regs.num_regs = 2;
1881     }
1882
1883   /* Clear out the other registers.  A start of -1 is what
          replace-match (later in this file) treats as a subexpression
          that did not match.  */
1884   for (i = 1; i < search_regs.num_regs; i++)
1885     {
1886       search_regs.start[i] = -1;
1887       search_regs.end[i] = -1;
1888     }
1889
       /* Register 0 describes the whole match, in character positions.  */
1890   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1891   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
       /* Note which buffer the match data now refers to.  */
1892   XSETBUFFER (last_thing_searched, current_buffer);
1893 }
1894 \f
1895 /* Given a string of words separated by word delimiters,
1896   compute a regexp that matches those exact words
1897   separated by arbitrary punctuation.
     
       The result is `\b', then each run of word-syntax characters of
       STRING copied unchanged, with `\W\W*' between consecutive runs,
       then a final `\b'.  Non-word characters are never copied; only the
       first one following a word (other than the last word) contributes
       a separator.  If STRING contains no word at all, return
       empty_string.  STRING is checked with CHECK_STRING, and the result
       has the same multibyteness as STRING.
     
       The result is allocated at its exact size.  For a multibyte STRING
       the byte size is computed from the number of BYTES occupied by the
       dropped non-word characters, not from their count, so that
       multibyte punctuation does not leave uninitialized bytes at the
       end of the result.  */
1898
1899 static Lisp_Object
1900 wordify (string)
1901      Lisp_Object string;
1902 {
1903   register unsigned char *o;
1904   register int i, i_byte, len, punct_count = 0, word_count = 0;
       /* Total bytes occupied by the non-word characters.  This exceeds
          PUNCT_COUNT whenever one of them is a multibyte character.  */
       int punct_bytes = 0;
1905   Lisp_Object val;
       /* Starts at 0, which is assumed not to have word syntax.  */
1906   int prev_c = 0;
1907   int adjust;
1908
1909   CHECK_STRING (string);
1911   len = SCHARS (string);
1912
       /* First pass: count the non-word characters (and their bytes)
          and the words, so the result can be allocated exactly.  */
1913   for (i = 0, i_byte = 0; i < len; )
1914     {
1915       int c;
           int i_byte_orig = i_byte;
1916
1917       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1918
1919       if (SYNTAX (c) != Sword)
1920         {
1921           punct_count++;
               punct_bytes += i_byte - i_byte_orig;
               /* A word ends where a word character is followed by a
                  non-word character.  */
1922           if (i > 0 && SYNTAX (prev_c) == Sword)
1923             word_count++;
1924         }
1925
1926       prev_c = c;
1927     }
1928
       /* A word running up to the end of STRING is not yet counted.  */
1929   if (SYNTAX (prev_c) == Sword)
1930     word_count++;
1931   if (!word_count)
1932     return empty_string;
1933
       /* Text added to the word characters: five characters (`\W\W*')
          between each pair of words, plus `\b' at both ends.  All of
          these are single-byte, so ADJUST serves for chars and bytes.  */
1934   adjust = 5 * (word_count - 1) + 4;
1935   if (STRING_MULTIBYTE (string))
1936     val = make_uninit_multibyte_string (len - punct_count + adjust,
1937                                         SBYTES (string) - punct_bytes
1938                                         + adjust);
1939   else
1940     val = make_uninit_string (len - punct_count + adjust);
1941
1942   o = SDATA (val);
1943   *o++ = '\\';
1944   *o++ = 'b';
1945   prev_c = 0;
1946
       /* Second pass: copy the words and insert the separators.  */
1947   for (i = 0, i_byte = 0; i < len; )
1948     {
1949       int c;
1950       int i_byte_orig = i_byte;
1951
1952       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
1953
1954       if (SYNTAX (c) == Sword)
1955         {
               /* Copy all the bytes of this word character as they are.  */
1956           bcopy (SDATA (string) + i_byte_orig, o,
1957                  i_byte - i_byte_orig);
1958           o += i_byte - i_byte_orig;
1959         }
           /* At the end of each word, emit a separator unless that was
              the last word: WORD_COUNT counts down the words still to
              come and reaches zero after the final one.  */
1960       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1961         {
1962           *o++ = '\\';
1963           *o++ = 'W';
1964           *o++ = '\\';
1965           *o++ = 'W';
1966           *o++ = '*';
1967         }
1968
1969       prev_c = c;
1970     }
1971
1972   *o++ = '\\';
1973   *o++ = 'b';
1974
1975   return val;
1976 }
1977 \f
1978 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1979        "MSearch backward: ",
1980        doc: /* Search backward from point for STRING.
1981 Set point to the beginning of the occurrence found, and return point.
1982 An optional second argument bounds the search; it is a buffer position.
1983 The match found must not extend before that position.
1984 Optional third argument, if t, means if fail just return nil (no error).
1985   If not nil and not t, move to limit of search and return nil.
1986 Optional fourth argument is repeat count--search for successive occurrences.
1987
1988 Search case-sensitivity is determined by the value of the variable
1989 `case-fold-search', which see.
1990
1991 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
1992      (string, bound, noerror, count)
1993      Lisp_Object string, bound, noerror, count;
1994 {
       /* Plain string search, direction -1 (backward).  The last two
          flags are 0 here; among the search commands in this file the
          first is 1 only for the word-, re- and posix- variants and the
          second only for the posix- variants.  */
1995   return search_command (string, bound, noerror, count, -1, 0, 0);
1996 }
1997
1998 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1999        doc: /* Search forward from point for STRING.
2000 Set point to the end of the occurrence found, and return point.
2001 An optional second argument bounds the search; it is a buffer position.
2002 The match found must not extend after that position.  nil is equivalent
2003   to (point-max).
2004 Optional third argument, if t, means if fail just return nil (no error).
2005   If not nil and not t, move to limit of search and return nil.
2006 Optional fourth argument is repeat count--search for successive occurrences.
2007
2008 Search case-sensitivity is determined by the value of the variable
2009 `case-fold-search', which see.
2010
2011 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2012      (string, bound, noerror, count)
2013      Lisp_Object string, bound, noerror, count;
2014 {
       /* Plain string search, direction 1 (forward).  The last two
          flags are 0 here; among the search commands in this file the
          first is 1 only for the word-, re- and posix- variants and the
          second only for the posix- variants.  */
2015   return search_command (string, bound, noerror, count, 1, 0, 0);
2016 }
2017
2018 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2019        "sWord search backward: ",
2020        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2021 Set point to the beginning of the occurrence found, and return point.
2022 An optional second argument bounds the search; it is a buffer position.
2023 The match found must not extend before that position.
2024 Optional third argument, if t, means if fail just return nil (no error).
2025   If not nil and not t, move to limit of search and return nil.
2026 Optional fourth argument is repeat count--search for successive occurrences.  */)
2027      (string, bound, noerror, count)
2028      Lisp_Object string, bound, noerror, count;
2029 {
       /* wordify turns STRING into a regexp (and checks that it is a
          string), so this is searched like the re-search commands:
          direction -1 (backward), next flag 1, last flag 0.  */
2030   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2031 }
2032
2033 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2034        "sWord search: ",
2035        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2036 Set point to the end of the occurrence found, and return point.
2037 An optional second argument bounds the search; it is a buffer position.
2038 The match found must not extend after that position.
2039 Optional third argument, if t, means if fail just return nil (no error).
2040   If not nil and not t, move to limit of search and return nil.
2041 Optional fourth argument is repeat count--search for successive occurrences.  */)
2042      (string, bound, noerror, count)
2043      Lisp_Object string, bound, noerror, count;
2044 {
       /* wordify turns STRING into a regexp (and checks that it is a
          string), so this is searched like the re-search commands:
          direction 1 (forward), next flag 1, last flag 0.  */
2045   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2046 }
2047
2048 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2049        "sRE search backward: ",
2050        doc: /* Search backward from point for match for regular expression REGEXP.
2051 Set point to the beginning of the match, and return point.
2052 The match found is the one starting last in the buffer
2053 and yet ending before the origin of the search.
2054 An optional second argument bounds the search; it is a buffer position.
2055 The match found must start at or after that position.
2056 Optional third argument, if t, means if fail just return nil (no error).
2057   If not nil and not t, move to limit of search and return nil.
2058 Optional fourth argument is repeat count--search for successive occurrences.
2059 See also the functions `match-beginning', `match-end', `match-string',
2060 and `replace-match'.  */)
2061      (regexp, bound, noerror, count)
2062      Lisp_Object regexp, bound, noerror, count;
2063 {
       /* Regexp search, direction -1 (backward).  The flag after the
          direction is 1 for every regexp-based command in this file;
          the last flag is 1 only for the posix- variants.  */
2064   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2065 }
2066
2067 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2068        "sRE search: ",
2069        doc: /* Search forward from point for regular expression REGEXP.
2070 Set point to the end of the occurrence found, and return point.
2071 An optional second argument bounds the search; it is a buffer position.
2072 The match found must not extend after that position.
2073 Optional third argument, if t, means if fail just return nil (no error).
2074   If not nil and not t, move to limit of search and return nil.
2075 Optional fourth argument is repeat count--search for successive occurrences.
2076 See also the functions `match-beginning', `match-end', `match-string',
2077 and `replace-match'.  */)
2078      (regexp, bound, noerror, count)
2079      Lisp_Object regexp, bound, noerror, count;
2080 {
       /* Regexp search, direction 1 (forward).  The flag after the
          direction is 1 for every regexp-based command in this file;
          the last flag is 1 only for the posix- variants.  */
2081   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2082 }
2083
2084 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2085        "sPosix search backward: ",
2086        doc: /* Search backward from point for match for regular expression REGEXP.
2087 Find the longest match in accord with Posix regular expression rules.
2088 Set point to the beginning of the match, and return point.
2089 The match found is the one starting last in the buffer
2090 and yet ending before the origin of the search.
2091 An optional second argument bounds the search; it is a buffer position.
2092 The match found must start at or after that position.
2093 Optional third argument, if t, means if fail just return nil (no error).
2094   If not nil and not t, move to limit of search and return nil.
2095 Optional fourth argument is repeat count--search for successive occurrences.
2096 See also the functions `match-beginning', `match-end', `match-string',
2097 and `replace-match'.  */)
2098      (regexp, bound, noerror, count)
2099      Lisp_Object regexp, bound, noerror, count;
2100 {
       /* Same call as re-search-backward except for the final flag,
          which is 1 only in the posix- commands; presumably it is what
          selects the Posix longest-match behavior described above
          (search_command is not shown here -- confirm there).  */
2101   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2102 }
2103
2104 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2105        "sPosix search: ",
2106        doc: /* Search forward from point for regular expression REGEXP.
2107 Find the longest match in accord with Posix regular expression rules.
2108 Set point to the end of the occurrence found, and return point.
2109 An optional second argument bounds the search; it is a buffer position.
2110 The match found must not extend after that position.
2111 Optional third argument, if t, means if fail just return nil (no error).
2112   If not nil and not t, move to limit of search and return nil.
2113 Optional fourth argument is repeat count--search for successive occurrences.
2114 See also the functions `match-beginning', `match-end', `match-string',
2115 and `replace-match'.  */)
2116      (regexp, bound, noerror, count)
2117      Lisp_Object regexp, bound, noerror, count;
2118 {
       /* Same call as re-search-forward except for the final flag,
          which is 1 only in the posix- commands; presumably it is what
          selects the Posix longest-match behavior described above
          (search_command is not shown here -- confirm there).  */
2119   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2120 }
2121 \f
2122 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2123        doc: /* Replace text matched by last search with NEWTEXT.
2124 Leave point at the end of the replacement text.
2125
2126 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2127 Otherwise maybe capitalize the whole text, or maybe just word initials,
2128 based on the replaced text.
2129 If the replaced text has only capital letters
2130 and has at least one multiletter word, convert NEWTEXT to all caps.
2131 Otherwise if all words are capitalized in the replaced text,
2132 capitalize each word in NEWTEXT.
2133
2134 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2135 Otherwise treat `\\' as special:
2136   `\\&' in NEWTEXT means substitute original matched text.
2137   `\\N' means substitute what matched the Nth `\\(...\\)'.
2138        If Nth parens didn't match, substitute nothing.
2139   `\\\\' means insert one `\\'.
2140 Case conversion does not apply to these substitutions.
2141
2142 FIXEDCASE and LITERAL are optional arguments.
2143
2144 The optional fourth argument STRING can be a string to modify.
2145 This is meaningful when the previous match was done against STRING,
2146 using `string-match'.  When used this way, `replace-match'
2147 creates and returns a new string made by copying STRING and replacing
2148 the part of STRING that was matched.
2149
2150 The optional fifth argument SUBEXP specifies a subexpression;
2151 it says to replace just that subexpression with NEWTEXT,
2152 rather than replacing the entire matched text.
2153 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2154 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2155 NEWTEXT in place of subexp N.
2156 This is useful only after a regular expression search or match,
2157 since only regular expressions have distinguished subexpressions.  */)
2158      (newtext, fixedcase, literal, string, subexp)
2159      Lisp_Object newtext, fixedcase, literal, string, subexp;
2160 {
2161   enum { nochange, all_caps, cap_initial } case_action;
2162   register int pos, pos_byte;
2163   int some_multiletter_word;
2164   int some_lowercase;
2165   int some_uppercase;
2166   int some_nonuppercase_initial;
2167   register int c, prevc;
2168   int sub;
2169   int opoint, newpoint;
2170
2171   CHECK_STRING (newtext);
2172
2173   if (! NILP (string))
2174     CHECK_STRING (string);
2175
2176   case_action = nochange;       /* We tried an initialization */
2177                                 /* but some C compilers blew it */
2178
2179   if (search_regs.num_regs <= 0)
2180     error ("replace-match called before any match found");
2181
2182   if (NILP (subexp))
2183     sub = 0;
2184   else
2185     {
2186       CHECK_NUMBER (subexp);
2187       sub = XINT (subexp);
2188       if (sub < 0 || sub >= search_regs.num_regs)
2189         args_out_of_range (subexp, make_number (search_regs.num_regs));
2190     }
2191
2192   if (NILP (string))
2193     {
2194       if (search_regs.start[sub] < BEGV
2195           || search_regs.start[sub] > search_regs.end[sub]
2196           || search_regs.end[sub] > ZV)
2197         args_out_of_range (make_number (search_regs.start[sub]),
2198                            make_number (search_regs.end[sub]));
2199     }
2200   else
2201     {
2202       if (search_regs.start[sub] < 0
2203           || search_regs.start[sub] > search_regs.end[sub]
2204           || search_regs.end[sub] > SCHARS (string))
2205         args_out_of_range (make_number (search_regs.start[sub]),
2206                            make_number (search_regs.end[sub]));
2207     }
2208
2209   if (NILP (fixedcase))
2210     {
2211       /* Decide how to casify by examining the matched text. */
2212       int last;
2213
2214       pos = search_regs.start[sub];
2215       last = search_regs.end[sub];
2216
2217       if (NILP (string))
2218         pos_byte = CHAR_TO_BYTE (pos);
2219       else
2220         pos_byte = string_char_to_byte (string, pos);
2221
2222       prevc = '\n';
2223       case_action = all_caps;
2224
2225       /* some_multiletter_word is set nonzero if any original word
2226          is more than one letter long. */
2227       some_multiletter_word = 0;
2228       some_lowercase = 0;
2229       some_nonuppercase_initial = 0;
2230       some_uppercase = 0;
2231
2232       while (pos < last)
2233         {
2234           if (NILP (string))
2235             {
2236               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2237               INC_BOTH (pos, pos_byte);
2238             }
2239           else
2240             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2241
2242           if (LOWERCASEP (c))
2243             {
2244               /* Cannot be all caps if any original char is lower case */
2245
2246               some_lowercase = 1;
2247               if (SYNTAX (prevc) != Sword)
2248                 some_nonuppercase_initial = 1;
2249               else
2250                 some_multiletter_word = 1;
2251             }
2252           else if (!NOCASEP (c))
2253             {
2254               some_uppercase = 1;
2255               if (SYNTAX (prevc) != Sword)
2256                 ;
2257               else
2258                 some_multiletter_word = 1;
2259             }
2260           else
2261             {
2262               /* If the initial is a caseless word constituent,
2263                  treat that like a lowercase initial.  */
2264               if (SYNTAX (prevc) != Sword)
2265                 some_nonuppercase_initial = 1;
2266             }
2267
2268           prevc = c;
2269         }
2270
2271       /* Convert to all caps if the old text is all caps
2272          and has at least one multiletter word.  */
2273       if (! some_lowercase && some_multiletter_word)
2274         case_action = all_caps;
2275       /* Capitalize each word, if the old text has all capitalized words.  */
2276       else if (!some_nonuppercase_initial && some_multiletter_word)
2277         case_action = cap_initial;
2278       else if (!some_nonuppercase_initial && some_uppercase)
2279         /* Should x -> yz, operating on X, give Yz or YZ?
2280            We'll assume the latter.  */
2281         case_action = all_caps;
2282       else
2283         case_action = nochange;
2284     }
2285
2286   /* Do replacement in a string.  */
2287   if (!NILP (string))
2288     {
2289       Lisp_Object before, after;
2290
2291       before = Fsubstring (string, make_number (0),
2292                            make_number (search_regs.start[sub]));
2293       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2294
2295       /* Substitute parts of the match into NEWTEXT
2296          if desired.  */
2297       if (NILP (literal))
2298         {
2299           int lastpos = 0;
2300           int lastpos_byte = 0;
2301           /* We build up the substituted string in ACCUM.  */
2302           Lisp_Object accum;
2303           Lisp_Object middle;
2304           int length = SBYTES (newtext);
2305
2306           accum = Qnil;
2307
2308           for (pos_byte = 0, pos = 0; pos_byte < length;)
2309             {
2310               int substart = -1;
2311               int subend = 0;
2312               int delbackslash = 0;
2313
2314               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2315
2316               if (c == '\\')
2317                 {
2318                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2319
2320                   if (c == '&')
2321                     {
2322                       substart = search_regs.start[sub];
2323                       subend = search_regs.end[sub];
2324                     }
2325                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2326                     {
2327                       if (search_regs.start[c - '0'] >= 0)
2328                         {
2329                           substart = search_regs.start[c - '0'];
2330                           subend = search_regs.end[c - '0'];
2331                         }
2332                     }
2333                   else if (c == '\\')
2334                     delbackslash = 1;
2335                   else
2336                     error ("Invalid use of `\\' in replacement text");
2337                 }
2338               if (substart >= 0)
2339                 {
2340                   if (pos - 2 != lastpos)
2341                     middle = substring_both (newtext, lastpos,
2342                                              lastpos_byte,
2343                                              pos - 2, pos_byte - 2);
2344                   else
2345                     middle = Qnil;
2346                   accum = concat3 (accum, middle,
2347                                    Fsubstring (string,
2348                                                make_number (substart),
2349                                                make_number (subend)));
2350                   lastpos = pos;
2351                   lastpos_byte = pos_byte;
2352                 }
2353               else if (delbackslash)
2354                 {
2355                   middle = substring_both (newtext, lastpos,
2356                                            lastpos_byte,
2357                                            pos - 1, pos_byte - 1);
2358
2359                   accum = concat2 (accum, middle);
2360                   lastpos = pos;
2361                   lastpos_byte = pos_byte;
2362                 }
2363             }
2364
2365           if (pos != lastpos)
2366             middle = substring_both (newtext, lastpos,
2367                                      lastpos_byte,
2368                                      pos, pos_byte);
2369           else
2370             middle = Qnil;
2371
2372           newtext = concat2 (accum, middle);
2373         }
2374
2375       /* Do case substitution in NEWTEXT if desired.  */
2376       if (case_action == all_caps)
2377         newtext = Fupcase (newtext);
2378       else if (case_action == cap_initial)
2379         newtext = Fupcase_initials (newtext);
2380
2381       return concat3 (before, newtext, after);
2382     }
2383
2384   /* Record point, then move (quietly) to the start of the match.  */
2385   if (PT >= search_regs.end[sub])
2386     opoint = PT - ZV;
2387   else if (PT > search_regs.start[sub])
2388     opoint = search_regs.end[sub] - ZV;
2389   else
2390     opoint = PT;
2391
2392   /* If we want non-literal replacement,
2393      perform substitution on the replacement string.  */
2394   if (NILP (literal))
2395     {
2396       int length = SBYTES (newtext);
2397       unsigned char *substed;
2398       int substed_alloc_size, substed_len;
2399       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2400       int str_multibyte = STRING_MULTIBYTE (newtext);
2401       Lisp_Object rev_tbl;
2402       int really_changed = 0;
2403
2404       rev_tbl = Qnil;
2405
2406       substed_alloc_size = length * 2 + 100;
2407       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2408       substed_len = 0;
2409
2410       /* Go thru NEWTEXT, producing the actual text to insert in
2411          SUBSTED while adjusting multibyteness to that of the current
2412          buffer.  */
2413
2414       for (pos_byte = 0, pos = 0; pos_byte < length;)
2415         {
2416           unsigned char str[MAX_MULTIBYTE_LENGTH];
2417           unsigned char *add_stuff = NULL;
2418           int add_len = 0;
2419           int idx = -1;
2420
2421           if (str_multibyte)
2422             {
2423               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2424               if (!buf_multibyte)
2425                 c = multibyte_char_to_unibyte (c, rev_tbl);
2426             }
2427           else
2428             {
2429               /* Note that we don't have to increment POS.  */
2430               c = SREF (newtext, pos_byte++);
2431               if (buf_multibyte)
2432                 c = unibyte_char_to_multibyte (c);
2433             }
2434
2435           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2436              or set IDX to a match index, which means put that part
2437              of the buffer text into SUBSTED.  */
2438
2439           if (c == '\\')
2440             {
2441               really_changed = 1;
2442
2443               if (str_multibyte)
2444                 {
2445                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2446                                                       pos, pos_byte);
2447                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2448                     c = multibyte_char_to_unibyte (c, rev_tbl);
2449                 }
2450               else
2451                 {
2452                   c = SREF (newtext, pos_byte++);
2453                   if (buf_multibyte)
2454                     c = unibyte_char_to_multibyte (c);
2455                 }
2456
2457               if (c == '&')
2458                 idx = sub;
2459               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2460                 {
2461                   if (search_regs.start[c - '0'] >= 1)
2462                     idx = c - '0';
2463                 }
2464               else if (c == '\\')
2465                 add_len = 1, add_stuff = "\\";
2466               else
2467                 {
2468                   xfree (substed);
2469                   error ("Invalid use of `\\' in replacement text");
2470                 }
2471             }
2472           else
2473             {
2474               add_len = CHAR_STRING (c, str);
2475               add_stuff = str;
2476             }
2477
2478           /* If we want to copy part of a previous match,
2479              set up ADD_STUFF and ADD_LEN to point to it.  */
2480           if (idx >= 0)
2481             {
2482               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2483               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2484               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2485                 move_gap (search_regs.start[idx]);
2486               add_stuff = BYTE_POS_ADDR (begbyte);
2487             }
2488
2489           /* Now the stuff we want to add to SUBSTED
2490              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2491
2492           /* Make sure SUBSTED is big enough.  */
2493           if (substed_len + add_len >= substed_alloc_size)
2494             {
2495               substed_alloc_size = substed_len + add_len + 500;
2496               substed = (unsigned char *) xrealloc (substed,
2497                                                     substed_alloc_size + 1);
2498             }
2499
2500           /* Now add to the end of SUBSTED.  */
2501           if (add_stuff)
2502             {
2503               bcopy (add_stuff, substed + substed_len, add_len);
2504               substed_len += add_len;
2505             }
2506         }
2507
2508       if (really_changed)
2509         newtext = make_string (substed, substed_len);
2510
2511       xfree (substed);
2512     }
2513
2514   /* Replace the old text with the new in the cleanest possible way.  */
2515   replace_range (search_regs.start[sub], search_regs.end[sub],
2516                  newtext, 1, 0, 1);
2517   newpoint = search_regs.start[sub] + SCHARS (newtext);
2518
2519   if (case_action == all_caps)
2520     Fupcase_region (make_number (search_regs.start[sub]),
2521                     make_number (newpoint));
2522   else if (case_action == cap_initial)
2523     Fupcase_initials_region (make_number (search_regs.start[sub]),
2524                              make_number (newpoint));
2525
2526   /* Adjust search data for this change.  */
2527   {
2528     int oldend = search_regs.end[sub];
2529     int change = newpoint - search_regs.end[sub];
2530     int i;
2531
2532     for (i = 0; i < search_regs.num_regs; i++)
2533       {
2534         if (search_regs.start[i] > oldend)
2535           search_regs.start[i] += change;
2536         if (search_regs.end[i] > oldend)
2537           search_regs.end[i] += change;
2538       }
2539   }
2540
2541   /* Put point back where it was in the text.  */
2542   if (opoint <= 0)
2543     TEMP_SET_PT (opoint + ZV);
2544   else
2545     TEMP_SET_PT (opoint);
2546
2547   /* Now move point "officially" to the start of the inserted replacement.  */
2548   move_if_not_intangible (newpoint);
2549
2550   return Qnil;
2551 }
2552 \f
2553 static Lisp_Object
2554 match_limit (num, beginningp)
2555      Lisp_Object num;
2556      int beginningp;
2557 {
2558   register int n;
2559
2560   CHECK_NUMBER (num);
2561   n = XINT (num);
2562   if (n < 0 || n >= search_regs.num_regs)
2563     args_out_of_range (num, make_number (search_regs.num_regs));
2564   if (search_regs.num_regs <= 0
2565       || search_regs.start[n] < 0)
2566     return Qnil;
2567   return (make_number ((beginningp) ? search_regs.start[n]
2568                                     : search_regs.end[n]));
2569 }
2570
2571 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2572        doc: /* Return position of start of text matched by last search.
2573 SUBEXP, a number, specifies which parenthesized expression in the last
2574   regexp.
2575 Value is nil if SUBEXPth pair didn't match, or there were less than
2576   SUBEXP pairs.
2577 Zero means the entire text matched by the whole regexp or whole string.  */)
2578      (subexp)
2579      Lisp_Object subexp;
2580 {
2581   return match_limit (subexp, 1);
2582 }
2583
2584 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2585        doc: /* Return position of end of text matched by last search.
2586 SUBEXP, a number, specifies which parenthesized expression in the last
2587   regexp.
2588 Value is nil if SUBEXPth pair didn't match, or there were less than
2589   SUBEXP pairs.
2590 Zero means the entire text matched by the whole regexp or whole string.  */)
2591      (subexp)
2592      Lisp_Object subexp;
2593 {
2594   return match_limit (subexp, 0);
2595 }
2596
2597 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2598        doc: /* Return a list containing all info on what the last search matched.
2599 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2600 All the elements are markers or nil (nil if the Nth pair didn't match)
2601 if the last match was on a buffer; integers or nil if a string was matched.
2602 Use `store-match-data' to reinstate the data in this list.
2603
2604 If INTEGERS (the optional first argument) is non-nil, always use integers
2605 \(rather than markers) to represent buffer positions.
2606 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2607 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2608
2609 Return value is undefined if the last search failed.  */)
2610      (integers, reuse)
2611      Lisp_Object integers, reuse;
2612 {
2613   Lisp_Object tail, prev;
2614   Lisp_Object *data;
2615   int i, len;
2616
2617   if (NILP (last_thing_searched))
2618     return Qnil;
2619
2620   prev = Qnil;
2621
2622   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2623                                  * sizeof (Lisp_Object));
2624
2625   len = -1;
2626   for (i = 0; i < search_regs.num_regs; i++)
2627     {
2628       int start = search_regs.start[i];
2629       if (start >= 0)
2630         {
2631           if (EQ (last_thing_searched, Qt)
2632               || ! NILP (integers))
2633             {
2634               XSETFASTINT (data[2 * i], start);
2635               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2636             }
2637           else if (BUFFERP (last_thing_searched))
2638             {
2639               data[2 * i] = Fmake_marker ();
2640               Fset_marker (data[2 * i],
2641                            make_number (start),
2642                            last_thing_searched);
2643               data[2 * i + 1] = Fmake_marker ();
2644               Fset_marker (data[2 * i + 1],
2645                            make_number (search_regs.end[i]),
2646                            last_thing_searched);
2647             }
2648           else
2649             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2650             abort ();
2651
2652           len = i;
2653         }
2654       else
2655         data[2 * i] = data [2 * i + 1] = Qnil;
2656     }
2657
2658   /* If REUSE is not usable, cons up the values and return them.  */
2659   if (! CONSP (reuse))
2660     return Flist (2 * len + 2, data);
2661
2662   /* If REUSE is a list, store as many value elements as will fit
2663      into the elements of REUSE.  */
2664   for (i = 0, tail = reuse; CONSP (tail);
2665        i++, tail = XCDR (tail))
2666     {
2667       if (i < 2 * len + 2)
2668         XSETCAR (tail, data[i]);
2669       else
2670         XSETCAR (tail, Qnil);
2671       prev = tail;
2672     }
2673
2674   /* If we couldn't fit all value elements into REUSE,
2675      cons up the rest of them and add them to the end of REUSE.  */
2676   if (i < 2 * len + 2)
2677     XSETCDR (prev, Flist (2 * len + 2 - i, data + i));
2678
2679   return reuse;
2680 }
2681
2682
2683 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2684        doc: /* Set internal data on last search match from elements of LIST.
2685 LIST should have been created by calling `match-data' previously.  */)
2686      (list)
2687      register Lisp_Object list;
2688 {
2689   register int i;
2690   register Lisp_Object marker;
2691
2692   if (running_asynch_code)
2693     save_search_regs ();
2694
2695   if (!CONSP (list) && !NILP (list))
2696     list = wrong_type_argument (Qconsp, list);
2697
2698   /* Unless we find a marker with a buffer in LIST, assume that this
2699      match data came from a string.  */
2700   last_thing_searched = Qt;
2701
2702   /* Allocate registers if they don't already exist.  */
2703   {
2704     int length = XFASTINT (Flength (list)) / 2;
2705
2706     if (length > search_regs.num_regs)
2707       {
2708         if (search_regs.num_regs == 0)
2709           {
2710             search_regs.start
2711               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2712             search_regs.end
2713               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2714           }
2715         else
2716           {
2717             search_regs.start
2718               = (regoff_t *) xrealloc (search_regs.start,
2719                                        length * sizeof (regoff_t));
2720             search_regs.end
2721               = (regoff_t *) xrealloc (search_regs.end,
2722                                        length * sizeof (regoff_t));
2723           }
2724
2725         for (i = search_regs.num_regs; i < length; i++)
2726           search_regs.start[i] = -1;
2727
2728         search_regs.num_regs = length;
2729       }
2730   }
2731
2732   for (i = 0; i < search_regs.num_regs; i++)
2733     {
2734       marker = Fcar (list);
2735       if (NILP (marker))
2736         {
2737           search_regs.start[i] = -1;
2738           list = Fcdr (list);
2739         }
2740       else
2741         {
2742           int from;
2743
2744           if (MARKERP (marker))
2745             {
2746               if (XMARKER (marker)->buffer == 0)
2747                 XSETFASTINT (marker, 0);
2748               else
2749                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2750             }
2751
2752           CHECK_NUMBER_COERCE_MARKER (marker);
2753           from = XINT (marker);
2754           list = Fcdr (list);
2755
2756           marker = Fcar (list);
2757           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2758             XSETFASTINT (marker, 0);
2759
2760           CHECK_NUMBER_COERCE_MARKER (marker);
2761           search_regs.start[i] = from;
2762           search_regs.end[i] = XINT (marker);
2763         }
2764       list = Fcdr (list);
2765     }
2766
2767   return Qnil;
2768 }
2769
2770 /* If non-zero the match data have been saved in saved_search_regs
2771    during the execution of a sentinel or filter. */
2772 static int search_regs_saved;
2773 static struct re_registers saved_search_regs;
2774
2775 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2776    if asynchronous code (filter or sentinel) is running. */
2777 static void
2778 save_search_regs ()
2779 {
2780   if (!search_regs_saved)
2781     {
2782       saved_search_regs.num_regs = search_regs.num_regs;
2783       saved_search_regs.start = search_regs.start;
2784       saved_search_regs.end = search_regs.end;
2785       search_regs.num_regs = 0;
2786       search_regs.start = 0;
2787       search_regs.end = 0;
2788
2789       search_regs_saved = 1;
2790     }
2791 }
2792
2793 /* Called upon exit from filters and sentinels. */
2794 void
2795 restore_match_data ()
2796 {
2797   if (search_regs_saved)
2798     {
2799       if (search_regs.num_regs > 0)
2800         {
2801           xfree (search_regs.start);
2802           xfree (search_regs.end);
2803         }
2804       search_regs.num_regs = saved_search_regs.num_regs;
2805       search_regs.start = saved_search_regs.start;
2806       search_regs.end = saved_search_regs.end;
2807
2808       search_regs_saved = 0;
2809     }
2810 }
2811
2812 /* Quote a string to inactivate reg-expr chars */
2813
2814 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2815        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2816      (string)
2817      Lisp_Object string;
2818 {
2819   register unsigned char *in, *out, *end;
2820   register unsigned char *temp;
2821   int backslashes_added = 0;
2822
2823   CHECK_STRING (string);
2824
2825   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2826
2827   /* Now copy the data into the new string, inserting escapes. */
2828
2829   in = SDATA (string);
2830   end = in + SBYTES (string);
2831   out = temp;
2832
2833   for (; in != end; in++)
2834     {
2835       if (*in == '[' || *in == ']'
2836           || *in == '*' || *in == '.' || *in == '\\'
2837           || *in == '?' || *in == '+'
2838           || *in == '^' || *in == '$')
2839         *out++ = '\\', backslashes_added++;
2840       *out++ = *in;
2841     }
2842
2843   return make_specified_string (temp,
2844                                 SCHARS (string) + backslashes_added,
2845                                 out - temp,
2846                                 STRING_MULTIBYTE (string));
2847 }
2848 \f
2849 void
2850 syms_of_search ()
2851 {
2852   register int i;
2853
2854   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2855     {
2856       searchbufs[i].buf.allocated = 100;
2857       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
2858       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2859       searchbufs[i].regexp = Qnil;
2860       staticpro (&searchbufs[i].regexp);
2861       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2862     }
2863   searchbuf_head = &searchbufs[0];
2864
2865   Qsearch_failed = intern ("search-failed");
2866   staticpro (&Qsearch_failed);
2867   Qinvalid_regexp = intern ("invalid-regexp");
2868   staticpro (&Qinvalid_regexp);
2869
2870   Fput (Qsearch_failed, Qerror_conditions,
2871         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2872   Fput (Qsearch_failed, Qerror_message,
2873         build_string ("Search failed"));
2874
2875   Fput (Qinvalid_regexp, Qerror_conditions,
2876         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2877   Fput (Qinvalid_regexp, Qerror_message,
2878         build_string ("Invalid regexp"));
2879
2880   last_thing_searched = Qnil;
2881   staticpro (&last_thing_searched);
2882
2883   defsubr (&Slooking_at);
2884   defsubr (&Sposix_looking_at);
2885   defsubr (&Sstring_match);
2886   defsubr (&Sposix_string_match);
2887   defsubr (&Ssearch_forward);
2888   defsubr (&Ssearch_backward);
2889   defsubr (&Sword_search_forward);
2890   defsubr (&Sword_search_backward);
2891   defsubr (&Sre_search_forward);
2892   defsubr (&Sre_search_backward);
2893   defsubr (&Sposix_search_forward);
2894   defsubr (&Sposix_search_backward);
2895   defsubr (&Sreplace_match);
2896   defsubr (&Smatch_beginning);
2897   defsubr (&Smatch_end);
2898   defsubr (&Smatch_data);
2899   defsubr (&Sset_match_data);
2900   defsubr (&Sregexp_quote);
2901 }