src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
   3                  2004, 2005 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20   /* Number of elements in the searchbufs array.  */
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
       /* Next entry in the list that starts at searchbuf_head; 0 marks
          the end of the list.  */
  43   struct regexp_cache *next;
       /* REGEXP is a copy of the pattern string this entry was compiled
          from; compile_pattern_1 sets it to nil before compiling, so it
          stays nil if compilation fails.  WHITESPACE_REGEXP is the value
          Vsearch_spaces_regexp had when the entry was compiled.  */
  44   Lisp_Object regexp, whitespace_regexp;
       /* The compiled pattern, filled in by re_compile_pattern.  */
  45   struct re_pattern_buffer buf;
       /* 0400 is octal for 256 bytes.
          NOTE(review): presumably installed as buf.fastmap where the
          cache is initialized; that code is not in this part of the
          file -- confirm.  */
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* Error condition signaled when compile_pattern fails to compile a regexp.  */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
     /* Consulted by compile_pattern_1: when non-nil, its string data is
        handed to re_set_whitespace_regexp for the duration of a pattern
        compilation.  The value is also recorded in each cache entry, and
        compile_pattern only reuses an entry whose recorded value is
        Fequal to the current one.  */
  86 Lisp_Object Vsearch_spaces_regexp;
  87
     /* Old-style forward declarations (no parameter lists), so that these
        static functions can be called before their definitions.  */
  88 static void set_search_regs ();
  89 static void save_search_regs ();
  90 static int simple_search ();
  91 static int boyer_moore ();
  92 static int search_buffer ();
  93
  94 static void
  95 matcher_overflow ()
  96 {
       /* Report a matcher failure as a Lisp error.  The callers in this
          file invoke this when re_match_2 or re_search returns -2.  */
  97   error ("Stack overflow in regexp matcher");
  98 }
  99
 100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 101    PATTERN is the pattern to compile.
 102    CP is the place to put the result.
 103    TRANSLATE is a translation table for ignoring case, or nil for none.
 104    REGP is the structure that says where to store the "register"
 105    values that will result from matching this pattern.
 106    If it is 0, we should compile the pattern not to record any
 107    subexpression bounds.
 108    POSIX is nonzero if we want full backtracking (POSIX style)
 109    for this pattern.  0 means backtrack only enough to get a valid match.
 110    MULTIBYTE is nonzero iff a target of match is a multibyte buffer or
 111    string.
 112
 113    The behavior also depends on Vsearch_spaces_regexp.  */
 114
 115 static void
 116 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 117      struct regexp_cache *cp;
 118      Lisp_Object pattern;
 119      Lisp_Object translate;
 120      struct re_registers *regp;
 121      int posix;
 122      int multibyte;
 123 {
 124   char *val;
 125   reg_syntax_t old;
 126
 127   cp->regexp = Qnil;
 128   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 129   cp->posix = posix;
 130   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 131   cp->buf.target_multibyte = multibyte;
 132   cp->whitespace_regexp = Vsearch_spaces_regexp;
 133   BLOCK_INPUT;
 134   old = re_set_syntax (RE_SYNTAX_EMACS
 135                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 136   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 137                             : SDATA (Vsearch_spaces_regexp));
 138
 139   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 140                                      SBYTES (pattern), &cp->buf);
 141
 142   re_set_whitespace_regexp (NULL);
 143
 144   re_set_syntax (old);
 145   UNBLOCK_INPUT;
 146   if (val)
 147     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 148
 149   cp->regexp = Fcopy_sequence (pattern);
 150 }
 151
 152 /* Shrink each compiled regexp buffer in the cache
 153    to the size actually used right now.
 154    This is called from garbage collection.  */
 155
 156 void
 157 shrink_regexp_cache ()
 158 {
 159   struct regexp_cache *cp;
 160
 161   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 162     {
 163       cp->buf.allocated = cp->buf.used;
 164       cp->buf.buffer
 165         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 166     }
 167 }
 168
 169 /* Compile a regexp if necessary, but first check to see if there's one in
 170    the cache.
 171    PATTERN is the pattern to compile.
 172    TRANSLATE is a translation table for ignoring case, or nil for none.
 173    REGP is the structure that says where to store the "register"
 174    values that will result from matching this pattern.
 175    If it is 0, we should compile the pattern not to record any
 176    subexpression bounds.
 177    POSIX is nonzero if we want full backtracking (POSIX style)
 178    for this pattern.  0 means backtrack only enough to get a valid match.  */
 179
 180 struct re_pattern_buffer *
 181 compile_pattern (pattern, regp, translate, posix, multibyte)
 182      Lisp_Object pattern;
 183      struct re_registers *regp;
 184      Lisp_Object translate;
 185      int posix, multibyte;
 186 {
 187   struct regexp_cache *cp, **cpp;
 188
 189   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 190     {
 191       cp = *cpp;
 192       /* Entries are initialized to nil, and may be set to nil by
 193          compile_pattern_1 if the pattern isn't valid.  Don't apply
 194          string accessors in those cases.  However, compile_pattern_1
 195          is only applied to the cache entry we pick here to reuse.  So
 196          nil should never appear before a non-nil entry.  */
 197       if (NILP (cp->regexp))
 198         goto compile_it;
 199       if (SCHARS (cp->regexp) == SCHARS (pattern)
 200           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 201           && !NILP (Fstring_equal (cp->regexp, pattern))
 202           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 203           && cp->posix == posix
 204           && cp->buf.target_multibyte == multibyte
 205           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 206         break;
 207
 208       /* If we're at the end of the cache, compile into the nil cell
 209          we found, or the last (least recently used) cell with a
 210          string value.  */
 211       if (cp->next == 0)
 212         {
 213         compile_it:
 214           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 215           break;
 216         }
 217     }
 218
 219   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 220      either because we found it in the cache or because we just compiled it.
 221      Move it to the front of the queue to mark it as most recently used.  */
 222   *cpp = cp->next;
 223   cp->next = searchbuf_head;
 224   searchbuf_head = cp;
 225
 226   /* Advise the searching functions about the space we have allocated
 227      for register data.  */
 228   if (regp)
 229     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 230
 231   return &cp->buf;
 232 }
 233
 234 /* Error condition used for failing searches */
 235 Lisp_Object Qsearch_failed;
 236
 237 Lisp_Object
 238 signal_failure (arg)
 239      Lisp_Object arg;
 240 {
       /* Signal Qsearch_failed with the one-element list (ARG) as its
          data.  Qnil is the value handed back if control returns here
          from Fsignal.  */
 241   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 242   return Qnil;
 243 }
 244 \f
 245 static Lisp_Object
 246 looking_at_1 (string, posix)
 247      Lisp_Object string;
 248      int posix;
 249 {
 250   Lisp_Object val;
 251   unsigned char *p1, *p2;
 252   int s1, s2;
 253   register int i;
 254   struct re_pattern_buffer *bufp;
 255
 256   if (running_asynch_code)
 257     save_search_regs ();
 258
 259   CHECK_STRING (string);
 260   bufp = compile_pattern (string, &search_regs,
 261                           (!NILP (current_buffer->case_fold_search)
 262                            ? current_buffer->case_canon_table : Qnil),
 263                           posix,
 264                           !NILP (current_buffer->enable_multibyte_characters));
 265
 266   immediate_quit = 1;
 267   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 268
 269   /* Get pointers and sizes of the two strings
 270      that make up the visible portion of the buffer. */
 271
 272   p1 = BEGV_ADDR;
 273   s1 = GPT_BYTE - BEGV_BYTE;
 274   p2 = GAP_END_ADDR;
 275   s2 = ZV_BYTE - GPT_BYTE;
 276   if (s1 < 0)
 277     {
 278       p2 = p1;
 279       s2 = ZV_BYTE - BEGV_BYTE;
 280       s1 = 0;
 281     }
 282   if (s2 < 0)
 283     {
 284       s1 = ZV_BYTE - BEGV_BYTE;
 285       s2 = 0;
 286     }
 287
 288   re_match_object = Qnil;
 289
 290   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 291                   PT_BYTE - BEGV_BYTE, &search_regs,
 292                   ZV_BYTE - BEGV_BYTE);
 293   immediate_quit = 0;
 294
 295   if (i == -2)
 296     matcher_overflow ();
 297
 298   val = (0 <= i ? Qt : Qnil);
 299   if (i >= 0)
 300     for (i = 0; i < search_regs.num_regs; i++)
 301       if (search_regs.start[i] >= 0)
 302         {
 303           search_regs.start[i]
 304             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 305           search_regs.end[i]
 306             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 307         }
 308   XSETBUFFER (last_thing_searched, current_buffer);
 309   return val;
 310 }
 311
 312 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 313        doc: /* Return t if text after point matches regular expression REGEXP.
 314 This function modifies the match data that `match-beginning',
 315 `match-end' and `match-data' access; save and restore the match
 316 data if you want to preserve them.  */)
 317      (regexp)
 318      Lisp_Object regexp;
 319 {
       /* The 0 is looking_at_1's POSIX argument: ordinary, non-POSIX
          backtracking.  */
 320   return looking_at_1 (regexp, 0);
 321 }
 322
 323 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 324        doc: /* Return t if text after point matches regular expression REGEXP.
 325 Find the longest match, in accord with Posix regular expression rules.
 326 This function modifies the match data that `match-beginning',
 327 `match-end' and `match-data' access; save and restore the match
 328 data if you want to preserve them.  */)
 329      (regexp)
 330      Lisp_Object regexp;
 331 {
       /* The 1 is looking_at_1's POSIX argument: compile the pattern
          for full POSIX backtracking.  */
 332   return looking_at_1 (regexp, 1);
 333 }
 334 \f
 335 static Lisp_Object
 336 string_match_1 (regexp, string, start, posix)
 337      Lisp_Object regexp, string, start;
 338      int posix;
 339 {
 340   int val;
 341   struct re_pattern_buffer *bufp;
 342   int pos, pos_byte;
 343   int i;
 344
 345   if (running_asynch_code)
 346     save_search_regs ();
 347
 348   CHECK_STRING (regexp);
 349   CHECK_STRING (string);
 350
 351   if (NILP (start))
 352     pos = 0, pos_byte = 0;
 353   else
 354     {
 355       int len = SCHARS (string);
 356
 357       CHECK_NUMBER (start);
 358       pos = XINT (start);
 359       if (pos < 0 && -pos <= len)
 360         pos = len + pos;
 361       else if (0 > pos || pos > len)
 362         args_out_of_range (string, start);
 363       pos_byte = string_char_to_byte (string, pos);
 364     }
 365
 366   bufp = compile_pattern (regexp, &search_regs,
 367                           (!NILP (current_buffer->case_fold_search)
 368                            ? current_buffer->case_canon_table : Qnil),
 369                           posix,
 370                           STRING_MULTIBYTE (string));
 371   immediate_quit = 1;
 372   re_match_object = string;
 373
 374   val = re_search (bufp, (char *) SDATA (string),
 375                    SBYTES (string), pos_byte,
 376                    SBYTES (string) - pos_byte,
 377                    &search_regs);
 378   immediate_quit = 0;
 379   last_thing_searched = Qt;
 380   if (val == -2)
 381     matcher_overflow ();
 382   if (val < 0) return Qnil;
 383
 384   for (i = 0; i < search_regs.num_regs; i++)
 385     if (search_regs.start[i] >= 0)
 386       {
 387         search_regs.start[i]
 388           = string_byte_to_char (string, search_regs.start[i]);
 389         search_regs.end[i]
 390           = string_byte_to_char (string, search_regs.end[i]);
 391       }
 392
 393   return make_number (string_byte_to_char (string, val));
 394 }
 395
 396 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 397        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 398 Matching ignores case if `case-fold-search' is non-nil.
 399 If third arg START is non-nil, start search at that index in STRING.
 400 For index of first char beyond the match, do (match-end 0).
 401 `match-end' and `match-beginning' also give indices of substrings
 402 matched by parenthesis constructs in the pattern.
 403
 404 You can use the function `match-string' to extract the substrings
 405 matched by the parenthesis constructions in REGEXP. */)
 406      (regexp, string, start)
 407      Lisp_Object regexp, string, start;
 408 {
       /* The 0 is string_match_1's POSIX argument: ordinary, non-POSIX
          backtracking.  */
 409   return string_match_1 (regexp, string, start, 0);
 410 }
 411
 412 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 413        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 414 Find the longest match, in accord with Posix regular expression rules.
 415 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 416 If third arg START is non-nil, start search at that index in STRING.
 417 For index of first char beyond the match, do (match-end 0).
 418 `match-end' and `match-beginning' also give indices of substrings
 419 matched by parenthesis constructs in the pattern.  */)
 420      (regexp, string, start)
 421      Lisp_Object regexp, string, start;
 422 {
       /* The 1 is string_match_1's POSIX argument: compile the pattern
          for full POSIX backtracking.  */
 423   return string_match_1 (regexp, string, start, 1);
 424 }
 425
 426 /* Match REGEXP against STRING, searching all of STRING,
 427    and return the index of the match, or negative on failure.
 428    This does not clobber the match data.  */
 429
 430 int
 431 fast_string_match (regexp, string)
 432      Lisp_Object regexp, string;
 433 {
 434   int val;
 435   struct re_pattern_buffer *bufp;
 436
 437   bufp = compile_pattern (regexp, 0, Qnil,
 438                           0, STRING_MULTIBYTE (string));
 439   immediate_quit = 1;
 440   re_match_object = string;
 441
 442   val = re_search (bufp, (char *) SDATA (string),
 443                    SBYTES (string), 0,
 444                    SBYTES (string), 0);
 445   immediate_quit = 0;
 446   return val;
 447 }
 448
 449 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 450    and return the index of the match, or negative on failure.
 451    This does not clobber the match data.
 452    We assume that STRING contains single-byte characters.  */
 453
 454 extern Lisp_Object Vascii_downcase_table;
 455
 456 int
 457 fast_c_string_match_ignore_case (regexp, string)
 458      Lisp_Object regexp;
 459      const char *string;
 460 {
 461   int val;
 462   struct re_pattern_buffer *bufp;
 463   int len = strlen (string);
 464
 465   regexp = string_make_unibyte (regexp);
 466   re_match_object = Qt;
 467   bufp = compile_pattern (regexp, 0,
 468                           Vascii_canon_table, 0,
 469                           0);
 470   immediate_quit = 1;
 471   val = re_search (bufp, string, len, 0, len, 0);
 472   immediate_quit = 0;
 473   return val;
 474 }
 475
 476 /* Like fast_string_match but ignore case.  */
 477
 478 int
 479 fast_string_match_ignore_case (regexp, string)
 480      Lisp_Object regexp, string;
 481 {
 482   int val;
 483   struct re_pattern_buffer *bufp;
 484
 485   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 486                           0, STRING_MULTIBYTE (string));
 487   immediate_quit = 1;
 488   re_match_object = string;
 489
 490   val = re_search (bufp, (char *) SDATA (string),
 491                    SBYTES (string), 0,
 492                    SBYTES (string), 0);
 493   immediate_quit = 0;
 494   return val;
 495 }
 496 \f
 497 /* The newline cache: remembering which sections of text have no newlines.  */
 498
 499 /* If the user has requested newline caching, make sure it's on.
 500    Otherwise, make sure it's off.
 501    This is our cheezy way of associating an action with the change of
 502    state of a buffer-local variable.  */
 503 static void
 504 newline_cache_on_off (buf)
 505      struct buffer *buf;
 506 {
 507   if (NILP (buf->cache_long_line_scans))
 508     {
 509       /* It should be off.  */
 510       if (buf->newline_cache)
 511         {
 512           free_region_cache (buf->newline_cache);
 513           buf->newline_cache = 0;
 514         }
 515     }
 516   else
 517     {
 518       /* It should be on.  */
 519       if (buf->newline_cache == 0)
 520         buf->newline_cache = new_region_cache ();
 521     }
 522 }
 523
 524 \f
 525 /* Search for COUNT instances of the character TARGET between START and END.
 526
 527    If COUNT is positive, search forwards; END must be >= START.
 528    If COUNT is negative, search backwards for the -COUNTth instance;
 529       END must be <= START.
 530    If COUNT is zero, do anything you please; run rogue, for all I care.
 531
 532    If END is zero, use BEGV or ZV instead, as appropriate for the
 533    direction indicated by COUNT.
 534
 535    If we find COUNT instances, set *SHORTAGE to zero, and return the
 536    position past the COUNTth match.  Note that for reverse motion
 537    this is not the same as the usual convention for Emacs motion commands.
 538
 539    If we don't find COUNT instances before reaching END, set *SHORTAGE
 540    to the number of TARGETs left unfound, and return END.
 541
 542    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 543    except when inside redisplay.  */
 544
 545 int
 546 scan_buffer (target, start, end, count, shortage, allow_quit)
 547      register int target;
 548      int start, end;
 549      int count;
 550      int *shortage;
 551      int allow_quit;
 552 {
 553   struct region_cache *newline_cache;
 554   int direction;
 555
       /* Anything that is not a positive COUNT (including zero) is
          handled as a backward scan.  */
 556   if (count > 0)
 557     {
 558       direction = 1;
 559       if (! end) end = ZV;
 560     }
 561   else
 562     {
 563       direction = -1;
 564       if (! end) end = BEGV;
 565     }
 566
       /* Create or free the buffer's newline cache as its setting
          requires, then pick up whatever is there now (possibly 0).  */
 567   newline_cache_on_off (current_buffer);
 568   newline_cache = current_buffer->newline_cache;
 569
       /* SHORTAGE may be a null pointer; it is only written when it is
          not.  */
 570   if (shortage != 0)
 571     *shortage = 0;
 572
 573   immediate_quit = allow_quit;
 574
 575   if (count > 0)
 576     while (start != end)
 577       {
 578         /* Our innermost scanning loop is very simple; it doesn't know
 579            about gaps, buffer ends, or the newline cache.  ceiling is
 580            the position of the last character before the next such
 581            obstacle --- the last character the dumb search loop should
 582            examine.  */
 583         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 584         int start_byte = CHAR_TO_BYTE (start);
 585         int tem;
 586
 587         /* If we're looking for a newline, consult the newline cache
 588            to see where we can avoid some scanning.  */
 589         if (target == '\n' && newline_cache)
 590           {
 591             int next_change;
                 /* immediate_quit is cleared while the cache is being
                    consulted and put back to ALLOW_QUIT afterwards.  */
 592             immediate_quit = 0;
 593             while (region_cache_forward
 594                    (current_buffer, newline_cache, start_byte, &next_change))
 595               start_byte = next_change;
 596             immediate_quit = allow_quit;
 597
 598             /* START should never be after END.  */
 599             if (start_byte > ceiling_byte)
 600               start_byte = ceiling_byte;
 601
 602             /* Now the text after start is an unknown region, and
 603                next_change is the position of the next known region. */
 604             ceiling_byte = min (next_change - 1, ceiling_byte);
 605           }
 606
 607         /* The dumb loop can only scan text stored in contiguous
 608            bytes. BUFFER_CEILING_OF returns the last character
 609            position that is contiguous, so the ceiling is the
 610            position after that.  */
 611         tem = BUFFER_CEILING_OF (start_byte);
 612         ceiling_byte = min (tem, ceiling_byte);
 613
 614         {
 615           /* The termination address of the dumb loop.  */
 616           register unsigned char *ceiling_addr
 617             = BYTE_POS_ADDR (ceiling_byte) + 1;
 618           register unsigned char *cursor
 619             = BYTE_POS_ADDR (start_byte);
               /* BASE stays at the address of START_BYTE, so that
                  START_BYTE + CURSOR - BASE is CURSOR's byte position.  */
 620           unsigned char *base = cursor;
 621
 622           while (cursor < ceiling_addr)
 623             {
 624               unsigned char *scan_start = cursor;
 625
 626               /* The dumb loop.  */
 627               while (*cursor != target && ++cursor < ceiling_addr)
 628                 ;
 629
 630               /* If we're looking for newlines, cache the fact that
 631                  the region from start to cursor is free of them. */
 632               if (target == '\n' && newline_cache)
 633                 know_region_cache (current_buffer, newline_cache,
 634                                    start_byte + scan_start - base,
 635                                    start_byte + cursor - base);
 636
 637               /* Did we find the target character?  */
 638               if (cursor < ceiling_addr)
 639                 {
 640                   if (--count == 0)
 641                     {
 642                       immediate_quit = 0;
                           /* The + 1 gives the position just past the
                              match.  */
 643                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 644                     }
 645                   cursor++;
 646                 }
 647             }
 648
               /* Resume the outer loop from where this stretch ended.  */
 649           start = BYTE_TO_CHAR (start_byte + cursor - base);
 650         }
 651       }
 652   else
 653     while (start > end)
 654       {
 655         /* The last character to check before the next obstacle.  */
 656         int ceiling_byte = CHAR_TO_BYTE (end);
 657         int start_byte = CHAR_TO_BYTE (start);
 658         int tem;
 659
 660         /* Consult the newline cache, if appropriate.  */
 661         if (target == '\n' && newline_cache)
 662           {
 663             int next_change;
 664             immediate_quit = 0;
 665             while (region_cache_backward
 666                    (current_buffer, newline_cache, start_byte, &next_change))
 667               start_byte = next_change;
 668             immediate_quit = allow_quit;
 669
 670             /* Start should never be at or before end.  */
 671             if (start_byte <= ceiling_byte)
 672               start_byte = ceiling_byte + 1;
 673
 674             /* Now the text before start is an unknown region, and
 675                next_change is the position of the next known region. */
 676             ceiling_byte = max (next_change, ceiling_byte);
 677           }
 678
 679         /* Stop scanning before the gap.  */
 680         tem = BUFFER_FLOOR_OF (start_byte - 1);
 681         ceiling_byte = max (tem, ceiling_byte);
 682
 683         {
 684           /* The termination address of the dumb loop.  */
 685           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
               /* The backward scan starts at the byte just before
                  START_BYTE; BASE remembers that address.  */
 686           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 687           unsigned char *base = cursor;
 688
 689           while (cursor >= ceiling_addr)
 690             {
 691               unsigned char *scan_start = cursor;
 692
 693               while (*cursor != target && --cursor >= ceiling_addr)
 694                 ;
 695
 696               /* If we're looking for newlines, cache the fact that
 697                  the region from after the cursor to start is free of them.  */
 698               if (target == '\n' && newline_cache)
 699                 know_region_cache (current_buffer, newline_cache,
 700                                    start_byte + cursor - base,
 701                                    start_byte + scan_start - base);
 702
 703               /* Did we find the target character?  */
 704               if (cursor >= ceiling_addr)
 705                 {
                       /* COUNT is <= 0 in this branch; each match moves
                          it up by one, and the scan is done once it is
                          no longer negative.  */
 706                   if (++count >= 0)
 707                     {
 708                       immediate_quit = 0;
 709                       return BYTE_TO_CHAR (start_byte + cursor - base);
 710                     }
 711                   cursor--;
 712                 }
 713             }
 714
 715           start = BYTE_TO_CHAR (start_byte + cursor - base);
 716         }
 717       }
 718
       /* Not enough matches.  COUNT still holds the signed number of
          matches that were not found; multiplying by DIRECTION makes
          the reported shortage nonnegative.  */
 719   immediate_quit = 0;
 720   if (shortage != 0)
 721     *shortage = count * direction;
 722   return start;
 723 }
 724 \f
 725 /* Search for COUNT instances of a line boundary, which means either a
 726    newline or (if selective display enabled) a carriage return.
 727    Start at START.  If COUNT is negative, search backwards.
 728
 729    We report the resulting position by calling TEMP_SET_PT_BOTH.
 730
 731    If we find COUNT instances, we position after (always after,
 732    even if scanning backwards) the COUNTth match, and return 0.
 733
 734    If we don't find COUNT instances before reaching the end of the
 735    buffer (or the beginning, if scanning backwards), we return
 736    the number of line boundaries left unfound, and position at
 737    the limit we bumped up against.
 738
 739    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 740    except in special cases.
        
        NOTE(review): the loops in the body compare bytes against '\n'
        only; no test for carriage return is visible in this function.
        Confirm whether the carriage-return wording above is still
        accurate.  */
 741
 742 int
 743 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 744      int start, start_byte;
 745      int limit, limit_byte;
 746      register int count;
 747      int allow_quit;
 748 {
       /* As in scan_buffer, a COUNT that is not positive selects the
          backward scan.  */
 749   int direction = ((count > 0) ? 1 : -1);
 750
 751   register unsigned char *cursor;
 752   unsigned char *base;
 753
 754   register int ceiling;
 755   register unsigned char *ceiling_addr;
 756
       /* Saved so that every exit path below can restore it.  */
 757   int old_immediate_quit = immediate_quit;
 758
 759   /* The code that follows is like scan_buffer
 760      but checks for either newline or carriage return.  */
 761
 762   if (allow_quit)
 763     immediate_quit++;
 764
       /* The START_BYTE argument is recomputed from START here, so the
          value the caller passed in is not used.  */
 765   start_byte = CHAR_TO_BYTE (start);
 766
 767   if (count > 0)
 768     {
 769       while (start_byte < limit_byte)
 770         {
               /* Scan one contiguous stretch of text at a time, never
                  going past LIMIT_BYTE - 1.  */
 771           ceiling =  BUFFER_CEILING_OF (start_byte);
 772           ceiling = min (limit_byte - 1, ceiling);
 773           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 774           base = (cursor = BYTE_POS_ADDR (start_byte));
 775           while (1)
 776             {
 777               while (*cursor != '\n' && ++cursor != ceiling_addr)
 778                 ;
 779
 780               if (cursor != ceiling_addr)
 781                 {
 782                   if (--count == 0)
 783                     {
 784                       immediate_quit = old_immediate_quit;
                           /* The + 1 puts us just after the newline.  */
 785                       start_byte = start_byte + cursor - base + 1;
 786                       start = BYTE_TO_CHAR (start_byte);
 787                       TEMP_SET_PT_BOTH (start, start_byte);
 788                       return 0;
 789                     }
 790                   else
 791                     if (++cursor == ceiling_addr)
 792                       break;
 793                 }
 794               else
 795                 break;
 796             }
               /* Advance past the stretch that was just scanned.  */
 797           start_byte += cursor - base;
 798         }
 799     }
 800   else
 801     {
 802       while (start_byte > limit_byte)
 803         {
 804           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 805           ceiling = max (limit_byte, ceiling);
               /* Here the termination address is one byte BEFORE the
                  lowest byte to examine, and CURSOR starts one byte
                  after the first byte to examine because the inner loop
                  decrements before it tests.  */
 806           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 807           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 808           while (1)
 809             {
 810               while (--cursor != ceiling_addr && *cursor != '\n')
 811                 ;
 812
 813               if (cursor != ceiling_addr)
 814                 {
 815                   if (++count == 0)
 816                     {
 817                       immediate_quit = old_immediate_quit;
 818                       /* Return the position AFTER the match we found.  */
 819                       start_byte = start_byte + cursor - base + 1;
 820                       start = BYTE_TO_CHAR (start_byte);
 821                       TEMP_SET_PT_BOTH (start, start_byte);
 822                       return 0;
 823                     }
 824                 }
 825               else
 826                 break;
 827             }
 828           /* Here we add 1 to compensate for the last decrement
 829              of CURSOR, which took it past the valid range.  */
 830           start_byte += cursor - base + 1;
 831         }
 832     }
 833
       /* Ran into the limit: position there and report how many line
          boundaries are still missing, as a nonnegative number.  */
 834   TEMP_SET_PT_BOTH (limit, limit_byte);
 835   immediate_quit = old_immediate_quit;
 836
 837   return count * direction;
 838 }
 839
 840 int
 841 find_next_newline_no_quit (from, cnt)
 842      register int from, cnt;
 843 {
       /* Scan for CNT newlines starting at FROM.  An END of 0 makes
          scan_buffer use BEGV or ZV according to the sign of CNT, the
          null SHORTAGE pointer means no shortage is reported, and the
          final 0 is ALLOW_QUIT, so immediate_quit is not set during the
          scan.  */
 844   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 845 }
 846
 /* Scan for CNT newlines from FROM, going no further than TO, with
    quitting allowed.  If all CNT newlines were found, return the
    position just before the last one; otherwise return the position
    scan_buffer stopped at.  This isn't just the scan result minus one,
    because the scan might have hit TO.  */

 int
 find_before_next_newline (from, to, cnt)
      int from, to, cnt;
 {
   int unfound;
   int where = scan_buffer ('\n', from, to, cnt, &unfound, 1);

   /* scan_buffer returns the position past the newline it found;
      step back over that newline.  */
   return unfound == 0 ? where - 1 : where;
 }
 863 \f
 864 /* Subroutines of Lisp buffer search functions. */
 865
 866 static Lisp_Object
 867 search_command (string, bound, noerror, count, direction, RE, posix)
 868      Lisp_Object string, bound, noerror, count;
 869      int direction;
 870      int RE;
 871      int posix;
 872 {
 873   register int np;
 874   int lim, lim_byte;
 875   int n = direction;
 876
 877   if (!NILP (count))
 878     {
 879       CHECK_NUMBER (count);
 880       n *= XINT (count);
 881     }
 882
 883   CHECK_STRING (string);
 884   if (NILP (bound))
 885     {
 886       if (n > 0)
 887         lim = ZV, lim_byte = ZV_BYTE;
 888       else
 889         lim = BEGV, lim_byte = BEGV_BYTE;
 890     }
 891   else
 892     {
 893       CHECK_NUMBER_COERCE_MARKER (bound);
 894       lim = XINT (bound);
 895       if (n > 0 ? lim < PT : lim > PT)
 896         error ("Invalid search bound (wrong side of point)");
 897       if (lim > ZV)
 898         lim = ZV, lim_byte = ZV_BYTE;
 899       else if (lim < BEGV)
 900         lim = BEGV, lim_byte = BEGV_BYTE;
 901       else
 902         lim_byte = CHAR_TO_BYTE (lim);
 903     }
 904
 905   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 906                       (!NILP (current_buffer->case_fold_search)
 907                        ? current_buffer->case_canon_table
 908                        : Qnil),
 909                       (!NILP (current_buffer->case_fold_search)
 910                        ? current_buffer->case_eqv_table
 911                        : Qnil),
 912                       posix);
 913   if (np <= 0)
 914     {
 915       if (NILP (noerror))
 916         return signal_failure (string);
 917       if (!EQ (noerror, Qt))
 918         {
 919           if (lim < BEGV || lim > ZV)
 920             abort ();
 921           SET_PT_BOTH (lim, lim_byte);
 922           return Qnil;
 923 #if 0 /* This would be clean, but maybe programs depend on
 924          a value of nil here.  */
 925           np = lim;
 926 #endif
 927         }
 928       else
 929         return Qnil;
 930     }
 931
 932   if (np < BEGV || np > ZV)
 933     abort ();
 934
 935   SET_PT (np);
 936
 937   return make_number (np);
 938 }
 939 \f
 940 /* Return 1 if REGEXP it matches just one constant string.  */
 941
 942 static int
 943 trivial_regexp_p (regexp)
 944      Lisp_Object regexp;
 945 {
 946   int len = SBYTES (regexp);
 947   unsigned char *s = SDATA (regexp);
 948   while (--len >= 0)
 949     {
 950       switch (*s++)
 951         {
 952         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 953           return 0;
 954         case '\\':
 955           if (--len < 0)
 956             return 0;
 957           switch (*s++)
 958             {
 959             case '|': case '(': case ')': case '`': case '\'': case 'b':
 960             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 961             case 'S': case '=': case '{': case '}': case '_':
 962             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 963             case '1': case '2': case '3': case '4': case '5':
 964             case '6': case '7': case '8': case '9':
 965               return 0;
 966             }
 967         }
 968     }
 969   return 1;
 970 }
 971
 972 /* Search for the n'th occurrence of STRING in the current buffer,
 973    starting at position POS and stopping at position LIM,
 974    treating STRING as a literal string if RE is false or as
 975    a regular expression if RE is true.
 976
 977    If N is positive, searching is forward and LIM must be greater than POS.
 978    If N is negative, searching is backward and LIM must be less than POS.
 979
 980    Returns -x if x occurrences remain to be found (x > 0),
 981    or else the position at the beginning of the Nth occurrence
 982    (if searching backward) or the end (if searching forward).
 983
 984    POSIX is nonzero if we want full backtracking (POSIX style)
 985    for this pattern.  0 means backtrack only enough to get a valid match.  */
 986
 987 #define TRANSLATE(out, trt, d)                  \
 988 do                                              \
 989   {                                             \
 990     if (! NILP (trt))                           \
 991       {                                         \
 992         Lisp_Object temp;                       \
 993         temp = Faref (trt, make_number (d));    \
 994         if (INTEGERP (temp))                    \
 995           out = XINT (temp);                    \
 996         else                                    \
 997           out = d;                              \
 998       }                                         \
 999     else                                        \
1000       out = d;                                  \
1001   }                                             \
1002 while (0)
1003
1004 static int
1005 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1006                RE, trt, inverse_trt, posix)
1007      Lisp_Object string;
1008      int pos;
1009      int pos_byte;
1010      int lim;
1011      int lim_byte;
1012      int n;
1013      int RE;
1014      Lisp_Object trt;
1015      Lisp_Object inverse_trt;
1016      int posix;
1017 {
1018   int len = SCHARS (string);
1019   int len_byte = SBYTES (string);
1020   register int i;
1021
1022   if (running_asynch_code)
1023     save_search_regs ();
1024
1025   /* Searching 0 times means don't move.  */
1026   /* Null string is found at starting position.  */
1027   if (len == 0 || n == 0)
1028     {
1029       set_search_regs (pos_byte, 0);
1030       return pos;
1031     }
1032
1033   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1034     {
1035       unsigned char *p1, *p2;
1036       int s1, s2;
1037       struct re_pattern_buffer *bufp;
1038
1039       bufp = compile_pattern (string, &search_regs, trt, posix,
1040                               !NILP (current_buffer->enable_multibyte_characters));
1041
1042       immediate_quit = 1;       /* Quit immediately if user types ^G,
1043                                    because letting this function finish
1044                                    can take too long. */
1045       QUIT;                     /* Do a pending quit right away,
1046                                    to avoid paradoxical behavior */
1047       /* Get pointers and sizes of the two strings
1048          that make up the visible portion of the buffer. */
1049
1050       p1 = BEGV_ADDR;
1051       s1 = GPT_BYTE - BEGV_BYTE;
1052       p2 = GAP_END_ADDR;
1053       s2 = ZV_BYTE - GPT_BYTE;
1054       if (s1 < 0)
1055         {
1056           p2 = p1;
1057           s2 = ZV_BYTE - BEGV_BYTE;
1058           s1 = 0;
1059         }
1060       if (s2 < 0)
1061         {
1062           s1 = ZV_BYTE - BEGV_BYTE;
1063           s2 = 0;
1064         }
1065       re_match_object = Qnil;
1066
1067       while (n < 0)
1068         {
1069           int val;
1070           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1071                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1072                              &search_regs,
1073                              /* Don't allow match past current point */
1074                              pos_byte - BEGV_BYTE);
1075           if (val == -2)
1076             {
1077               matcher_overflow ();
1078             }
1079           if (val >= 0)
1080             {
1081               pos_byte = search_regs.start[0] + BEGV_BYTE;
1082               for (i = 0; i < search_regs.num_regs; i++)
1083                 if (search_regs.start[i] >= 0)
1084                   {
1085                     search_regs.start[i]
1086                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1087                     search_regs.end[i]
1088                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1089                   }
1090               XSETBUFFER (last_thing_searched, current_buffer);
1091               /* Set pos to the new position. */
1092               pos = search_regs.start[0];
1093             }
1094           else
1095             {
1096               immediate_quit = 0;
1097               return (n);
1098             }
1099           n++;
1100         }
1101       while (n > 0)
1102         {
1103           int val;
1104           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106                              &search_regs,
1107                              lim_byte - BEGV_BYTE);
1108           if (val == -2)
1109             {
1110               matcher_overflow ();
1111             }
1112           if (val >= 0)
1113             {
1114               pos_byte = search_regs.end[0] + BEGV_BYTE;
1115               for (i = 0; i < search_regs.num_regs; i++)
1116                 if (search_regs.start[i] >= 0)
1117                   {
1118                     search_regs.start[i]
1119                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1120                     search_regs.end[i]
1121                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1122                   }
1123               XSETBUFFER (last_thing_searched, current_buffer);
1124               pos = search_regs.end[0];
1125             }
1126           else
1127             {
1128               immediate_quit = 0;
1129               return (0 - n);
1130             }
1131           n--;
1132         }
1133       immediate_quit = 0;
1134       return (pos);
1135     }
1136   else                          /* non-RE case */
1137     {
1138       unsigned char *raw_pattern, *pat;
1139       int raw_pattern_size;
1140       int raw_pattern_size_byte;
1141       unsigned char *patbuf;
1142       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1143       unsigned char *base_pat = SDATA (string);
1144       /* Set to positive if we find a non-ASCII char that need
1145          translation.  Otherwise set to zero later.  */
1146       int char_base = -1;
1147       int boyer_moore_ok = 1;
1148
1149       /* MULTIBYTE says whether the text to be searched is multibyte.
1150          We must convert PATTERN to match that, or we will not really
1151          find things right.  */
1152
1153       if (multibyte == STRING_MULTIBYTE (string))
1154         {
1155           raw_pattern = (unsigned char *) SDATA (string);
1156           raw_pattern_size = SCHARS (string);
1157           raw_pattern_size_byte = SBYTES (string);
1158         }
1159       else if (multibyte)
1160         {
1161           raw_pattern_size = SCHARS (string);
1162           raw_pattern_size_byte
1163             = count_size_as_multibyte (SDATA (string),
1164                                        raw_pattern_size);
1165           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1166           copy_text (SDATA (string), raw_pattern,
1167                      SCHARS (string), 0, 1);
1168         }
1169       else
1170         {
1171           /* Converting multibyte to single-byte.
1172
1173              ??? Perhaps this conversion should be done in a special way
1174              by subtracting nonascii-insert-offset from each non-ASCII char,
1175              so that only the multibyte chars which really correspond to
1176              the chosen single-byte character set can possibly match.  */
1177           raw_pattern_size = SCHARS (string);
1178           raw_pattern_size_byte = SCHARS (string);
1179           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1180           copy_text (SDATA (string), raw_pattern,
1181                      SBYTES (string), 1, 0);
1182         }
1183
1184       /* Copy and optionally translate the pattern.  */
1185       len = raw_pattern_size;
1186       len_byte = raw_pattern_size_byte;
1187       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1188       pat = patbuf;
1189       base_pat = raw_pattern;
1190       if (multibyte)
1191         {
1192           /* Fill patbuf by translated characters in STRING while
1193              checking if we can use boyer-moore search.  If TRT is
1194              non-nil, we can use boyer-moore search only if TRT can be
1195              represented by the byte array of 256 elements.  For that,
1196              all non-ASCII case-equivalents of all case-senstive
1197              characters in STRING must belong to the same charset and
1198              row.  */
1199
1200           while (--len >= 0)
1201             {
1202               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1203               int c, translated, inverse;
1204               int in_charlen, charlen;
1205
1206               /* If we got here and the RE flag is set, it's because we're
1207                  dealing with a regexp known to be trivial, so the backslash
1208                  just quotes the next character.  */
1209               if (RE && *base_pat == '\\')
1210                 {
1211                   len--;
1212                   len_byte--;
1213                   base_pat++;
1214                 }
1215
1216               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1217
1218               if (NILP (trt))
1219                 {
1220                   str = base_pat;
1221                   charlen = in_charlen;
1222                 }
1223               else
1224                 {
1225                   /* Translate the character.  */
1226                   TRANSLATE (translated, trt, c);
1227                   charlen = CHAR_STRING (translated, str_base);
1228                   str = str_base;
1229
1230                   /* Check if C has any other case-equivalents.  */
1231                   TRANSLATE (inverse, inverse_trt, c);
1232                   /* If so, check if we can use boyer-moore.  */
1233                   if (c != inverse && boyer_moore_ok)
1234                     {
1235                       /* Check if all equivalents belong to the same
1236                          group of characters.  Note that the check of C
1237                          itself is done by the last iteration.  */
1238                       int this_char_base = -1;
1239
1240                       while (boyer_moore_ok)
1241                         {
1242                           if (ASCII_BYTE_P (inverse))
1243                             {
1244                               if (this_char_base > 0)
1245                                 boyer_moore_ok = 0;
1246                               else
1247                                 {
1248                                   this_char_base = 0;
1249                                   if (char_base < 0)
1250                                     char_base = this_char_base;
1251                                 }
1252                             }
1253                           else if (CHAR_BYTE8_P (inverse))
1254                             /* Boyer-moore search can't handle a
1255                                translation of an eight-bit
1256                                character.  */
1257                             boyer_moore_ok = 0;
1258                           else if (this_char_base < 0)
1259                             {
1260                               this_char_base = inverse & ~0x3F;
1261                               if (char_base < 0)
1262                                 char_base = this_char_base;
1263                               else if (char_base > 0
1264                                        && this_char_base != char_base)
1265                                 boyer_moore_ok = 0;
1266                             }
1267                           else if ((inverse & ~0x3F) != this_char_base)
1268                             boyer_moore_ok = 0;
1269                           if (c == inverse)
1270                             break;
1271                           TRANSLATE (inverse, inverse_trt, inverse);
1272                         }
1273                     }
1274                 }
1275               if (char_base < 0)
1276                 char_base = 0;
1277
1278               /* Store this character into the translated pattern.  */
1279               bcopy (str, pat, charlen);
1280               pat += charlen;
1281               base_pat += in_charlen;
1282               len_byte -= in_charlen;
1283             }
1284         }
1285       else
1286         {
1287           /* Unibyte buffer.  */
1288           char_base = 0;
1289           while (--len >= 0)
1290             {
1291               int c, translated;
1292
1293               /* If we got here and the RE flag is set, it's because we're
1294                  dealing with a regexp known to be trivial, so the backslash
1295                  just quotes the next character.  */
1296               if (RE && *base_pat == '\\')
1297                 {
1298                   len--;
1299                   raw_pattern_size--;
1300                   base_pat++;
1301                 }
1302               c = *base_pat++;
1303               TRANSLATE (translated, trt, c);
1304               *pat++ = translated;
1305             }
1306         }
1307
1308       len_byte = pat - patbuf;
1309       len = raw_pattern_size;
1310       pat = base_pat = patbuf;
1311
1312       if (boyer_moore_ok)
1313         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1314                             pos, pos_byte, lim, lim_byte,
1315                             char_base);
1316       else
1317         return simple_search (n, pat, len, len_byte, trt,
1318                               pos, pos_byte, lim, lim_byte);
1319     }
1320 }
1321 \f
1322 /* Do a simple string search N times for the string PAT,
1323    whose length is LEN/LEN_BYTE,
1324    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1325    TRT is the translation table.
1326
1327    Return the character position where the match is found.
1328    Otherwise, if M matches remained to be found, return -M.
1329
1330    This kind of search works regardless of what is in PAT and
1331    regardless of what is in TRT.  It is used in cases where
1332    boyer_moore cannot work.  */
1333
1334 static int
1335 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1336      int n;
1337      unsigned char *pat;
1338      int len, len_byte;
1339      Lisp_Object trt;
1340      int pos, pos_byte;
1341      int lim, lim_byte;
1342 {
1343   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1344   int forward = n > 0;
1345   /* Number of buffer bytes matched.  Note that this may be different
1346      from len_byte in a multibyte buffer.  */
1347   int match_byte;
1348
1349   if (lim > pos && multibyte)
1350     while (n > 0)
1351       {
1352         while (1)
1353           {
1354             /* Try matching at position POS.  */
1355             int this_pos = pos;
1356             int this_pos_byte = pos_byte;
1357             int this_len = len;
1358             int this_len_byte = len_byte;
1359             unsigned char *p = pat;
1360             if (pos + len > lim)
1361               goto stop;
1362
1363             while (this_len > 0)
1364               {
1365                 int charlen, buf_charlen;
1366                 int pat_ch, buf_ch;
1367
1368                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1369                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1370                                                  ZV_BYTE - this_pos_byte,
1371                                                  buf_charlen);
1372                 TRANSLATE (buf_ch, trt, buf_ch);
1373
1374                 if (buf_ch != pat_ch)
1375                   break;
1376
1377                 this_len_byte -= charlen;
1378                 this_len--;
1379                 p += charlen;
1380
1381                 this_pos_byte += buf_charlen;
1382                 this_pos++;
1383               }
1384
1385             if (this_len == 0)
1386               {
1387                 match_byte = this_pos_byte - pos_byte;
1388                 pos += len;
1389                 pos_byte += match_byte;
1390                 break;
1391               }
1392
1393             INC_BOTH (pos, pos_byte);
1394           }
1395
1396         n--;
1397       }
1398   else if (lim > pos)
1399     while (n > 0)
1400       {
1401         while (1)
1402           {
1403             /* Try matching at position POS.  */
1404             int this_pos = pos;
1405             int this_len = len;
1406             unsigned char *p = pat;
1407
1408             if (pos + len > lim)
1409               goto stop;
1410
1411             while (this_len > 0)
1412               {
1413                 int pat_ch = *p++;
1414                 int buf_ch = FETCH_BYTE (this_pos);
1415                 TRANSLATE (buf_ch, trt, buf_ch);
1416
1417                 if (buf_ch != pat_ch)
1418                   break;
1419
1420                 this_len--;
1421                 this_pos++;
1422               }
1423
1424             if (this_len == 0)
1425               {
1426                 match_byte = len;
1427                 pos += len;
1428                 break;
1429               }
1430
1431             pos++;
1432           }
1433
1434         n--;
1435       }
1436   /* Backwards search.  */
1437   else if (lim < pos && multibyte)
1438     while (n < 0)
1439       {
1440         while (1)
1441           {
1442             /* Try matching at position POS.  */
1443             int this_pos = pos - len;
1444             int this_pos_byte;
1445             int this_len = len;
1446             int this_len_byte = len_byte;
1447             unsigned char *p = pat;
1448
1449             if (pos - len < lim)
1450               goto stop;
1451             this_pos_byte = CHAR_TO_BYTE (this_pos);
1452             match_byte = pos_byte - this_pos_byte;
1453
1454             while (this_len > 0)
1455               {
1456                 int charlen, buf_charlen;
1457                 int pat_ch, buf_ch;
1458
1459                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1460                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1461                                                  ZV_BYTE - this_pos_byte,
1462                                                  buf_charlen);
1463                 TRANSLATE (buf_ch, trt, buf_ch);
1464
1465                 if (buf_ch != pat_ch)
1466                   break;
1467
1468                 this_len_byte -= charlen;
1469                 this_len--;
1470                 p += charlen;
1471                 this_pos_byte += buf_charlen;
1472                 this_pos++;
1473               }
1474
1475             if (this_len == 0)
1476               {
1477                 pos -= len;
1478                 pos_byte -= match_byte;
1479                 break;
1480               }
1481
1482             DEC_BOTH (pos, pos_byte);
1483           }
1484
1485         n++;
1486       }
1487   else if (lim < pos)
1488     while (n < 0)
1489       {
1490         while (1)
1491           {
1492             /* Try matching at position POS.  */
1493             int this_pos = pos - len;
1494             int this_len = len;
1495             unsigned char *p = pat;
1496
1497             if (pos - len < lim)
1498               goto stop;
1499
1500             while (this_len > 0)
1501               {
1502                 int pat_ch = *p++;
1503                 int buf_ch = FETCH_BYTE (this_pos);
1504                 TRANSLATE (buf_ch, trt, buf_ch);
1505
1506                 if (buf_ch != pat_ch)
1507                   break;
1508                 this_len--;
1509                 this_pos++;
1510               }
1511
1512             if (this_len == 0)
1513               {
1514                 match_byte = len;
1515                 pos -= len;
1516                 break;
1517               }
1518
1519             pos--;
1520           }
1521
1522         n++;
1523       }
1524
1525  stop:
1526   if (n == 0)
1527     {
1528       if (forward)
1529         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1530       else
1531         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1532
1533       return pos;
1534     }
1535   else if (n > 0)
1536     return -n;
1537   else
1538     return n;
1539 }
1540 \f
1541 /* Do Boyer-Moore search N times for the string BASE_PAT,
1542    whose length is LEN/LEN_BYTE,
1543    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1544    DIRECTION says which direction we search in.
1545    TRT and INVERSE_TRT are translation tables.
1546    Characters in PAT are already translated by TRT.
1547
1548    This kind of search works if all the characters in BASE_PAT that
1549    have nontrivial translation are the same aside from the last byte.
1550    This makes it possible to translate just the last byte of a
1551    character, and do so after just a simple test of the context.
1552    CHAR_BASE is nonzero iff there is such a non-ASCII character.
1553
1554    If that criterion is not satisfied, do not call this function.  */
1555
1556 static int
1557 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1558              pos, pos_byte, lim, lim_byte, char_base)
1559      int n;
1560      unsigned char *base_pat;
1561      int len, len_byte;
1562      Lisp_Object trt;
1563      Lisp_Object inverse_trt;
1564      int pos, pos_byte;
1565      int lim, lim_byte;
1566      int char_base;
1567 {
1568   int direction = ((n > 0) ? 1 : -1);
1569   register int dirlen;
1570   int infinity, limit, stride_for_teases = 0;
1571   register int *BM_tab;
1572   int *BM_tab_base;
1573   register unsigned char *cursor, *p_limit;
1574   register int i, j;
1575   unsigned char *pat, *pat_end;
1576   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1577
1578   unsigned char simple_translate[0400];
1579   /* These are set to the preceding bytes of a byte to be translated
1580      if char_base is nonzero.  As the maximum byte length of a
1581      multibyte character is 5, we have to check at most four previous
1582      bytes.  */
1583   int translate_prev_byte1 = 0;
1584   int translate_prev_byte2 = 0;
1585   int translate_prev_byte3 = 0;
1586   int translate_prev_byte4 = 0;
1587
1588 #ifdef C_ALLOCA
1589   int BM_tab_space[0400];
1590   BM_tab = &BM_tab_space[0];
1591 #else
1592   BM_tab = (int *) alloca (0400 * sizeof (int));
1593 #endif
1594   /* The general approach is that we are going to maintain that we know */
1595   /* the first (closest to the present position, in whatever direction */
1596   /* we're searching) character that could possibly be the last */
1597   /* (furthest from present position) character of a valid match.  We */
1598   /* advance the state of our knowledge by looking at that character */
1599   /* and seeing whether it indeed matches the last character of the */
1600   /* pattern.  If it does, we take a closer look.  If it does not, we */
1601   /* move our pointer (to putative last characters) as far as is */
1602   /* logically possible.  This amount of movement, which I call a */
1603   /* stride, will be the length of the pattern if the actual character */
1604   /* appears nowhere in the pattern, otherwise it will be the distance */
1605   /* from the last occurrence of that character to the end of the */
1606   /* pattern. */
1607   /* As a coding trick, an enormous stride is coded into the table for */
1608   /* characters that match the last character.  This allows use of only */
1609   /* a single test, a test for having gone past the end of the */
1610   /* permissible match region, to test for both possible matches (when */
1611   /* the stride goes past the end immediately) and failure to */
1612   /* match (where you get nudged past the end one stride at a time). */
1613
1614   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1615   /* is determined only by the last character of the putative match. */
1616   /* If that character does not match, we will stride the proper */
1617   /* distance to propose a match that superimposes it on the last */
1618   /* instance of a character that matches it (per trt), or misses */
1619   /* it entirely if there is none. */
1620
1621   dirlen = len_byte * direction;
1622   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1623
1624   /* Record position after the end of the pattern.  */
1625   pat_end = base_pat + len_byte;
1626   /* BASE_PAT points to a character that we start scanning from.
1627      It is the first character in a forward search,
1628      the last character in a backward search.  */
1629   if (direction < 0)
1630     base_pat = pat_end - 1;
1631
1632   BM_tab_base = BM_tab;
1633   BM_tab += 0400;
1634   j = dirlen;           /* to get it in a register */
1635   /* A character that does not appear in the pattern induces a */
1636   /* stride equal to the pattern length. */
1637   while (BM_tab_base != BM_tab)
1638     {
1639       *--BM_tab = j;
1640       *--BM_tab = j;
1641       *--BM_tab = j;
1642       *--BM_tab = j;
1643     }
1644
1645   /* We use this for translation, instead of TRT itself.
1646      We fill this in to handle the characters that actually
1647      occur in the pattern.  Others don't matter anyway!  */
1648   bzero (simple_translate, sizeof simple_translate);
1649   for (i = 0; i < 0400; i++)
1650     simple_translate[i] = i;
1651
1652   if (char_base)
1653     {
1654       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1655          byte following them are the target of translation.  */
1656       unsigned char str[MAX_MULTIBYTE_LENGTH];
1657       int len = CHAR_STRING (char_base, str);
1658
1659       translate_prev_byte1 = str[len - 2];
1660       if (len > 2)
1661         {
1662           translate_prev_byte2 = str[len - 3];
1663           if (len > 3)
1664             {
1665               translate_prev_byte3 = str[len - 4];
1666               if (len > 4)
1667                 translate_prev_byte4 = str[len - 5];
1668             }
1669         }
1670     }
1671
1672   i = 0;
1673   while (i != infinity)
1674     {
1675       unsigned char *ptr = base_pat + i;
1676       i += direction;
1677       if (i == dirlen)
1678         i = infinity;
1679       if (! NILP (trt))
1680         {
1681           /* If the byte currently looking at is the last of a
1682              character to check case-equivalents, set CH to that
1683              character.  An ASCII character and a non-ASCII character
1684              matching with CHAR_BASE are to be checked.  */
1685           int ch = -1;
1686
1687           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1688             ch = *ptr;
1689           else if (char_base
1690                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1691             {
1692               unsigned char *charstart = ptr - 1;
1693
1694               while (! (CHAR_HEAD_P (*charstart)))
1695                 charstart--;
1696               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1697               if (char_base != (ch & ~0x3F))
1698                 ch = -1;
1699             }
1700
1701           if (ch > 0400)
1702             j = (ch & 0x3F) | 0200;
1703           else
1704             j = *ptr;
1705
1706           if (i == infinity)
1707             stride_for_teases = BM_tab[j];
1708
1709           BM_tab[j] = dirlen - i;
1710           /* A translation table is accompanied by its inverse -- see */
1711           /* comment following downcase_table for details */
1712           if (ch >= 0)
1713             {
1714               int starting_ch = ch;
1715               int starting_j = j;
1716
1717               while (1)
1718                 {
1719                   TRANSLATE (ch, inverse_trt, ch);
1720                   if (ch > 0400)
1721                     j = (ch & 0x3F) | 0200;
1722                   else
1723                     j = ch;
1724
1725                   /* For all the characters that map into CH,
1726                      set up simple_translate to map the last byte
1727                      into STARTING_J.  */
1728                   simple_translate[j] = starting_j;
1729                   if (ch == starting_ch)
1730                     break;
1731                   BM_tab[j] = dirlen - i;
1732                 }
1733             }
1734         }
1735       else
1736         {
1737           j = *ptr;
1738
1739           if (i == infinity)
1740             stride_for_teases = BM_tab[j];
1741           BM_tab[j] = dirlen - i;
1742         }
1743       /* stride_for_teases tells how much to stride if we get a */
1744       /* match on the far character but are subsequently */
1745       /* disappointed, by recording what the stride would have been */
1746       /* for that character if the last character had been */
1747       /* different. */
1748     }
1749   infinity = dirlen - infinity;
1750   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1751   /* loop invariant - POS_BYTE points at where last char (first
1752      char if reverse) of pattern would align in a possible match.  */
1753   while (n != 0)
1754     {
1755       int tail_end;
1756       unsigned char *tail_end_ptr;
1757
1758       /* It's been reported that some (broken) compiler thinks that
1759          Boolean expressions in an arithmetic context are unsigned.
1760          Using an explicit ?1:0 prevents this.  */
1761       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1762           < 0)
1763         return (n * (0 - direction));
1764       /* First we do the part we can by pointers (maybe nothing) */
1765       QUIT;
1766       pat = base_pat;
1767       limit = pos_byte - dirlen + direction;
1768       if (direction > 0)
1769         {
1770           limit = BUFFER_CEILING_OF (limit);
1771           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1772              can take on without hitting edge of buffer or the gap.  */
1773           limit = min (limit, pos_byte + 20000);
1774           limit = min (limit, lim_byte - 1);
1775         }
1776       else
1777         {
1778           limit = BUFFER_FLOOR_OF (limit);
1779           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1780              can take on without hitting edge of buffer or the gap.  */
1781           limit = max (limit, pos_byte - 20000);
1782           limit = max (limit, lim_byte);
1783         }
1784       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1785       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1786
1787       if ((limit - pos_byte) * direction > 20)
1788         {
1789           unsigned char *p2;
1790
1791           p_limit = BYTE_POS_ADDR (limit);
1792           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1793           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1794           while (1)             /* use one cursor setting as long as i can */
1795             {
1796               if (direction > 0) /* worth duplicating */
1797                 {
1798                   /* Use signed comparison if appropriate
1799                      to make cursor+infinity sure to be > p_limit.
1800                      Assuming that the buffer lies in a range of addresses
1801                      that are all "positive" (as ints) or all "negative",
1802                      either kind of comparison will work as long
1803                      as we don't step by infinity.  So pick the kind
1804                      that works when we do step by infinity.  */
1805                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1806                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1807                       cursor += BM_tab[*cursor];
1808                   else
1809                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1810                       cursor += BM_tab[*cursor];
1811                 }
1812               else
1813                 {
1814                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1815                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1816                       cursor += BM_tab[*cursor];
1817                   else
1818                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1819                       cursor += BM_tab[*cursor];
1820                 }
1821 /* If you are here, cursor is beyond the end of the searched region. */
1822 /* This can happen if you match on the far character of the pattern, */
1823 /* because the "stride" of that character is infinity, a number able */
1824 /* to throw you well beyond the end of the search.  It can also */
1825 /* happen if you fail to match within the permitted region and would */
1826 /* otherwise try a character beyond that region */
1827               if ((cursor - p_limit) * direction <= len_byte)
1828                 break;  /* a small overrun is genuine */
1829               cursor -= infinity; /* large overrun = hit */
1830               i = dirlen - direction;
1831               if (! NILP (trt))
1832                 {
1833                   while ((i -= direction) + direction != 0)
1834                     {
1835                       int ch;
1836                       cursor -= direction;
1837                       /* Translate only the last byte of a character.  */
1838                       if (! multibyte
1839                           || ((cursor == tail_end_ptr
1840                                || CHAR_HEAD_P (cursor[1]))
1841                               && (CHAR_HEAD_P (cursor[0])
1842                                   /* Check if this is the last byte of
1843                                      a translable character.  */
1844                                   || (translate_prev_byte1 == cursor[-1]
1845                                       && (CHAR_HEAD_P (translate_prev_byte1)
1846                                           || (translate_prev_byte2 == cursor[-2]
1847                                               && (CHAR_HEAD_P (translate_prev_byte2)
1848                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1849                         ch = simple_translate[*cursor];
1850                       else
1851                         ch = *cursor;
1852                       if (pat[i] != ch)
1853                         break;
1854                     }
1855                 }
1856               else
1857                 {
1858                   while ((i -= direction) + direction != 0)
1859                     {
1860                       cursor -= direction;
1861                       if (pat[i] != *cursor)
1862                         break;
1863                     }
1864                 }
1865               cursor += dirlen - i - direction; /* fix cursor */
1866               if (i + direction == 0)
1867                 {
1868                   int position;
1869
1870                   cursor -= direction;
1871
1872                   position = pos_byte + cursor - p2 + ((direction > 0)
1873                                                        ? 1 - len_byte : 0);
1874                   set_search_regs (position, len_byte);
1875
1876                   if ((n -= direction) != 0)
1877                     cursor += dirlen; /* to resume search */
1878                   else
1879                     return ((direction > 0)
1880                             ? search_regs.end[0] : search_regs.start[0]);
1881                 }
1882               else
1883                 cursor += stride_for_teases; /* <sigh> we lose -  */
1884             }
1885           pos_byte += cursor - p2;
1886         }
1887       else
1888         /* Now we'll pick up a clump that has to be done the hard */
1889         /* way because it covers a discontinuity */
1890         {
1891           limit = ((direction > 0)
1892                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1893                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1894           limit = ((direction > 0)
1895                    ? min (limit + len_byte, lim_byte - 1)
1896                    : max (limit - len_byte, lim_byte));
1897           /* LIMIT is now the last value POS_BYTE can have
1898              and still be valid for a possible match.  */
1899           while (1)
1900             {
1901               /* This loop can be coded for space rather than */
1902               /* speed because it will usually run only once. */
1903               /* (the reach is at most len + 21, and typically */
1904               /* does not exceed len) */
1905               while ((limit - pos_byte) * direction >= 0)
1906                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1907               /* now run the same tests to distinguish going off the */
1908               /* end, a match or a phony match. */
1909               if ((pos_byte - limit) * direction <= len_byte)
1910                 break;  /* ran off the end */
1911               /* Found what might be a match.
1912                  Set POS_BYTE back to last (first if reverse) pos.  */
1913               pos_byte -= infinity;
1914               i = dirlen - direction;
1915               while ((i -= direction) + direction != 0)
1916                 {
1917                   int ch;
1918                   unsigned char *ptr;
1919                   pos_byte -= direction;
1920                   ptr = BYTE_POS_ADDR (pos_byte);
1921                   /* Translate only the last byte of a character.  */
1922                   if (! multibyte
1923                       || ((ptr == tail_end_ptr
1924                            || CHAR_HEAD_P (ptr[1]))
1925                           && (CHAR_HEAD_P (ptr[0])
1926                               /* Check if this is the last byte of a
1927                                  translable character.  */
1928                               || (translate_prev_byte1 == ptr[-1]
1929                                   && (CHAR_HEAD_P (translate_prev_byte1)
1930                                       || (translate_prev_byte2 == ptr[-2]
1931                                           && (CHAR_HEAD_P (translate_prev_byte2)
1932                                               || translate_prev_byte3 == ptr[-3])))))))
1933                     ch = simple_translate[*ptr];
1934                   else
1935                     ch = *ptr;
1936                   if (pat[i] != ch)
1937                     break;
1938                 }
1939               /* Above loop has moved POS_BYTE part or all the way
1940                  back to the first pos (last pos if reverse).
1941                  Set it once again at the last (first if reverse) char.  */
1942               pos_byte += dirlen - i- direction;
1943               if (i + direction == 0)
1944                 {
1945                   int position;
1946                   pos_byte -= direction;
1947
1948                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1949
1950                   set_search_regs (position, len_byte);
1951
1952                   if ((n -= direction) != 0)
1953                     pos_byte += dirlen; /* to resume search */
1954                   else
1955                     return ((direction > 0)
1956                             ? search_regs.end[0] : search_regs.start[0]);
1957                 }
1958               else
1959                 pos_byte += stride_for_teases;
1960             }
1961           }
1962       /* We have done one clump.  Can we continue? */
1963       if ((lim_byte - pos_byte) * direction < 0)
1964         return ((0 - n) * direction);
1965     }
1966   return BYTE_TO_CHAR (pos_byte);
1967 }
1968
1969 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1970    for the overall match just found in the current buffer.
1971    Also clear out the match data for registers 1 and up.  */
1972
1973 static void
1974 set_search_regs (beg_byte, nbytes)
1975      int beg_byte, nbytes;
1976 {
1977   int i;
1978
1979   /* Make sure we have registers in which to store
1980      the match position.  */
1981   if (search_regs.num_regs == 0)
1982     {
1983       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1984       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1985       search_regs.num_regs = 2;
1986     }
1987
1988   /* Clear out the other registers.  */
1989   for (i = 1; i < search_regs.num_regs; i++)
1990     {
1991       search_regs.start[i] = -1;
1992       search_regs.end[i] = -1;
1993     }
1994
1995   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1996   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1997   XSETBUFFER (last_thing_searched, current_buffer);
1998 }
1999 \f
2000 /* Given a string of words separated by word delimiters,
2001   compute a regexp that matches those exact words
2002   separated by arbitrary punctuation.  */
2003
2004 static Lisp_Object
2005 wordify (string)
2006      Lisp_Object string;
2007 {
2008   register unsigned char *p, *o;
2009   register int i, i_byte, len, punct_count = 0, word_count = 0;
2010   Lisp_Object val;
2011   int prev_c = 0;
2012   int adjust;
2013
2014   CHECK_STRING (string);
2015   p = SDATA (string);
2016   len = SCHARS (string);
2017
2018   for (i = 0, i_byte = 0; i < len; )
2019     {
2020       int c;
2021
2022       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2023
2024       if (SYNTAX (c) != Sword)
2025         {
2026           punct_count++;
2027           if (i > 0 && SYNTAX (prev_c) == Sword)
2028             word_count++;
2029         }
2030
2031       prev_c = c;
2032     }
2033
2034   if (SYNTAX (prev_c) == Sword)
2035     word_count++;
2036   if (!word_count)
2037     return empty_string;
2038
2039   adjust = - punct_count + 5 * (word_count - 1) + 4;
2040   if (STRING_MULTIBYTE (string))
2041     val = make_uninit_multibyte_string (len + adjust,
2042                                         SBYTES (string)
2043                                         + adjust);
2044   else
2045     val = make_uninit_string (len + adjust);
2046
2047   o = SDATA (val);
2048   *o++ = '\\';
2049   *o++ = 'b';
2050   prev_c = 0;
2051
2052   for (i = 0, i_byte = 0; i < len; )
2053     {
2054       int c;
2055       int i_byte_orig = i_byte;
2056
2057       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2058
2059       if (SYNTAX (c) == Sword)
2060         {
2061           bcopy (SDATA (string) + i_byte_orig, o,
2062                  i_byte - i_byte_orig);
2063           o += i_byte - i_byte_orig;
2064         }
2065       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2066         {
2067           *o++ = '\\';
2068           *o++ = 'W';
2069           *o++ = '\\';
2070           *o++ = 'W';
2071           *o++ = '*';
2072         }
2073
2074       prev_c = c;
2075     }
2076
2077   *o++ = '\\';
2078   *o++ = 'b';
2079
2080   return val;
2081 }
2082 \f
2083 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2084        "MSearch backward: ",
2085        doc: /* Search backward from point for STRING.
2086 Set point to the beginning of the occurrence found, and return point.
2087 An optional second argument bounds the search; it is a buffer position.
2088 The match found must not extend before that position.
2089 Optional third argument, if t, means if fail just return nil (no error).
2090  If not nil and not t, position at limit of search and return nil.
2091 Optional fourth argument is repeat count--search for successive occurrences.
2092
2093 Search case-sensitivity is determined by the value of the variable
2094 `case-fold-search', which see.
2095
2096 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2097      (string, bound, noerror, count)
2098      Lisp_Object string, bound, noerror, count;
2099 {
2100   return search_command (string, bound, noerror, count, -1, 0, 0);
2101 }
2102
2103 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2104        doc: /* Search forward from point for STRING.
2105 Set point to the end of the occurrence found, and return point.
2106 An optional second argument bounds the search; it is a buffer position.
2107 The match found must not extend after that position.  nil is equivalent
2108   to (point-max).
2109 Optional third argument, if t, means if fail just return nil (no error).
2110   If not nil and not t, move to limit of search and return nil.
2111 Optional fourth argument is repeat count--search for successive occurrences.
2112
2113 Search case-sensitivity is determined by the value of the variable
2114 `case-fold-search', which see.
2115
2116 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2117      (string, bound, noerror, count)
2118      Lisp_Object string, bound, noerror, count;
2119 {
2120   return search_command (string, bound, noerror, count, 1, 0, 0);
2121 }
2122
2123 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2124        "sWord search backward: ",
2125        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2126 Set point to the beginning of the occurrence found, and return point.
2127 An optional second argument bounds the search; it is a buffer position.
2128 The match found must not extend before that position.
2129 Optional third argument, if t, means if fail just return nil (no error).
2130   If not nil and not t, move to limit of search and return nil.
2131 Optional fourth argument is repeat count--search for successive occurrences.  */)
2132      (string, bound, noerror, count)
2133      Lisp_Object string, bound, noerror, count;
2134 {
2135   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2136 }
2137
2138 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2139        "sWord search: ",
2140        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2141 Set point to the end of the occurrence found, and return point.
2142 An optional second argument bounds the search; it is a buffer position.
2143 The match found must not extend after that position.
2144 Optional third argument, if t, means if fail just return nil (no error).
2145   If not nil and not t, move to limit of search and return nil.
2146 Optional fourth argument is repeat count--search for successive occurrences.  */)
2147      (string, bound, noerror, count)
2148      Lisp_Object string, bound, noerror, count;
2149 {
2150   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2151 }
2152
2153 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2154        "sRE search backward: ",
2155        doc: /* Search backward from point for match for regular expression REGEXP.
2156 Set point to the beginning of the match, and return point.
2157 The match found is the one starting last in the buffer
2158 and yet ending before the origin of the search.
2159 An optional second argument bounds the search; it is a buffer position.
2160 The match found must start at or after that position.
2161 Optional third argument, if t, means if fail just return nil (no error).
2162   If not nil and not t, move to limit of search and return nil.
2163 Optional fourth argument is repeat count--search for successive occurrences.
2164 See also the functions `match-beginning', `match-end', `match-string',
2165 and `replace-match'.  */)
2166      (regexp, bound, noerror, count)
2167      Lisp_Object regexp, bound, noerror, count;
2168 {
2169   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2170 }
2171
2172 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2173        "sRE search: ",
2174        doc: /* Search forward from point for regular expression REGEXP.
2175 Set point to the end of the occurrence found, and return point.
2176 An optional second argument bounds the search; it is a buffer position.
2177 The match found must not extend after that position.
2178 Optional third argument, if t, means if fail just return nil (no error).
2179   If not nil and not t, move to limit of search and return nil.
2180 Optional fourth argument is repeat count--search for successive occurrences.
2181 See also the functions `match-beginning', `match-end', `match-string',
2182 and `replace-match'.  */)
2183      (regexp, bound, noerror, count)
2184      Lisp_Object regexp, bound, noerror, count;
2185 {
2186   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2187 }
2188
2189 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2190        "sPosix search backward: ",
2191        doc: /* Search backward from point for match for regular expression REGEXP.
2192 Find the longest match in accord with Posix regular expression rules.
2193 Set point to the beginning of the match, and return point.
2194 The match found is the one starting last in the buffer
2195 and yet ending before the origin of the search.
2196 An optional second argument bounds the search; it is a buffer position.
2197 The match found must start at or after that position.
2198 Optional third argument, if t, means if fail just return nil (no error).
2199   If not nil and not t, move to limit of search and return nil.
2200 Optional fourth argument is repeat count--search for successive occurrences.
2201 See also the functions `match-beginning', `match-end', `match-string',
2202 and `replace-match'.  */)
2203      (regexp, bound, noerror, count)
2204      Lisp_Object regexp, bound, noerror, count;
2205 {
2206   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2207 }
2208
2209 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2210        "sPosix search: ",
2211        doc: /* Search forward from point for regular expression REGEXP.
2212 Find the longest match in accord with Posix regular expression rules.
2213 Set point to the end of the occurrence found, and return point.
2214 An optional second argument bounds the search; it is a buffer position.
2215 The match found must not extend after that position.
2216 Optional third argument, if t, means if fail just return nil (no error).
2217   If not nil and not t, move to limit of search and return nil.
2218 Optional fourth argument is repeat count--search for successive occurrences.
2219 See also the functions `match-beginning', `match-end', `match-string',
2220 and `replace-match'.  */)
2221      (regexp, bound, noerror, count)
2222      Lisp_Object regexp, bound, noerror, count;
2223 {
2224   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2225 }
2226 \f
2227 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2228        doc: /* Replace text matched by last search with NEWTEXT.
2229 Leave point at the end of the replacement text.
2230
2231 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2232 Otherwise maybe capitalize the whole text, or maybe just word initials,
2233 based on the replaced text.
2234 If the replaced text has only capital letters
2235 and has at least one multiletter word, convert NEWTEXT to all caps.
2236 Otherwise if all words are capitalized in the replaced text,
2237 capitalize each word in NEWTEXT.
2238
2239 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2240 Otherwise treat `\\' as special:
2241   `\\&' in NEWTEXT means substitute original matched text.
2242   `\\N' means substitute what matched the Nth `\\(...\\)'.
2243        If Nth parens didn't match, substitute nothing.
2244   `\\\\' means insert one `\\'.
2245 Case conversion does not apply to these substitutions.
2246
2247 FIXEDCASE and LITERAL are optional arguments.
2248
2249 The optional fourth argument STRING can be a string to modify.
2250 This is meaningful when the previous match was done against STRING,
2251 using `string-match'.  When used this way, `replace-match'
2252 creates and returns a new string made by copying STRING and replacing
2253 the part of STRING that was matched.
2254
2255 The optional fifth argument SUBEXP specifies a subexpression;
2256 it says to replace just that subexpression with NEWTEXT,
2257 rather than replacing the entire matched text.
2258 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2259 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2260 NEWTEXT in place of subexp N.
2261 This is useful only after a regular expression search or match,
2262 since only regular expressions have distinguished subexpressions.  */)
2263      (newtext, fixedcase, literal, string, subexp)
2264      Lisp_Object newtext, fixedcase, literal, string, subexp;
2265 {
2266   enum { nochange, all_caps, cap_initial } case_action;
2267   register int pos, pos_byte;
2268   int some_multiletter_word;
2269   int some_lowercase;
2270   int some_uppercase;
2271   int some_nonuppercase_initial;
2272   register int c, prevc;
2273   int sub;
2274   int opoint, newpoint;
2275
2276   CHECK_STRING (newtext);
2277
2278   if (! NILP (string))
2279     CHECK_STRING (string);
2280
2281   case_action = nochange;       /* We tried an initialization */
2282                                 /* but some C compilers blew it */
2283
2284   if (search_regs.num_regs <= 0)
2285     error ("`replace-match' called before any match found");
2286
2287   if (NILP (subexp))
2288     sub = 0;
2289   else
2290     {
2291       CHECK_NUMBER (subexp);
2292       sub = XINT (subexp);
2293       if (sub < 0 || sub >= search_regs.num_regs)
2294         args_out_of_range (subexp, make_number (search_regs.num_regs));
2295     }
2296
2297   if (NILP (string))
2298     {
2299       if (search_regs.start[sub] < BEGV
2300           || search_regs.start[sub] > search_regs.end[sub]
2301           || search_regs.end[sub] > ZV)
2302         args_out_of_range (make_number (search_regs.start[sub]),
2303                            make_number (search_regs.end[sub]));
2304     }
2305   else
2306     {
2307       if (search_regs.start[sub] < 0
2308           || search_regs.start[sub] > search_regs.end[sub]
2309           || search_regs.end[sub] > SCHARS (string))
2310         args_out_of_range (make_number (search_regs.start[sub]),
2311                            make_number (search_regs.end[sub]));
2312     }
2313
2314   if (NILP (fixedcase))
2315     {
2316       /* Decide how to casify by examining the matched text. */
2317       int last;
2318
2319       pos = search_regs.start[sub];
2320       last = search_regs.end[sub];
2321
2322       if (NILP (string))
2323         pos_byte = CHAR_TO_BYTE (pos);
2324       else
2325         pos_byte = string_char_to_byte (string, pos);
2326
2327       prevc = '\n';
2328       case_action = all_caps;
2329
2330       /* some_multiletter_word is set nonzero if any original word
2331          is more than one letter long. */
2332       some_multiletter_word = 0;
2333       some_lowercase = 0;
2334       some_nonuppercase_initial = 0;
2335       some_uppercase = 0;
2336
2337       while (pos < last)
2338         {
2339           if (NILP (string))
2340             {
2341               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2342               INC_BOTH (pos, pos_byte);
2343             }
2344           else
2345             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2346
2347           if (LOWERCASEP (c))
2348             {
2349               /* Cannot be all caps if any original char is lower case */
2350
2351               some_lowercase = 1;
2352               if (SYNTAX (prevc) != Sword)
2353                 some_nonuppercase_initial = 1;
2354               else
2355                 some_multiletter_word = 1;
2356             }
2357           else if (!NOCASEP (c))
2358             {
2359               some_uppercase = 1;
2360               if (SYNTAX (prevc) != Sword)
2361                 ;
2362               else
2363                 some_multiletter_word = 1;
2364             }
2365           else
2366             {
2367               /* If the initial is a caseless word constituent,
2368                  treat that like a lowercase initial.  */
2369               if (SYNTAX (prevc) != Sword)
2370                 some_nonuppercase_initial = 1;
2371             }
2372
2373           prevc = c;
2374         }
2375
2376       /* Convert to all caps if the old text is all caps
2377          and has at least one multiletter word.  */
2378       if (! some_lowercase && some_multiletter_word)
2379         case_action = all_caps;
2380       /* Capitalize each word, if the old text has all capitalized words.  */
2381       else if (!some_nonuppercase_initial && some_multiletter_word)
2382         case_action = cap_initial;
2383       else if (!some_nonuppercase_initial && some_uppercase)
2384         /* Should x -> yz, operating on X, give Yz or YZ?
2385            We'll assume the latter.  */
2386         case_action = all_caps;
2387       else
2388         case_action = nochange;
2389     }
2390
2391   /* Do replacement in a string.  */
2392   if (!NILP (string))
2393     {
2394       Lisp_Object before, after;
2395
2396       before = Fsubstring (string, make_number (0),
2397                            make_number (search_regs.start[sub]));
2398       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2399
2400       /* Substitute parts of the match into NEWTEXT
2401          if desired.  */
2402       if (NILP (literal))
2403         {
2404           int lastpos = 0;
2405           int lastpos_byte = 0;
2406           /* We build up the substituted string in ACCUM.  */
2407           Lisp_Object accum;
2408           Lisp_Object middle;
2409           int length = SBYTES (newtext);
2410
2411           accum = Qnil;
2412
2413           for (pos_byte = 0, pos = 0; pos_byte < length;)
2414             {
2415               int substart = -1;
2416               int subend = 0;
2417               int delbackslash = 0;
2418
2419               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2420
2421               if (c == '\\')
2422                 {
2423                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2424
2425                   if (c == '&')
2426                     {
2427                       substart = search_regs.start[sub];
2428                       subend = search_regs.end[sub];
2429                     }
2430                   else if (c >= '1' && c <= '9')
2431                     {
2432                       if (search_regs.start[c - '0'] >= 0
2433                           && c <= search_regs.num_regs + '0')
2434                         {
2435                           substart = search_regs.start[c - '0'];
2436                           subend = search_regs.end[c - '0'];
2437                         }
2438                       else
2439                         {
2440                           /* If that subexp did not match,
2441                              replace \\N with nothing.  */
2442                           substart = 0;
2443                           subend = 0;
2444                         }
2445                     }
2446                   else if (c == '\\')
2447                     delbackslash = 1;
2448                   else
2449                     error ("Invalid use of `\\' in replacement text");
2450                 }
2451               if (substart >= 0)
2452                 {
2453                   if (pos - 2 != lastpos)
2454                     middle = substring_both (newtext, lastpos,
2455                                              lastpos_byte,
2456                                              pos - 2, pos_byte - 2);
2457                   else
2458                     middle = Qnil;
2459                   accum = concat3 (accum, middle,
2460                                    Fsubstring (string,
2461                                                make_number (substart),
2462                                                make_number (subend)));
2463                   lastpos = pos;
2464                   lastpos_byte = pos_byte;
2465                 }
2466               else if (delbackslash)
2467                 {
2468                   middle = substring_both (newtext, lastpos,
2469                                            lastpos_byte,
2470                                            pos - 1, pos_byte - 1);
2471
2472                   accum = concat2 (accum, middle);
2473                   lastpos = pos;
2474                   lastpos_byte = pos_byte;
2475                 }
2476             }
2477
2478           if (pos != lastpos)
2479             middle = substring_both (newtext, lastpos,
2480                                      lastpos_byte,
2481                                      pos, pos_byte);
2482           else
2483             middle = Qnil;
2484
2485           newtext = concat2 (accum, middle);
2486         }
2487
2488       /* Do case substitution in NEWTEXT if desired.  */
2489       if (case_action == all_caps)
2490         newtext = Fupcase (newtext);
2491       else if (case_action == cap_initial)
2492         newtext = Fupcase_initials (newtext);
2493
2494       return concat3 (before, newtext, after);
2495     }
2496
2497   /* Record point, then move (quietly) to the start of the match.  */
2498   if (PT >= search_regs.end[sub])
2499     opoint = PT - ZV;
2500   else if (PT > search_regs.start[sub])
2501     opoint = search_regs.end[sub] - ZV;
2502   else
2503     opoint = PT;
2504
2505   /* If we want non-literal replacement,
2506      perform substitution on the replacement string.  */
2507   if (NILP (literal))
2508     {
2509       int length = SBYTES (newtext);
2510       unsigned char *substed;
2511       int substed_alloc_size, substed_len;
2512       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2513       int str_multibyte = STRING_MULTIBYTE (newtext);
2514       Lisp_Object rev_tbl;
2515       int really_changed = 0;
2516
2517       rev_tbl = Qnil;
2518
2519       substed_alloc_size = length * 2 + 100;
2520       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2521       substed_len = 0;
2522
2523       /* Go thru NEWTEXT, producing the actual text to insert in
2524          SUBSTED while adjusting multibyteness to that of the current
2525          buffer.  */
2526
2527       for (pos_byte = 0, pos = 0; pos_byte < length;)
2528         {
2529           unsigned char str[MAX_MULTIBYTE_LENGTH];
2530           unsigned char *add_stuff = NULL;
2531           int add_len = 0;
2532           int idx = -1;
2533
2534           if (str_multibyte)
2535             {
2536               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2537               if (!buf_multibyte)
2538                 c = multibyte_char_to_unibyte (c, rev_tbl);
2539             }
2540           else
2541             {
2542               /* Note that we don't have to increment POS.  */
2543               c = SREF (newtext, pos_byte++);
2544               if (buf_multibyte)
2545                 c = unibyte_char_to_multibyte (c);
2546             }
2547
2548           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2549              or set IDX to a match index, which means put that part
2550              of the buffer text into SUBSTED.  */
2551
2552           if (c == '\\')
2553             {
2554               really_changed = 1;
2555
2556               if (str_multibyte)
2557                 {
2558                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2559                                                       pos, pos_byte);
2560                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2561                     c = multibyte_char_to_unibyte (c, rev_tbl);
2562                 }
2563               else
2564                 {
2565                   c = SREF (newtext, pos_byte++);
2566                   if (buf_multibyte)
2567                     c = unibyte_char_to_multibyte (c);
2568                 }
2569
2570               if (c == '&')
2571                 idx = sub;
2572               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2573                 {
2574                   if (search_regs.start[c - '0'] >= 1)
2575                     idx = c - '0';
2576                 }
2577               else if (c == '\\')
2578                 add_len = 1, add_stuff = "\\";
2579               else
2580                 {
2581                   xfree (substed);
2582                   error ("Invalid use of `\\' in replacement text");
2583                 }
2584             }
2585           else
2586             {
2587               add_len = CHAR_STRING (c, str);
2588               add_stuff = str;
2589             }
2590
2591           /* If we want to copy part of a previous match,
2592              set up ADD_STUFF and ADD_LEN to point to it.  */
2593           if (idx >= 0)
2594             {
2595               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2596               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2597               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2598                 move_gap (search_regs.start[idx]);
2599               add_stuff = BYTE_POS_ADDR (begbyte);
2600             }
2601
2602           /* Now the stuff we want to add to SUBSTED
2603              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2604
2605           /* Make sure SUBSTED is big enough.  */
2606           if (substed_len + add_len >= substed_alloc_size)
2607             {
2608               substed_alloc_size = substed_len + add_len + 500;
2609               substed = (unsigned char *) xrealloc (substed,
2610                                                     substed_alloc_size + 1);
2611             }
2612
2613           /* Now add to the end of SUBSTED.  */
2614           if (add_stuff)
2615             {
2616               bcopy (add_stuff, substed + substed_len, add_len);
2617               substed_len += add_len;
2618             }
2619         }
2620
2621       if (really_changed)
2622         {
2623           if (buf_multibyte)
2624             {
2625               int nchars = multibyte_chars_in_text (substed, substed_len);
2626
2627               newtext = make_multibyte_string (substed, nchars, substed_len);
2628             }
2629           else
2630             newtext = make_unibyte_string (substed, substed_len);
2631         }
2632       xfree (substed);
2633     }
2634
2635   /* Replace the old text with the new in the cleanest possible way.  */
2636   replace_range (search_regs.start[sub], search_regs.end[sub],
2637                  newtext, 1, 0, 1);
2638   newpoint = search_regs.start[sub] + SCHARS (newtext);
2639
2640   if (case_action == all_caps)
2641     Fupcase_region (make_number (search_regs.start[sub]),
2642                     make_number (newpoint));
2643   else if (case_action == cap_initial)
2644     Fupcase_initials_region (make_number (search_regs.start[sub]),
2645                              make_number (newpoint));
2646
2647   /* Adjust search data for this change.  */
2648   {
2649     int oldend = search_regs.end[sub];
2650     int oldstart = search_regs.start[sub];
2651     int change = newpoint - search_regs.end[sub];
2652     int i;
2653
2654     for (i = 0; i < search_regs.num_regs; i++)
2655       {
2656         if (search_regs.start[i] >= oldend)
2657           search_regs.start[i] += change;
2658         else if (search_regs.start[i] > oldstart)
2659           search_regs.start[i] = oldstart;
2660         if (search_regs.end[i] >= oldend)
2661           search_regs.end[i] += change;
2662         else if (search_regs.end[i] > oldstart)
2663           search_regs.end[i] = oldstart;
2664       }
2665   }
2666
2667   /* Put point back where it was in the text.  */
2668   if (opoint <= 0)
2669     TEMP_SET_PT (opoint + ZV);
2670   else
2671     TEMP_SET_PT (opoint);
2672
2673   /* Now move point "officially" to the start of the inserted replacement.  */
2674   move_if_not_intangible (newpoint);
2675
2676   return Qnil;
2677 }
2678 \f
2679 static Lisp_Object
2680 match_limit (num, beginningp)
2681      Lisp_Object num;
2682      int beginningp;
2683 {
2684   register int n;
2685
2686   CHECK_NUMBER (num);
2687   n = XINT (num);
2688   if (n < 0)
2689     args_out_of_range (num, make_number (0));
2690   if (search_regs.num_regs <= 0)
2691     error ("No match data, because no search succeeded");
2692   if (n >= search_regs.num_regs
2693       || search_regs.start[n] < 0)
2694     return Qnil;
2695   return (make_number ((beginningp) ? search_regs.start[n]
2696                                     : search_regs.end[n]));
2697 }
2698
2699 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2700        doc: /* Return position of start of text matched by last search.
2701 SUBEXP, a number, specifies which parenthesized expression in the last
2702   regexp.
2703 Value is nil if SUBEXPth pair didn't match, or there were less than
2704   SUBEXP pairs.
2705 Zero means the entire text matched by the whole regexp or whole string.  */)
2706      (subexp)
2707      Lisp_Object subexp;
2708 {
2709   return match_limit (subexp, 1);
2710 }
2711
2712 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2713        doc: /* Return position of end of text matched by last search.
2714 SUBEXP, a number, specifies which parenthesized expression in the last
2715   regexp.
2716 Value is nil if SUBEXPth pair didn't match, or there were less than
2717   SUBEXP pairs.
2718 Zero means the entire text matched by the whole regexp or whole string.  */)
2719      (subexp)
2720      Lisp_Object subexp;
2721 {
2722   return match_limit (subexp, 0);
2723 }
2724
2725 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2726        doc: /* Return a list containing all info on what the last search matched.
2727 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2728 All the elements are markers or nil (nil if the Nth pair didn't match)
2729 if the last match was on a buffer; integers or nil if a string was matched.
2730 Use `store-match-data' to reinstate the data in this list.
2731
2732 If INTEGERS (the optional first argument) is non-nil, always use
2733 integers \(rather than markers) to represent buffer positions.  In
2734 this case, and if the last match was in a buffer, the buffer will get
2735 stored as one additional element at the end of the list.
2736
2737 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2738 enough to hold all the values, and if INTEGERS is non-nil, no consing
2739 is done.
2740
2741 If optional third arg RESEAT is non-nil, any previous markers on the
2742 REUSE list will be modified to point to nowhere.
2743
2744 Return value is undefined if the last search failed.  */)
2745   (integers, reuse, reseat)
2746      Lisp_Object integers, reuse, reseat;
2747 {
2748   Lisp_Object tail, prev;
2749   Lisp_Object *data;
2750   int i, len;
2751
2752   if (!NILP (reseat))
2753     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2754       if (MARKERP (XCAR (tail)))
2755         {
2756           unchain_marker (XMARKER (XCAR (tail)));
2757           XSETCAR (tail, Qnil);
2758         }
2759
2760   if (NILP (last_thing_searched))
2761     return Qnil;
2762
2763   prev = Qnil;
2764
2765   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2766                                  * sizeof (Lisp_Object));
2767
2768   len = 0;
2769   for (i = 0; i < search_regs.num_regs; i++)
2770     {
2771       int start = search_regs.start[i];
2772       if (start >= 0)
2773         {
2774           if (EQ (last_thing_searched, Qt)
2775               || ! NILP (integers))
2776             {
2777               XSETFASTINT (data[2 * i], start);
2778               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2779             }
2780           else if (BUFFERP (last_thing_searched))
2781             {
2782               data[2 * i] = Fmake_marker ();
2783               Fset_marker (data[2 * i],
2784                            make_number (start),
2785                            last_thing_searched);
2786               data[2 * i + 1] = Fmake_marker ();
2787               Fset_marker (data[2 * i + 1],
2788                            make_number (search_regs.end[i]),
2789                            last_thing_searched);
2790             }
2791           else
2792             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2793             abort ();
2794
2795           len = 2 * i + 2;
2796         }
2797       else
2798         data[2 * i] = data[2 * i + 1] = Qnil;
2799     }
2800
2801   if (BUFFERP (last_thing_searched) && !NILP (integers))
2802     {
2803       data[len] = last_thing_searched;
2804       len++;
2805     }
2806
2807   /* If REUSE is not usable, cons up the values and return them.  */
2808   if (! CONSP (reuse))
2809     return Flist (len, data);
2810
2811   /* If REUSE is a list, store as many value elements as will fit
2812      into the elements of REUSE.  */
2813   for (i = 0, tail = reuse; CONSP (tail);
2814        i++, tail = XCDR (tail))
2815     {
2816       if (i < len)
2817         XSETCAR (tail, data[i]);
2818       else
2819         XSETCAR (tail, Qnil);
2820       prev = tail;
2821     }
2822
2823   /* If we couldn't fit all value elements into REUSE,
2824      cons up the rest of them and add them to the end of REUSE.  */
2825   if (i < len)
2826     XSETCDR (prev, Flist (len - i, data + i));
2827
2828   return reuse;
2829 }
2830
2831 /* Internal usage only:
2832    If RESEAT is `evaporate', put the markers back on the free list
2833    immediately.  No other references to the markers must exist in this case,
2834    so it is used only internally on the unwind stack and save-match-data from
2835    Lisp.  */
2836
2837 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2838        doc: /* Set internal data on last search match from elements of LIST.
2839 LIST should have been created by calling `match-data' previously.
2840
2841 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2842     (list, reseat)
2843      register Lisp_Object list, reseat;
2844 {
2845   register int i;
2846   register Lisp_Object marker;
2847
2848   if (running_asynch_code)
2849     save_search_regs ();
2850
2851   if (!CONSP (list) && !NILP (list))
2852     list = wrong_type_argument (Qconsp, list);
2853
2854   /* Unless we find a marker with a buffer or an explicit buffer
2855      in LIST, assume that this match data came from a string.  */
2856   last_thing_searched = Qt;
2857
2858   /* Allocate registers if they don't already exist.  */
2859   {
2860     int length = XFASTINT (Flength (list)) / 2;
2861
2862     if (length > search_regs.num_regs)
2863       {
2864         if (search_regs.num_regs == 0)
2865           {
2866             search_regs.start
2867               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2868             search_regs.end
2869               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2870           }
2871         else
2872           {
2873             search_regs.start
2874               = (regoff_t *) xrealloc (search_regs.start,
2875                                        length * sizeof (regoff_t));
2876             search_regs.end
2877               = (regoff_t *) xrealloc (search_regs.end,
2878                                        length * sizeof (regoff_t));
2879           }
2880
2881         for (i = search_regs.num_regs; i < length; i++)
2882           search_regs.start[i] = -1;
2883
2884         search_regs.num_regs = length;
2885       }
2886
2887     for (i = 0; CONSP (list); i++)
2888       {
2889         marker = XCAR (list);
2890         if (BUFFERP (marker))
2891           {
2892             last_thing_searched = marker;
2893             break;
2894           }
2895         if (i >= length)
2896           break;
2897         if (NILP (marker))
2898           {
2899             search_regs.start[i] = -1;
2900             list = XCDR (list);
2901           }
2902         else
2903           {
2904             int from;
2905             Lisp_Object m;
2906
2907             m = marker;
2908             if (MARKERP (marker))
2909               {
2910                 if (XMARKER (marker)->buffer == 0)
2911                   XSETFASTINT (marker, 0);
2912                 else
2913                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2914               }
2915
2916             CHECK_NUMBER_COERCE_MARKER (marker);
2917             from = XINT (marker);
2918
2919             if (!NILP (reseat) && MARKERP (m))
2920               {
2921                 if (EQ (reseat, Qevaporate))
2922                   free_marker (m);
2923                 else
2924                   unchain_marker (XMARKER (m));
2925                 XSETCAR (list, Qnil);
2926               }
2927
2928             if ((list = XCDR (list), !CONSP (list)))
2929               break;
2930
2931             m = marker = XCAR (list);
2932
2933             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2934               XSETFASTINT (marker, 0);
2935
2936             CHECK_NUMBER_COERCE_MARKER (marker);
2937             search_regs.start[i] = from;
2938             search_regs.end[i] = XINT (marker);
2939
2940             if (!NILP (reseat) && MARKERP (m))
2941               {
2942                 if (EQ (reseat, Qevaporate))
2943                   free_marker (m);
2944                 else
2945                   unchain_marker (XMARKER (m));
2946                 XSETCAR (list, Qnil);
2947               }
2948           }
2949         list = XCDR (list);
2950       }
2951
2952     for (; i < search_regs.num_regs; i++)
2953       search_regs.start[i] = -1;
2954   }
2955
2956   return Qnil;
2957 }
2958
2959 /* If non-zero the match data have been saved in saved_search_regs
2960    during the execution of a sentinel or filter. */
2961 static int search_regs_saved;
2962 static struct re_registers saved_search_regs;
2963 static Lisp_Object saved_last_thing_searched;
2964
2965 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2966    if asynchronous code (filter or sentinel) is running. */
2967 static void
2968 save_search_regs ()
2969 {
2970   if (!search_regs_saved)
2971     {
2972       saved_search_regs.num_regs = search_regs.num_regs;
2973       saved_search_regs.start = search_regs.start;
2974       saved_search_regs.end = search_regs.end;
2975       saved_last_thing_searched = last_thing_searched;
2976       last_thing_searched = Qnil;
2977       search_regs.num_regs = 0;
2978       search_regs.start = 0;
2979       search_regs.end = 0;
2980
2981       search_regs_saved = 1;
2982     }
2983 }
2984
2985 /* Called upon exit from filters and sentinels. */
2986 void
2987 restore_search_regs ()
2988 {
2989   if (search_regs_saved)
2990     {
2991       if (search_regs.num_regs > 0)
2992         {
2993           xfree (search_regs.start);
2994           xfree (search_regs.end);
2995         }
2996       search_regs.num_regs = saved_search_regs.num_regs;
2997       search_regs.start = saved_search_regs.start;
2998       search_regs.end = saved_search_regs.end;
2999       last_thing_searched = saved_last_thing_searched;
3000       saved_last_thing_searched = Qnil;
3001       search_regs_saved = 0;
3002     }
3003 }
3004
3005 static Lisp_Object
3006 unwind_set_match_data (list)
3007      Lisp_Object list;
3008 {
3009   /* It is safe to free (evaporate) the markers immediately.  */
3010   return Fset_match_data (list, Qevaporate);
3011 }
3012
3013 /* Called to unwind protect the match data.  */
3014 void
3015 record_unwind_save_match_data ()
3016 {
3017   record_unwind_protect (unwind_set_match_data,
3018                          Fmatch_data (Qnil, Qnil, Qnil));
3019 }
3020
3021 /* Quote a string to inactivate reg-expr chars */
3022
3023 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3024        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3025      (string)
3026      Lisp_Object string;
3027 {
3028   register unsigned char *in, *out, *end;
3029   register unsigned char *temp;
3030   int backslashes_added = 0;
3031
3032   CHECK_STRING (string);
3033
3034   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3035
3036   /* Now copy the data into the new string, inserting escapes. */
3037
3038   in = SDATA (string);
3039   end = in + SBYTES (string);
3040   out = temp;
3041
3042   for (; in != end; in++)
3043     {
3044       if (*in == '[' || *in == ']'
3045           || *in == '*' || *in == '.' || *in == '\\'
3046           || *in == '?' || *in == '+'
3047           || *in == '^' || *in == '$')
3048         *out++ = '\\', backslashes_added++;
3049       *out++ = *in;
3050     }
3051
3052   return make_specified_string (temp,
3053                                 SCHARS (string) + backslashes_added,
3054                                 out - temp,
3055                                 STRING_MULTIBYTE (string));
3056 }
3057 \f
3058 void
3059 syms_of_search ()
3060 {
3061   register int i;
3062
3063   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3064     {
3065       searchbufs[i].buf.allocated = 100;
3066       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3067       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3068       searchbufs[i].regexp = Qnil;
3069       searchbufs[i].whitespace_regexp = Qnil;
3070       staticpro (&searchbufs[i].regexp);
3071       staticpro (&searchbufs[i].whitespace_regexp);
3072       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3073     }
3074   searchbuf_head = &searchbufs[0];
3075
3076   Qsearch_failed = intern ("search-failed");
3077   staticpro (&Qsearch_failed);
3078   Qinvalid_regexp = intern ("invalid-regexp");
3079   staticpro (&Qinvalid_regexp);
3080
3081   Fput (Qsearch_failed, Qerror_conditions,
3082         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3083   Fput (Qsearch_failed, Qerror_message,
3084         build_string ("Search failed"));
3085
3086   Fput (Qinvalid_regexp, Qerror_conditions,
3087         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3088   Fput (Qinvalid_regexp, Qerror_message,
3089         build_string ("Invalid regexp"));
3090
3091   last_thing_searched = Qnil;
3092   staticpro (&last_thing_searched);
3093
3094   saved_last_thing_searched = Qnil;
3095   staticpro (&saved_last_thing_searched);
3096
3097   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3098       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3099 Some commands use this for user-specified regexps.
3100 Spaces that occur inside character classes or repetition operators
3101 or other such regexp constructs are not replaced with this.
3102 A value of nil (which is the normal value) means treat spaces literally.  */);
3103   Vsearch_spaces_regexp = Qnil;
3104
3105   defsubr (&Slooking_at);
3106   defsubr (&Sposix_looking_at);
3107   defsubr (&Sstring_match);
3108   defsubr (&Sposix_string_match);
3109   defsubr (&Ssearch_forward);
3110   defsubr (&Ssearch_backward);
3111   defsubr (&Sword_search_forward);
3112   defsubr (&Sword_search_backward);
3113   defsubr (&Sre_search_forward);
3114   defsubr (&Sre_search_backward);
3115   defsubr (&Sposix_search_forward);
3116   defsubr (&Sposix_search_backward);
3117   defsubr (&Sreplace_match);
3118   defsubr (&Smatch_beginning);
3119   defsubr (&Smatch_end);
3120   defsubr (&Smatch_data);
3121   defsubr (&Sset_match_data);
3122   defsubr (&Sregexp_quote);
3123 }
3124
3125 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3126    (do not change this comment) */