src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
   3                  2004, 2005, 2006 Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.
        Entries are chained through NEXT; compile_pattern moves the entry
        it returns to the front of that chain.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
       /* REGEXP is a copy of the pattern string BUF was compiled from
          (nil while the entry is unused or its last compile failed).
          WHITESPACE_REGEXP is the value Vsearch_spaces_regexp had when
          the entry was compiled.  */
  44   Lisp_Object regexp, whitespace_regexp;
  45   struct re_pattern_buffer buf;
       /* 0400 is octal for 256.  NOTE(review): presumably installed as
          BUF's fastmap by initialization code not shown here -- confirm.  */
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* Error condition signaled by compile_pattern_1 when compiling a
        regexp fails.  */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
     /* Consulted by compile_pattern_1: when non-nil, its string data is
        handed to re_set_whitespace_regexp while the pattern is compiled.
        The value in effect is also recorded in each cache entry so that
        compile_pattern can tell when a cached pattern is stale.  */
  86 Lisp_Object Vsearch_spaces_regexp;
  87
     /* Forward declarations (old-style, without parameter lists) of
        static functions; presumably defined later in this file.  */
  88 static void set_search_regs ();
  89 static void save_search_regs ();
  90 static int simple_search ();
  91 static int boyer_moore ();
  92 static int search_buffer ();
  93
     /* Report a regexp matcher stack overflow by calling error.
        Callers in this file invoke it when re_match_2 or re_search
        returns -2.  */
  94 static void
  95 matcher_overflow ()
  96 {
  97   error ("Stack overflow in regexp matcher");
  98 }
  99
 100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 101    PATTERN is the pattern to compile.
 102    CP is the place to put the result.
 103    TRANSLATE is a translation table for ignoring case, or nil for none.
 104    REGP is the structure that says where to store the "register"
 105    values that will result from matching this pattern.
 106    If it is 0, we should compile the pattern not to record any
 107    subexpression bounds.
        (REGP is accepted for symmetry with compile_pattern but is not
        referenced in this function's body.)
 108    POSIX is nonzero if we want full backtracking (POSIX style)
 109    for this pattern.  0 means backtrack only enough to get a valid match.
 110    MULTIBYTE is nonzero iff a target of match is a multibyte buffer or
 111    string.
 112
 113    The behavior also depends on Vsearch_spaces_regexp: when it is a
        string, its data is installed with re_set_whitespace_regexp for the
        duration of the compilation; any other value (nil included) means
        no whitespace regexp.

        On failure Qinvalid_regexp is signaled with the compiler's message
        and CP->regexp is left nil, marking the entry as unused.  On
        success CP->regexp holds a copy of PATTERN.  */
 114
 115 static void
 116 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 117      struct regexp_cache *cp;
 118      Lisp_Object pattern;
 119      Lisp_Object translate;
 120      struct re_registers *regp;
 121      int posix;
 122      int multibyte;
 123 {
 124   char *val;
 125   reg_syntax_t old;
 126
       /* Mark the entry invalid first, so that an error exit below does
          not leave a stale pattern string attached to a half-built buffer.  */
 127   cp->regexp = Qnil;
 128   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 129   cp->posix = posix;
 130   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 131   cp->buf.target_multibyte = multibyte;
 132   cp->whitespace_regexp = Vsearch_spaces_regexp;
 133   BLOCK_INPUT;
 134   old = re_set_syntax (RE_SYNTAX_EMACS
 135                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
       /* Only apply the string accessor to an actual string; the variable
          is an ordinary Lisp value and may hold anything.  */
 136   re_set_whitespace_regexp (STRINGP (Vsearch_spaces_regexp)
 137                             ? SDATA (Vsearch_spaces_regexp) : NULL);
 138
 139   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 140                                      SBYTES (pattern), &cp->buf);
 141
       /* Undo the temporary whitespace-regexp and syntax settings before
          input is unblocked and before any error is signaled.  */
 142   re_set_whitespace_regexp (NULL);
 143
 144   re_set_syntax (old);
 145   UNBLOCK_INPUT;
 146   if (val)
 147     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 148
 149   cp->regexp = Fcopy_sequence (pattern);
 150 }
 151
 152 /* Shrink each compiled regexp buffer in the cache
 153    to the size actually used right now.
 154    This is called from garbage collection.
        Walks the list starting at searchbuf_head and reallocates each
        entry's compiled buffer to exactly its `used' size.  */
 155
 156 void
 157 shrink_regexp_cache ()
 158 {
 159   struct regexp_cache *cp;
 160
 161   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 162     {
           /* Record the new capacity, then trim the storage to match.  */
 163       cp->buf.allocated = cp->buf.used;
 164       cp->buf.buffer
 165         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 166     }
 167 }
 168
 169 /* Compile a regexp if necessary, but first check to see if there's one in
 170    the cache.
 171    PATTERN is the pattern to compile.
 172    TRANSLATE is a translation table for ignoring case, or nil for none.
 173    REGP is the structure that says where to store the "register"
 174    values that will result from matching this pattern.
 175    If it is 0, we should compile the pattern not to record any
 176    subexpression bounds.
 177    POSIX is nonzero if we want full backtracking (POSIX style)
 178    for this pattern.  0 means backtrack only enough to get a valid match.
        MULTIBYTE is passed on to compile_pattern_1 as the multibyteness of
        the match target, and is part of the cache key.

        Return a pointer to the pattern buffer inside the cache entry used.
        That entry is moved to the front of the list, so the pointer is
        only good until later calls recycle the entry.  */
 179
 180 struct re_pattern_buffer *
 181 compile_pattern (pattern, regp, translate, posix, multibyte)
 182      Lisp_Object pattern;
 183      struct re_registers *regp;
 184      Lisp_Object translate;
 185      int posix, multibyte;
 186 {
 187   struct regexp_cache *cp, **cpp;
 188
       /* CPP always points at the link that leads to CP, so CP can be
          unlinked after the loop without a second traversal.  */
 189   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 190     {
 191       cp = *cpp;
 192       /* Entries are initialized to nil, and may be set to nil by
 193          compile_pattern_1 if the pattern isn't valid.  Don't apply
 194          string accessors in those cases.  However, compile_pattern_1
 195          is only applied to the cache entry we pick here to reuse.  So
 196          nil should never appear before a non-nil entry.  */
 197       if (NILP (cp->regexp))
 198         goto compile_it;
           /* Cache hit: same pattern text and multibyteness, same
              translation table, same POSIX flag, same target multibyteness
              and same whitespace regexp.  */
 199       if (SCHARS (cp->regexp) == SCHARS (pattern)
 200           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 201           && !NILP (Fstring_equal (cp->regexp, pattern))
 202           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 203           && cp->posix == posix
 204           && cp->buf.target_multibyte == multibyte
 205           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 206         break;
 207
 208       /* If we're at the end of the cache, compile into the nil cell
 209          we found, or the last (least recently used) cell with a
 210          string value.  */
 211       if (cp->next == 0)
 212         {
 213         compile_it:
 214           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 215           break;
 216         }
 217     }
 218
 219   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 220      either because we found it in the cache or because we just compiled it.
 221      Move it to the front of the queue to mark it as most recently used.  */
 222   *cpp = cp->next;
 223   cp->next = searchbuf_head;
 224   searchbuf_head = cp;
 225
 226   /* Advise the searching functions about the space we have allocated
 227      for register data.  */
 228   if (regp)
 229     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 230
 231   return &cp->buf;
 232 }
 233
 234 /* Error condition used for failing searches */
 235 Lisp_Object Qsearch_failed;
 236
     /* Signal Qsearch_failed with the one-element list (ARG) as data.
        The Qnil return is only reached if Fsignal returns; it also lets
        callers write `return signal_failure (...)'.  */
 237 Lisp_Object
 238 signal_failure (arg)
 239      Lisp_Object arg;
 240 {
 241   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 242   return Qnil;
 243 }
 244 \f
     /* Common body of `looking-at' and `posix-looking-at'.
        Match the regexp STRING against the accessible text of the current
        buffer starting at point.  POSIX nonzero requests POSIX-style
        backtracking.  Return Qt on a match and Qnil otherwise; on a match
        search_regs is rewritten to hold character positions.
        last_thing_searched is set to the current buffer in either case.  */
 245 static Lisp_Object
 246 looking_at_1 (string, posix)
 247      Lisp_Object string;
 248      int posix;
 249 {
 250   Lisp_Object val;
 251   unsigned char *p1, *p2;
 252   int s1, s2;
 253   register int i;
 254   struct re_pattern_buffer *bufp;
 255
 256   if (running_asynch_code)
 257     save_search_regs ();
 258
 259   CHECK_STRING (string);
       /* Fold case only when the buffer's case_fold_search is non-nil.  */
 260   bufp = compile_pattern (string, &search_regs,
 261                           (!NILP (current_buffer->case_fold_search)
 262                            ? current_buffer->case_canon_table : Qnil),
 263                           posix,
 264                           !NILP (current_buffer->enable_multibyte_characters));
 265
 266   immediate_quit = 1;
 267   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 268
 269   /* Get pointers and sizes of the two strings
 270      that make up the visible portion of the buffer. */
 271
 272   p1 = BEGV_ADDR;
 273   s1 = GPT_BYTE - BEGV_BYTE;
 274   p2 = GAP_END_ADDR;
 275   s2 = ZV_BYTE - GPT_BYTE;
       /* Gap lies before the visible region: everything is in the second
          string.  */
 276   if (s1 < 0)
 277     {
 278       p2 = p1;
 279       s2 = ZV_BYTE - BEGV_BYTE;
 280       s1 = 0;
 281     }
       /* Gap lies after the visible region: everything is in the first
          string.  */
 282   if (s2 < 0)
 283     {
 284       s1 = ZV_BYTE - BEGV_BYTE;
 285       s2 = 0;
 286     }
 287
 288   re_match_object = Qnil;
 289
       /* Offsets handed to the matcher are bytes relative to BEGV_BYTE.  */
 290   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 291                   PT_BYTE - BEGV_BYTE, &search_regs,
 292                   ZV_BYTE - BEGV_BYTE);
 293   immediate_quit = 0;
 294
 295   if (i == -2)
 296     matcher_overflow ();
 297
 298   val = (0 <= i ? Qt : Qnil);
       /* On success, I is reused as the register index while the registers
          are converted from relative byte offsets to character positions.  */
 299   if (i >= 0)
 300     for (i = 0; i < search_regs.num_regs; i++)
 301       if (search_regs.start[i] >= 0)
 302         {
 303           search_regs.start[i]
 304             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 305           search_regs.end[i]
 306             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 307         }
 308   XSETBUFFER (last_thing_searched, current_buffer);
 309   return val;
 310 }
 311
 312 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 313        doc: /* Return t if text after point matches regular expression REGEXP.
 314 This function modifies the match data that `match-beginning',
 315 `match-end' and `match-data' access; save and restore the match
 316 data if you want to preserve them.  */)
 317      (regexp)
 318      Lisp_Object regexp;
 319 {
       /* Second argument 0: do not request POSIX-style backtracking.  */
 320   return looking_at_1 (regexp, 0);
 321 }
 322
 323 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 324        doc: /* Return t if text after point matches regular expression REGEXP.
 325 Find the longest match, in accord with Posix regular expression rules.
 326 This function modifies the match data that `match-beginning',
 327 `match-end' and `match-data' access; save and restore the match
 328 data if you want to preserve them.  */)
 329      (regexp)
 330      Lisp_Object regexp;
 331 {
       /* Second argument 1: request POSIX-style (full) backtracking.  */
 332   return looking_at_1 (regexp, 1);
 333 }
 334 \f
     /* Common body of `string-match' and `posix-string-match'.
        Search STRING for REGEXP beginning at character index START (nil
        means 0; a negative START counts back from the end of STRING).
        POSIX nonzero requests POSIX-style backtracking.
        Return the character index of the start of the match as a Lisp
        integer, or Qnil if there is none.  On success search_regs holds
        character indices into STRING.  An out-of-range START is reported
        through args_out_of_range.  */
 335 static Lisp_Object
 336 string_match_1 (regexp, string, start, posix)
 337      Lisp_Object regexp, string, start;
 338      int posix;
 339 {
 340   int val;
 341   struct re_pattern_buffer *bufp;
 342   int pos, pos_byte;
 343   int i;
 344
 345   if (running_asynch_code)
 346     save_search_regs ();
 347
 348   CHECK_STRING (regexp);
 349   CHECK_STRING (string);
 350
 351   if (NILP (start))
 352     pos = 0, pos_byte = 0;
 353   else
 354     {
 355       int len = SCHARS (string);
 356
 357       CHECK_NUMBER (start);
 358       pos = XINT (start);
           /* A negative index within range is taken relative to the end.  */
 359       if (pos < 0 && -pos <= len)
 360         pos = len + pos;
 361       else if (0 > pos || pos > len)
 362         args_out_of_range (string, start);
 363       pos_byte = string_char_to_byte (string, pos);
 364     }
 365
 366   bufp = compile_pattern (regexp, &search_regs,
 367                           (!NILP (current_buffer->case_fold_search)
 368                            ? current_buffer->case_canon_table : Qnil),
 369                           posix,
 370                           STRING_MULTIBYTE (string));
 371   immediate_quit = 1;
 372   re_match_object = string;
 373
       /* Search forward from POS_BYTE over the remaining bytes of STRING.  */
 374   val = re_search (bufp, (char *) SDATA (string),
 375                    SBYTES (string), pos_byte,
 376                    SBYTES (string) - pos_byte,
 377                    &search_regs);
 378   immediate_quit = 0;
       /* Recorded before the failure checks, so it is set even when the
          search fails.  */
 379   last_thing_searched = Qt;
 380   if (val == -2)
 381     matcher_overflow ();
 382   if (val < 0) return Qnil;
 383
       /* Convert the registers from byte offsets to character indices.  */
 384   for (i = 0; i < search_regs.num_regs; i++)
 385     if (search_regs.start[i] >= 0)
 386       {
 387         search_regs.start[i]
 388           = string_byte_to_char (string, search_regs.start[i]);
 389         search_regs.end[i]
 390           = string_byte_to_char (string, search_regs.end[i]);
 391       }
 392
 393   return make_number (string_byte_to_char (string, val));
 394 }
 395
 396 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 397        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 398 Matching ignores case if `case-fold-search' is non-nil.
 399 If third arg START is non-nil, start search at that index in STRING.
 400 For index of first char beyond the match, do (match-end 0).
 401 `match-end' and `match-beginning' also give indices of substrings
 402 matched by parenthesis constructs in the pattern.
 403
 404 You can use the function `match-string' to extract the substrings
 405 matched by the parenthesis constructions in REGEXP. */)
 406      (regexp, string, start)
 407      Lisp_Object regexp, string, start;
 408 {
       /* Last argument 0: do not request POSIX-style backtracking.  */
 409   return string_match_1 (regexp, string, start, 0);
 410 }
 411
 412 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 413        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 414 Find the longest match, in accord with Posix regular expression rules.
 415 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 416 If third arg START is non-nil, start search at that index in STRING.
 417 For index of first char beyond the match, do (match-end 0).
 418 `match-end' and `match-beginning' also give indices of substrings
 419 matched by parenthesis constructs in the pattern.  */)
 420      (regexp, string, start)
 421      Lisp_Object regexp, string, start;
 422 {
       /* Last argument 1: request POSIX-style (full) backtracking.  */
 423   return string_match_1 (regexp, string, start, 1);
 424 }
 425
 426 /* Match REGEXP against STRING, searching all of STRING,
 427    and return the index of the match, or negative on failure.
 428    This does not clobber the match data.
        No translation table is used (Qnil), so case is not folded, and no
        register structure is passed to the compiler or the matcher.
        NOTE(review): the value is returned exactly as re_search produced
        it; string_match_1 converts the same kind of value with
        string_byte_to_char, so for a multibyte STRING this is presumably
        a byte index -- confirm.  */
 429
 430 int
 431 fast_string_match (regexp, string)
 432      Lisp_Object regexp, string;
 433 {
 434   int val;
 435   struct re_pattern_buffer *bufp;
 436
 437   bufp = compile_pattern (regexp, 0, Qnil,
 438                           0, STRING_MULTIBYTE (string));
 439   immediate_quit = 1;
 440   re_match_object = string;
 441
 442   val = re_search (bufp, (char *) SDATA (string),
 443                    SBYTES (string), 0,
 444                    SBYTES (string), 0);
 445   immediate_quit = 0;
 446   return val;
 447 }
 448
 449 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 450    and return the index of the match, or negative on failure.
 451    This does not clobber the match data.
 452    We assume that STRING contains single-byte characters.
        STRING is a NUL-terminated C string (its length is taken with
        strlen).  REGEXP is first converted with string_make_unibyte and
        compiled for a unibyte target.  */
 453
     /* NOTE(review): this declares Vascii_downcase_table, but the function
        below uses Vascii_canon_table, which must be declared elsewhere.  */
 454 extern Lisp_Object Vascii_downcase_table;
 455
 456 int
 457 fast_c_string_match_ignore_case (regexp, string)
 458      Lisp_Object regexp;
 459      const char *string;
 460 {
 461   int val;
 462   struct re_pattern_buffer *bufp;
 463   int len = strlen (string);
 464
 465   regexp = string_make_unibyte (regexp);
       /* The target is a C string, not a Lisp object, so Qt is recorded
          as the match object.  */
 466   re_match_object = Qt;
 467   bufp = compile_pattern (regexp, 0,
 468                           Vascii_canon_table, 0,
 469                           0);
 470   immediate_quit = 1;
 471   val = re_search (bufp, string, len, 0, len, 0);
 472   immediate_quit = 0;
 473   return val;
 474 }
 475
 476 /* Like fast_string_match but ignore case.
        Case folding uses Vascii_canon_table as the translation table.
        As in fast_string_match, no registers are passed, and the result
        of re_search is returned unchanged (negative on failure).  */
 477
 478 int
 479 fast_string_match_ignore_case (regexp, string)
 480      Lisp_Object regexp, string;
 481 {
 482   int val;
 483   struct re_pattern_buffer *bufp;
 484
 485   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 486                           0, STRING_MULTIBYTE (string));
 487   immediate_quit = 1;
 488   re_match_object = string;
 489
 490   val = re_search (bufp, (char *) SDATA (string),
 491                    SBYTES (string), 0,
 492                    SBYTES (string), 0);
 493   immediate_quit = 0;
 494   return val;
 495 }
 496 \f
 497 /* The newline cache: remembering which sections of text have no newlines.  */
 498
 499 /* If the user has requested newline caching, make sure it's on.
 500    Otherwise, make sure it's off.
 501    This is our cheesy way of associating an action with the change of
 502    state of a buffer-local variable.
        The request is read from BUF->cache_long_line_scans; the cache
        itself lives in BUF->newline_cache, which is null while caching
        is off.  */
 503 static void
 504 newline_cache_on_off (buf)
 505      struct buffer *buf;
 506 {
 507   if (NILP (buf->cache_long_line_scans))
 508     {
 509       /* It should be off.  */
 510       if (buf->newline_cache)
 511         {
 512           free_region_cache (buf->newline_cache);
 513           buf->newline_cache = 0;
 514         }
 515     }
 516   else
 517     {
 518       /* It should be on.  */
 519       if (buf->newline_cache == 0)
 520         buf->newline_cache = new_region_cache ();
 521     }
 522 }
 523
 524 \f
 525 /* Search for COUNT instances of the character TARGET between START and END.
 526
 527    If COUNT is positive, search forwards; END must be >= START.
 528    If COUNT is negative, search backwards for the -COUNTth instance;
 529       END must be <= START.
 530    If COUNT is zero, do anything you please; run rogue, for all I care.
 531
 532    If END is zero, use BEGV or ZV instead, as appropriate for the
 533    direction indicated by COUNT.
 534
 535    If we find COUNT instances, set *SHORTAGE to zero, and return the
 536    position past the COUNTth match.  Note that for reverse motion
 537    this is not the same as the usual convention for Emacs motion commands.
 538
 539    If we don't find COUNT instances before reaching END, set *SHORTAGE
 540    to the number of TARGETs left unfound, and return END.
 541
 542    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 543    except when inside redisplay.

        START, END and the return value are character positions; the
        scan itself works on byte positions and compares TARGET against
        individual bytes.  SHORTAGE may be a null pointer, in which case
        nothing is stored through it.  When TARGET is a newline and the
        current buffer has a newline cache, the cache is consulted to skip
        regions and updated with the regions scanned.  immediate_quit is
        reset to 0 before returning.  */
 544
 545 int
 546 scan_buffer (target, start, end, count, shortage, allow_quit)
 547      register int target;
 548      int start, end;
 549      int count;
 550      int *shortage;
 551      int allow_quit;
 552 {
 553   struct region_cache *newline_cache;
 554   int direction;
 555
       /* A COUNT of zero takes the backward branch here and below.  */
 556   if (count > 0)
 557     {
 558       direction = 1;
 559       if (! end) end = ZV;
 560     }
 561   else
 562     {
 563       direction = -1;
 564       if (! end) end = BEGV;
 565     }
 566
 567   newline_cache_on_off (current_buffer);
 568   newline_cache = current_buffer->newline_cache;
 569
 570   if (shortage != 0)
 571     *shortage = 0;
 572
 573   immediate_quit = allow_quit;
 574
 575   if (count > 0)
 576     while (start != end)
 577       {
 578         /* Our innermost scanning loop is very simple; it doesn't know
 579            about gaps, buffer ends, or the newline cache.  ceiling is
 580            the position of the last character before the next such
 581            obstacle --- the last character the dumb search loop should
 582            examine.  */
 583         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 584         int start_byte = CHAR_TO_BYTE (start);
 585         int tem;
 586
 587         /* If we're looking for a newline, consult the newline cache
 588            to see where we can avoid some scanning.  */
 589         if (target == '\n' && newline_cache)
 590           {
 591             int next_change;
                 /* immediate_quit is switched off around the cache
                    lookups and restored right after.  */
 592             immediate_quit = 0;
 593             while (region_cache_forward
 594                    (current_buffer, newline_cache, start_byte, &next_change))
 595               start_byte = next_change;
 596             immediate_quit = allow_quit;
 597
 598             /* START should never be after END.  */
 599             if (start_byte > ceiling_byte)
 600               start_byte = ceiling_byte;
 601
 602             /* Now the text after start is an unknown region, and
 603                next_change is the position of the next known region. */
 604             ceiling_byte = min (next_change - 1, ceiling_byte);
 605           }
 606
 607         /* The dumb loop can only scan text stored in contiguous
 608            bytes. BUFFER_CEILING_OF returns the last character
 609            position that is contiguous, so the ceiling is the
 610            position after that.  */
 611         tem = BUFFER_CEILING_OF (start_byte);
 612         ceiling_byte = min (tem, ceiling_byte);
 613
 614         {
 615           /* The termination address of the dumb loop.  */
 616           register unsigned char *ceiling_addr
 617             = BYTE_POS_ADDR (ceiling_byte) + 1;
 618           register unsigned char *cursor
 619             = BYTE_POS_ADDR (start_byte);
               /* BASE corresponds to START_BYTE, so START_BYTE + P - BASE
                  is the byte position of address P.  */
 620           unsigned char *base = cursor;
 621
 622           while (cursor < ceiling_addr)
 623             {
 624               unsigned char *scan_start = cursor;
 625
 626               /* The dumb loop.  */
 627               while (*cursor != target && ++cursor < ceiling_addr)
 628                 ;
 629
 630               /* If we're looking for newlines, cache the fact that
 631                  the region from start to cursor is free of them. */
 632               if (target == '\n' && newline_cache)
 633                 know_region_cache (current_buffer, newline_cache,
 634                                    start_byte + scan_start - base,
 635                                    start_byte + cursor - base);
 636
 637               /* Did we find the target character?  */
 638               if (cursor < ceiling_addr)
 639                 {
 640                   if (--count == 0)
 641                     {
 642                       immediate_quit = 0;
                           /* The + 1 yields the position just past the
                              match.  */
 643                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 644                     }
 645                   cursor++;
 646                 }
 647             }
 648
 649           start = BYTE_TO_CHAR (start_byte + cursor - base);
 650         }
 651       }
 652   else
 653     while (start > end)
 654       {
 655         /* The last character to check before the next obstacle.  */
 656         int ceiling_byte = CHAR_TO_BYTE (end);
 657         int start_byte = CHAR_TO_BYTE (start);
 658         int tem;
 659
 660         /* Consult the newline cache, if appropriate.  */
 661         if (target == '\n' && newline_cache)
 662           {
 663             int next_change;
 664             immediate_quit = 0;
 665             while (region_cache_backward
 666                    (current_buffer, newline_cache, start_byte, &next_change))
 667               start_byte = next_change;
 668             immediate_quit = allow_quit;
 669
 670             /* Start should never be at or before end.  */
 671             if (start_byte <= ceiling_byte)
 672               start_byte = ceiling_byte + 1;
 673
 674             /* Now the text before start is an unknown region, and
 675                next_change is the position of the next known region. */
 676             ceiling_byte = max (next_change, ceiling_byte);
 677           }
 678
 679         /* Stop scanning before the gap.  */
 680         tem = BUFFER_FLOOR_OF (start_byte - 1);
 681         ceiling_byte = max (tem, ceiling_byte);
 682
 683         {
 684           /* The termination address of the dumb loop.  */
 685           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 686           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
               /* BASE corresponds to START_BYTE - 1, so START_BYTE + P - BASE
                  is the byte position just after address P.  */
 687           unsigned char *base = cursor;
 688
 689           while (cursor >= ceiling_addr)
 690             {
 691               unsigned char *scan_start = cursor;
 692
 693               while (*cursor != target && --cursor >= ceiling_addr)
 694                 ;
 695
 696               /* If we're looking for newlines, cache the fact that
 697                  the region from after the cursor to start is free of them.  */
 698               if (target == '\n' && newline_cache)
 699                 know_region_cache (current_buffer, newline_cache,
 700                                    start_byte + cursor - base,
 701                                    start_byte + scan_start - base);
 702
 703               /* Did we find the target character?  */
 704               if (cursor >= ceiling_addr)
 705                 {
 706                   if (++count >= 0)
 707                     {
 708                       immediate_quit = 0;
 709                       return BYTE_TO_CHAR (start_byte + cursor - base);
 710                     }
 711                   cursor--;
 712                 }
 713             }
 714
 715           start = BYTE_TO_CHAR (start_byte + cursor - base);
 716         }
 717       }
 718
       /* Ran into END first: report how many instances are still missing.  */
 719   immediate_quit = 0;
 720   if (shortage != 0)
 721     *shortage = count * direction;
 722   return start;
 723 }
 724 \f
 725 /* Search for COUNT instances of a line boundary, which means either a
 726    newline or (if selective display enabled) a carriage return.
 727    Start at START.  If COUNT is negative, search backwards.
 728
 729    We report the resulting position by calling TEMP_SET_PT_BOTH.
 730
 731    If we find COUNT instances, we position after (always after,
 732    even if scanning backwards) the COUNTth match, and return 0.
 733
 734    If we don't find COUNT instances before reaching the end of the
 735    buffer (or the beginning, if scanning backwards), we return
 736    the number of line boundaries left unfound, and position at
 737    the limit we bumped up against.
 738
 739    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 740    except in special cases.

        NOTE(review): the loops below compare bytes only against '\n';
        no carriage-return test is visible in this function, so the
        mention of carriage return above looks out of date -- confirm.

        The scan stops at LIMIT_BYTE; LIMIT and LIMIT_BYTE are the
        position set when the scan falls short.  The START_BYTE argument
        is overwritten with CHAR_TO_BYTE (START) before use.
        immediate_quit is incremented when ALLOW_QUIT is nonzero and
        restored to its entry value before returning.  */
 741
 742 int
 743 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 744      int start, start_byte;
 745      int limit, limit_byte;
 746      register int count;
 747      int allow_quit;
 748 {
 749   int direction = ((count > 0) ? 1 : -1);
 750
 751   register unsigned char *cursor;
 752   unsigned char *base;
 753
 754   register int ceiling;
 755   register unsigned char *ceiling_addr;
 756
 757   int old_immediate_quit = immediate_quit;
 758
 759   /* The code that follows is like scan_buffer
 760      but checks for either newline or carriage return.
          NOTE(review): only '\n' is tested below.  */
 761
 762   if (allow_quit)
 763     immediate_quit++;
 764
 765   start_byte = CHAR_TO_BYTE (start);
 766
 767   if (count > 0)
 768     {
 769       while (start_byte < limit_byte)
 770         {
               /* Scan one contiguous stretch, up to the byte before
                  LIMIT_BYTE at most; CEILING_ADDR is one past its last
                  byte.  */
 771           ceiling =  BUFFER_CEILING_OF (start_byte);
 772           ceiling = min (limit_byte - 1, ceiling);
 773           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 774           base = (cursor = BYTE_POS_ADDR (start_byte));
 775           while (1)
 776             {
 777               while (*cursor != '\n' && ++cursor != ceiling_addr)
 778                 ;
 779
 780               if (cursor != ceiling_addr)
 781                 {
 782                   if (--count == 0)
 783                     {
 784                       immediate_quit = old_immediate_quit;
 785                       start_byte = start_byte + cursor - base + 1;
 786                       start = BYTE_TO_CHAR (start_byte);
 787                       TEMP_SET_PT_BOTH (start, start_byte);
 788                       return 0;
 789                     }
 790                   else
 791                     if (++cursor == ceiling_addr)
 792                       break;
 793                 }
 794               else
 795                 break;
 796             }
 797           start_byte += cursor - base;
 798         }
 799     }
 800   else
 801     {
 802       while (start_byte > limit_byte)
 803         {
               /* Scan one contiguous stretch backwards; CEILING_ADDR is
                  one before its first byte, and CURSOR starts one past
                  the byte at START_BYTE - 1 because the inner loop
                  pre-decrements.  */
 804           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 805           ceiling = max (limit_byte, ceiling);
 806           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 807           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 808           while (1)
 809             {
 810               while (--cursor != ceiling_addr && *cursor != '\n')
 811                 ;
 812
 813               if (cursor != ceiling_addr)
 814                 {
 815                   if (++count == 0)
 816                     {
 817                       immediate_quit = old_immediate_quit;
 818                       /* Return the position AFTER the match we found.  */
 819                       start_byte = start_byte + cursor - base + 1;
 820                       start = BYTE_TO_CHAR (start_byte);
 821                       TEMP_SET_PT_BOTH (start, start_byte);
 822                       return 0;
 823                     }
 824                 }
 825               else
 826                 break;
 827             }
 828           /* Here we add 1 to compensate for the last decrement
 829              of CURSOR, which took it past the valid range.  */
 830           start_byte += cursor - base + 1;
 831         }
 832     }
 833
 834   TEMP_SET_PT_BOTH (limit, limit_byte);
 835   immediate_quit = old_immediate_quit;
 836
 837   return count * direction;
 838 }
 839
     /* Scan from FROM for CNT newlines with scan_buffer, using the
        default end (END argument 0), no shortage report and ALLOW_QUIT 0.
        Return scan_buffer's result.  */
 840 int
 841 find_next_newline_no_quit (from, cnt)
 842      register int from, cnt;
 843 {
 844   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 845 }
 846
 847 /* Like find_next_newline, but returns position before the newline,
 848    not after, and only search up to TO.  This isn't just
 849    find_next_newline (...)-1, because you might hit TO.
        Calls scan_buffer with ALLOW_QUIT set to 1.  */
 850
 851 int
 852 find_before_next_newline (from, to, cnt)
 853      int from, to, cnt;
 854 {
 855   int shortage;
 856   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 857
       /* Step back over the newline only if all CNT newlines were found;
          otherwise POS is where the scan stopped short.  */
 858   if (shortage == 0)
 859     pos--;
 860
 861   return pos;
 862 }
 863 \f
 864 /* Subroutines of Lisp buffer search functions. */
 865
     /* Search from point for STRING by calling search_buffer.

        BOUND, if non-nil, is a number or marker limiting the search; it
        must not lie on the wrong side of point for the search direction,
        and is clipped to the accessible region.  With nil BOUND the limit
        is ZV or BEGV according to direction.
        COUNT, if non-nil, is a number multiplied into DIRECTION to give
        the signed repeat count passed to search_buffer.
        RE and POSIX are passed through to search_buffer unchanged.
        Case folding tables are supplied only when the current buffer's
        case_fold_search is non-nil.

        On success, set point to the position search_buffer returned and
        return it as a Lisp integer.  A result <= 0 means failure, then:
          NOERROR nil    -- signal Qsearch_failed via signal_failure;
          NOERROR t      -- return Qnil, leaving point alone;
          anything else  -- move point to the limit and return Qnil.  */
 866 static Lisp_Object
 867 search_command (string, bound, noerror, count, direction, RE, posix)
 868      Lisp_Object string, bound, noerror, count;
 869      int direction;
 870      int RE;
 871      int posix;
 872 {
 873   register int np;
 874   int lim, lim_byte;
 875   int n = direction;
 876
 877   if (!NILP (count))
 878     {
 879       CHECK_NUMBER (count);
 880       n *= XINT (count);
 881     }
 882
 883   CHECK_STRING (string);
 884   if (NILP (bound))
 885     {
 886       if (n > 0)
 887         lim = ZV, lim_byte = ZV_BYTE;
 888       else
 889         lim = BEGV, lim_byte = BEGV_BYTE;
 890     }
 891   else
 892     {
 893       CHECK_NUMBER_COERCE_MARKER (bound);
 894       lim = XINT (bound);
 895       if (n > 0 ? lim < PT : lim > PT)
 896         error ("Invalid search bound (wrong side of point)");
           /* Clip the bound to the accessible region.  */
 897       if (lim > ZV)
 898         lim = ZV, lim_byte = ZV_BYTE;
 899       else if (lim < BEGV)
 900         lim = BEGV, lim_byte = BEGV_BYTE;
 901       else
 902         lim_byte = CHAR_TO_BYTE (lim);
 903     }
 904
 905   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 906                       (!NILP (current_buffer->case_fold_search)
 907                        ? current_buffer->case_canon_table
 908                        : Qnil),
 909                       (!NILP (current_buffer->case_fold_search)
 910                        ? current_buffer->case_eqv_table
 911                        : Qnil),
 912                       posix);
 913   if (np <= 0)
 914     {
 915       if (NILP (noerror))
 916         return signal_failure (string);
 917       if (!EQ (noerror, Qt))
 918         {
 919           if (lim < BEGV || lim > ZV)
 920             abort ();
 921           SET_PT_BOTH (lim, lim_byte);
 922           return Qnil;
 923 #if 0 /* This would be clean, but maybe programs depend on
 924          a value of nil here.  */
 925           np = lim;
 926 #endif
 927         }
 928       else
 929         return Qnil;
 930     }
 931
       /* Sanity check: a successful result must lie in the accessible
          region.  */
 932   if (np < BEGV || np > ZV)
 933     abort ();
 934
 935   SET_PT (np);
 936
 937   return make_number (np);
 938 }
 939 \f
 940 /* Return 1 if REGEXP it matches just one constant string.  */
 941
 942 static int
 943 trivial_regexp_p (regexp)
 944      Lisp_Object regexp;
 945 {
 946   int len = SBYTES (regexp);
 947   unsigned char *s = SDATA (regexp);
 948   while (--len >= 0)
 949     {
 950       switch (*s++)
 951         {
 952         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 953           return 0;
 954         case '\\':
 955           if (--len < 0)
 956             return 0;
 957           switch (*s++)
 958             {
 959             case '|': case '(': case ')': case '`': case '\'': case 'b':
 960             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 961             case 'S': case '=': case '{': case '}': case '_':
 962             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 963             case '1': case '2': case '3': case '4': case '5':
 964             case '6': case '7': case '8': case '9':
 965               return 0;
 966             }
 967         }
 968     }
 969   return 1;
 970 }
 971
 972 /* Search for the n'th occurrence of STRING in the current buffer,
 973    starting at position POS and stopping at position LIM,
 974    treating STRING as a literal string if RE is false or as
 975    a regular expression if RE is true.
 976
 977    If N is positive, searching is forward and LIM must be greater than POS.
 978    If N is negative, searching is backward and LIM must be less than POS.
 979
 980    Returns -x if x occurrences remain to be found (x > 0),
 981    or else the position at the beginning of the Nth occurrence
 982    (if searching backward) or the end (if searching forward).
 983
 984    POSIX is nonzero if we want full backtracking (POSIX style)
 985    for this pattern.  0 means backtrack only enough to get a valid match.  */
 986
 987 #define TRANSLATE(out, trt, d)                  \
 988 do                                              \
 989   {                                             \
 990     if (! NILP (trt))                           \
 991       {                                         \
 992         Lisp_Object temp;                       \
 993         temp = Faref (trt, make_number (d));    \
 994         if (INTEGERP (temp))                    \
 995           out = XINT (temp);                    \
 996         else                                    \
 997           out = d;                              \
 998       }                                         \
 999     else                                        \
1000       out = d;                                  \
1001   }                                             \
1002 while (0)
1003
1004 static int
1005 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1006                RE, trt, inverse_trt, posix)
1007      Lisp_Object string;
1008      int pos;
1009      int pos_byte;
1010      int lim;
1011      int lim_byte;
1012      int n;
1013      int RE;
1014      Lisp_Object trt;
1015      Lisp_Object inverse_trt;
1016      int posix;
1017 {
1018   int len = SCHARS (string);
1019   int len_byte = SBYTES (string);
1020   register int i;
1021
1022   if (running_asynch_code)
1023     save_search_regs ();
1024
1025   /* Searching 0 times means don't move.  */
1026   /* Null string is found at starting position.  */
1027   if (len == 0 || n == 0)
1028     {
1029       set_search_regs (pos_byte, 0);
1030       return pos;
1031     }
1032
1033   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1034     {
1035       unsigned char *p1, *p2;
1036       int s1, s2;
1037       struct re_pattern_buffer *bufp;
1038
1039       bufp = compile_pattern (string, &search_regs, trt, posix,
1040                               !NILP (current_buffer->enable_multibyte_characters));
1041
1042       immediate_quit = 1;       /* Quit immediately if user types ^G,
1043                                    because letting this function finish
1044                                    can take too long. */
1045       QUIT;                     /* Do a pending quit right away,
1046                                    to avoid paradoxical behavior */
1047       /* Get pointers and sizes of the two strings
1048          that make up the visible portion of the buffer. */
1049
1050       p1 = BEGV_ADDR;
1051       s1 = GPT_BYTE - BEGV_BYTE;
1052       p2 = GAP_END_ADDR;
1053       s2 = ZV_BYTE - GPT_BYTE;
1054       if (s1 < 0)
1055         {
1056           p2 = p1;
1057           s2 = ZV_BYTE - BEGV_BYTE;
1058           s1 = 0;
1059         }
1060       if (s2 < 0)
1061         {
1062           s1 = ZV_BYTE - BEGV_BYTE;
1063           s2 = 0;
1064         }
1065       re_match_object = Qnil;
1066
1067       while (n < 0)
1068         {
1069           int val;
1070           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1071                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1072                              &search_regs,
1073                              /* Don't allow match past current point */
1074                              pos_byte - BEGV_BYTE);
1075           if (val == -2)
1076             {
1077               matcher_overflow ();
1078             }
1079           if (val >= 0)
1080             {
1081               pos_byte = search_regs.start[0] + BEGV_BYTE;
1082               for (i = 0; i < search_regs.num_regs; i++)
1083                 if (search_regs.start[i] >= 0)
1084                   {
1085                     search_regs.start[i]
1086                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1087                     search_regs.end[i]
1088                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1089                   }
1090               XSETBUFFER (last_thing_searched, current_buffer);
1091               /* Set pos to the new position. */
1092               pos = search_regs.start[0];
1093             }
1094           else
1095             {
1096               immediate_quit = 0;
1097               return (n);
1098             }
1099           n++;
1100         }
1101       while (n > 0)
1102         {
1103           int val;
1104           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106                              &search_regs,
1107                              lim_byte - BEGV_BYTE);
1108           if (val == -2)
1109             {
1110               matcher_overflow ();
1111             }
1112           if (val >= 0)
1113             {
1114               pos_byte = search_regs.end[0] + BEGV_BYTE;
1115               for (i = 0; i < search_regs.num_regs; i++)
1116                 if (search_regs.start[i] >= 0)
1117                   {
1118                     search_regs.start[i]
1119                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1120                     search_regs.end[i]
1121                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1122                   }
1123               XSETBUFFER (last_thing_searched, current_buffer);
1124               pos = search_regs.end[0];
1125             }
1126           else
1127             {
1128               immediate_quit = 0;
1129               return (0 - n);
1130             }
1131           n--;
1132         }
1133       immediate_quit = 0;
1134       return (pos);
1135     }
1136   else                          /* non-RE case */
1137     {
1138       unsigned char *raw_pattern, *pat;
1139       int raw_pattern_size;
1140       int raw_pattern_size_byte;
1141       unsigned char *patbuf;
1142       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1143       unsigned char *base_pat;
1144       /* Set to positive if we find a non-ASCII char that need
1145          translation.  Otherwise set to zero later.  */
1146       int char_base = -1;
1147       int boyer_moore_ok = 1;
1148
1149       /* MULTIBYTE says whether the text to be searched is multibyte.
1150          We must convert PATTERN to match that, or we will not really
1151          find things right.  */
1152
1153       if (multibyte == STRING_MULTIBYTE (string))
1154         {
1155           raw_pattern = (unsigned char *) SDATA (string);
1156           raw_pattern_size = SCHARS (string);
1157           raw_pattern_size_byte = SBYTES (string);
1158         }
1159       else if (multibyte)
1160         {
1161           raw_pattern_size = SCHARS (string);
1162           raw_pattern_size_byte
1163             = count_size_as_multibyte (SDATA (string),
1164                                        raw_pattern_size);
1165           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1166           copy_text (SDATA (string), raw_pattern,
1167                      SCHARS (string), 0, 1);
1168         }
1169       else
1170         {
1171           /* Converting multibyte to single-byte.
1172
1173              ??? Perhaps this conversion should be done in a special way
1174              by subtracting nonascii-insert-offset from each non-ASCII char,
1175              so that only the multibyte chars which really correspond to
1176              the chosen single-byte character set can possibly match.  */
1177           raw_pattern_size = SCHARS (string);
1178           raw_pattern_size_byte = SCHARS (string);
1179           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1180           copy_text (SDATA (string), raw_pattern,
1181                      SBYTES (string), 1, 0);
1182         }
1183
1184       /* Copy and optionally translate the pattern.  */
1185       len = raw_pattern_size;
1186       len_byte = raw_pattern_size_byte;
1187       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1188       pat = patbuf;
1189       base_pat = raw_pattern;
1190       if (multibyte)
1191         {
1192           /* Fill patbuf by translated characters in STRING while
1193              checking if we can use boyer-moore search.  If TRT is
1194              non-nil, we can use boyer-moore search only if TRT can be
1195              represented by the byte array of 256 elements.  For that,
1196              all non-ASCII case-equivalents of all case-senstive
1197              characters in STRING must belong to the same charset and
1198              row.  */
1199
1200           while (--len >= 0)
1201             {
1202               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1203               int c, translated, inverse;
1204               int in_charlen, charlen;
1205
1206               /* If we got here and the RE flag is set, it's because we're
1207                  dealing with a regexp known to be trivial, so the backslash
1208                  just quotes the next character.  */
1209               if (RE && *base_pat == '\\')
1210                 {
1211                   len--;
1212                   raw_pattern_size--;
1213                   len_byte--;
1214                   base_pat++;
1215                 }
1216
1217               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1218
1219               if (NILP (trt))
1220                 {
1221                   str = base_pat;
1222                   charlen = in_charlen;
1223                 }
1224               else
1225                 {
1226                   /* Translate the character.  */
1227                   TRANSLATE (translated, trt, c);
1228                   charlen = CHAR_STRING (translated, str_base);
1229                   str = str_base;
1230
1231                   /* Check if C has any other case-equivalents.  */
1232                   TRANSLATE (inverse, inverse_trt, c);
1233                   /* If so, check if we can use boyer-moore.  */
1234                   if (c != inverse && boyer_moore_ok)
1235                     {
1236                       /* Check if all equivalents belong to the same
1237                          group of characters.  Note that the check of C
1238                          itself is done by the last iteration.  */
1239                       int this_char_base = -1;
1240
1241                       while (boyer_moore_ok)
1242                         {
1243                           if (ASCII_BYTE_P (inverse))
1244                             {
1245                               if (this_char_base > 0)
1246                                 boyer_moore_ok = 0;
1247                               else
1248                                 {
1249                                   this_char_base = 0;
1250                                   if (char_base < 0)
1251                                     char_base = this_char_base;
1252                                 }
1253                             }
1254                           else if (CHAR_BYTE8_P (inverse))
1255                             /* Boyer-moore search can't handle a
1256                                translation of an eight-bit
1257                                character.  */
1258                             boyer_moore_ok = 0;
1259                           else if (this_char_base < 0)
1260                             {
1261                               this_char_base = inverse & ~0x3F;
1262                               if (char_base < 0)
1263                                 char_base = this_char_base;
1264                               else if (char_base > 0
1265                                        && this_char_base != char_base)
1266                                 boyer_moore_ok = 0;
1267                             }
1268                           else if ((inverse & ~0x3F) != this_char_base)
1269                             boyer_moore_ok = 0;
1270                           if (c == inverse)
1271                             break;
1272                           TRANSLATE (inverse, inverse_trt, inverse);
1273                         }
1274                     }
1275                 }
1276               if (char_base < 0)
1277                 char_base = 0;
1278
1279               /* Store this character into the translated pattern.  */
1280               bcopy (str, pat, charlen);
1281               pat += charlen;
1282               base_pat += in_charlen;
1283               len_byte -= in_charlen;
1284             }
1285         }
1286       else
1287         {
1288           /* Unibyte buffer.  */
1289           char_base = 0;
1290           while (--len >= 0)
1291             {
1292               int c, translated;
1293
1294               /* If we got here and the RE flag is set, it's because we're
1295                  dealing with a regexp known to be trivial, so the backslash
1296                  just quotes the next character.  */
1297               if (RE && *base_pat == '\\')
1298                 {
1299                   len--;
1300                   raw_pattern_size--;
1301                   base_pat++;
1302                 }
1303               c = *base_pat++;
1304               TRANSLATE (translated, trt, c);
1305               *pat++ = translated;
1306             }
1307         }
1308
1309       len_byte = pat - patbuf;
1310       len = raw_pattern_size;
1311       pat = base_pat = patbuf;
1312
1313       if (boyer_moore_ok)
1314         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1315                             pos, pos_byte, lim, lim_byte,
1316                             char_base);
1317       else
1318         return simple_search (n, pat, len, len_byte, trt,
1319                               pos, pos_byte, lim, lim_byte);
1320     }
1321 }
1322 \f
1323 /* Do a simple string search N times for the string PAT,
1324    whose length is LEN/LEN_BYTE,
1325    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1326    TRT is the translation table.
1327
1328    Return the character position where the match is found.
1329    Otherwise, if M matches remained to be found, return -M.
1330
1331    This kind of search works regardless of what is in PAT and
1332    regardless of what is in TRT.  It is used in cases where
1333    boyer_moore cannot work.  */
1334
1335 static int
1336 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1337      int n;
1338      unsigned char *pat;
1339      int len, len_byte;
1340      Lisp_Object trt;
1341      int pos, pos_byte;
1342      int lim, lim_byte;
1343 {
1344   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1345   int forward = n > 0;
1346   /* Number of buffer bytes matched.  Note that this may be different
1347      from len_byte in a multibyte buffer.  */
1348   int match_byte;
1349
1350   if (lim > pos && multibyte)
1351     while (n > 0)
1352       {
1353         while (1)
1354           {
1355             /* Try matching at position POS.  */
1356             int this_pos = pos;
1357             int this_pos_byte = pos_byte;
1358             int this_len = len;
1359             int this_len_byte = len_byte;
1360             unsigned char *p = pat;
1361             if (pos + len > lim)
1362               goto stop;
1363
1364             while (this_len > 0)
1365               {
1366                 int charlen, buf_charlen;
1367                 int pat_ch, buf_ch;
1368
1369                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1370                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1371                                                  ZV_BYTE - this_pos_byte,
1372                                                  buf_charlen);
1373                 TRANSLATE (buf_ch, trt, buf_ch);
1374
1375                 if (buf_ch != pat_ch)
1376                   break;
1377
1378                 this_len_byte -= charlen;
1379                 this_len--;
1380                 p += charlen;
1381
1382                 this_pos_byte += buf_charlen;
1383                 this_pos++;
1384               }
1385
1386             if (this_len == 0)
1387               {
1388                 match_byte = this_pos_byte - pos_byte;
1389                 pos += len;
1390                 pos_byte += match_byte;
1391                 break;
1392               }
1393
1394             INC_BOTH (pos, pos_byte);
1395           }
1396
1397         n--;
1398       }
1399   else if (lim > pos)
1400     while (n > 0)
1401       {
1402         while (1)
1403           {
1404             /* Try matching at position POS.  */
1405             int this_pos = pos;
1406             int this_len = len;
1407             unsigned char *p = pat;
1408
1409             if (pos + len > lim)
1410               goto stop;
1411
1412             while (this_len > 0)
1413               {
1414                 int pat_ch = *p++;
1415                 int buf_ch = FETCH_BYTE (this_pos);
1416                 TRANSLATE (buf_ch, trt, buf_ch);
1417
1418                 if (buf_ch != pat_ch)
1419                   break;
1420
1421                 this_len--;
1422                 this_pos++;
1423               }
1424
1425             if (this_len == 0)
1426               {
1427                 match_byte = len;
1428                 pos += len;
1429                 break;
1430               }
1431
1432             pos++;
1433           }
1434
1435         n--;
1436       }
1437   /* Backwards search.  */
1438   else if (lim < pos && multibyte)
1439     while (n < 0)
1440       {
1441         while (1)
1442           {
1443             /* Try matching at position POS.  */
1444             int this_pos = pos - len;
1445             int this_pos_byte;
1446             int this_len = len;
1447             int this_len_byte = len_byte;
1448             unsigned char *p = pat;
1449
1450             if (pos - len < lim)
1451               goto stop;
1452             this_pos_byte = CHAR_TO_BYTE (this_pos);
1453             match_byte = pos_byte - this_pos_byte;
1454
1455             while (this_len > 0)
1456               {
1457                 int charlen, buf_charlen;
1458                 int pat_ch, buf_ch;
1459
1460                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1461                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1462                                                  ZV_BYTE - this_pos_byte,
1463                                                  buf_charlen);
1464                 TRANSLATE (buf_ch, trt, buf_ch);
1465
1466                 if (buf_ch != pat_ch)
1467                   break;
1468
1469                 this_len_byte -= charlen;
1470                 this_len--;
1471                 p += charlen;
1472                 this_pos_byte += buf_charlen;
1473                 this_pos++;
1474               }
1475
1476             if (this_len == 0)
1477               {
1478                 pos -= len;
1479                 pos_byte -= match_byte;
1480                 break;
1481               }
1482
1483             DEC_BOTH (pos, pos_byte);
1484           }
1485
1486         n++;
1487       }
1488   else if (lim < pos)
1489     while (n < 0)
1490       {
1491         while (1)
1492           {
1493             /* Try matching at position POS.  */
1494             int this_pos = pos - len;
1495             int this_len = len;
1496             unsigned char *p = pat;
1497
1498             if (pos - len < lim)
1499               goto stop;
1500
1501             while (this_len > 0)
1502               {
1503                 int pat_ch = *p++;
1504                 int buf_ch = FETCH_BYTE (this_pos);
1505                 TRANSLATE (buf_ch, trt, buf_ch);
1506
1507                 if (buf_ch != pat_ch)
1508                   break;
1509                 this_len--;
1510                 this_pos++;
1511               }
1512
1513             if (this_len == 0)
1514               {
1515                 match_byte = len;
1516                 pos -= len;
1517                 break;
1518               }
1519
1520             pos--;
1521           }
1522
1523         n++;
1524       }
1525
1526  stop:
1527   if (n == 0)
1528     {
1529       if (forward)
1530         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1531       else
1532         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1533
1534       return pos;
1535     }
1536   else if (n > 0)
1537     return -n;
1538   else
1539     return n;
1540 }
1541 \f
/* Do Boyer-Moore search N times for the string BASE_PAT,
   whose length is LEN/LEN_BYTE,
   from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
   The sign of N says which direction we search in.
   TRT and INVERSE_TRT are translation tables.
   Characters in BASE_PAT are already translated by TRT.

   This kind of search works if all the characters in BASE_PAT that
   have nontrivial translation are the same aside from the last byte.
   This makes it possible to translate just the last byte of a
   character, and do so after just a simple test of the context.
   CHAR_BASE is nonzero iff there is such a non-ASCII character.

   If that criterion is not satisfied, do not call this function.  */
1556
1557 static int
1558 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1559              pos, pos_byte, lim, lim_byte, char_base)
1560      int n;
1561      unsigned char *base_pat;
1562      int len, len_byte;
1563      Lisp_Object trt;
1564      Lisp_Object inverse_trt;
1565      int pos, pos_byte;
1566      int lim, lim_byte;
1567      int char_base;
1568 {
1569   int direction = ((n > 0) ? 1 : -1);
1570   register int dirlen;
1571   int infinity, limit, stride_for_teases = 0;
1572   register int *BM_tab;
1573   int *BM_tab_base;
1574   register unsigned char *cursor, *p_limit;
1575   register int i, j;
1576   unsigned char *pat, *pat_end;
1577   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1578
1579   unsigned char simple_translate[0400];
1580   /* These are set to the preceding bytes of a byte to be translated
1581      if char_base is nonzero.  As the maximum byte length of a
1582      multibyte character is 5, we have to check at most four previous
1583      bytes.  */
1584   int translate_prev_byte1 = 0;
1585   int translate_prev_byte2 = 0;
1586   int translate_prev_byte3 = 0;
1587   int translate_prev_byte4 = 0;
1588
1589 #ifdef C_ALLOCA
1590   int BM_tab_space[0400];
1591   BM_tab = &BM_tab_space[0];
1592 #else
1593   BM_tab = (int *) alloca (0400 * sizeof (int));
1594 #endif
1595   /* The general approach is that we are going to maintain that we know */
1596   /* the first (closest to the present position, in whatever direction */
1597   /* we're searching) character that could possibly be the last */
1598   /* (furthest from present position) character of a valid match.  We */
1599   /* advance the state of our knowledge by looking at that character */
1600   /* and seeing whether it indeed matches the last character of the */
1601   /* pattern.  If it does, we take a closer look.  If it does not, we */
1602   /* move our pointer (to putative last characters) as far as is */
1603   /* logically possible.  This amount of movement, which I call a */
1604   /* stride, will be the length of the pattern if the actual character */
1605   /* appears nowhere in the pattern, otherwise it will be the distance */
1606   /* from the last occurrence of that character to the end of the */
1607   /* pattern. */
1608   /* As a coding trick, an enormous stride is coded into the table for */
1609   /* characters that match the last character.  This allows use of only */
1610   /* a single test, a test for having gone past the end of the */
1611   /* permissible match region, to test for both possible matches (when */
1612   /* the stride goes past the end immediately) and failure to */
1613   /* match (where you get nudged past the end one stride at a time). */
1614
1615   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1616   /* is determined only by the last character of the putative match. */
1617   /* If that character does not match, we will stride the proper */
1618   /* distance to propose a match that superimposes it on the last */
1619   /* instance of a character that matches it (per trt), or misses */
1620   /* it entirely if there is none. */
1621
1622   dirlen = len_byte * direction;
1623   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1624
1625   /* Record position after the end of the pattern.  */
1626   pat_end = base_pat + len_byte;
1627   /* BASE_PAT points to a character that we start scanning from.
1628      It is the first character in a forward search,
1629      the last character in a backward search.  */
1630   if (direction < 0)
1631     base_pat = pat_end - 1;
1632
1633   BM_tab_base = BM_tab;
1634   BM_tab += 0400;
1635   j = dirlen;           /* to get it in a register */
1636   /* A character that does not appear in the pattern induces a */
1637   /* stride equal to the pattern length. */
1638   while (BM_tab_base != BM_tab)
1639     {
1640       *--BM_tab = j;
1641       *--BM_tab = j;
1642       *--BM_tab = j;
1643       *--BM_tab = j;
1644     }
1645
1646   /* We use this for translation, instead of TRT itself.
1647      We fill this in to handle the characters that actually
1648      occur in the pattern.  Others don't matter anyway!  */
1649   bzero (simple_translate, sizeof simple_translate);
1650   for (i = 0; i < 0400; i++)
1651     simple_translate[i] = i;
1652
1653   if (char_base)
1654     {
1655       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1656          byte following them are the target of translation.  */
1657       unsigned char str[MAX_MULTIBYTE_LENGTH];
1658       int len = CHAR_STRING (char_base, str);
1659
1660       translate_prev_byte1 = str[len - 2];
1661       if (len > 2)
1662         {
1663           translate_prev_byte2 = str[len - 3];
1664           if (len > 3)
1665             {
1666               translate_prev_byte3 = str[len - 4];
1667               if (len > 4)
1668                 translate_prev_byte4 = str[len - 5];
1669             }
1670         }
1671     }
1672
1673   i = 0;
1674   while (i != infinity)
1675     {
1676       unsigned char *ptr = base_pat + i;
1677       i += direction;
1678       if (i == dirlen)
1679         i = infinity;
1680       if (! NILP (trt))
1681         {
1682           /* If the byte currently looking at is the last of a
1683              character to check case-equivalents, set CH to that
1684              character.  An ASCII character and a non-ASCII character
1685              matching with CHAR_BASE are to be checked.  */
1686           int ch = -1;
1687
1688           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1689             ch = *ptr;
1690           else if (char_base
1691                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1692             {
1693               unsigned char *charstart = ptr - 1;
1694
1695               while (! (CHAR_HEAD_P (*charstart)))
1696                 charstart--;
1697               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1698               if (char_base != (ch & ~0x3F))
1699                 ch = -1;
1700             }
1701
1702           if (ch > 0400)
1703             j = (ch & 0x3F) | 0200;
1704           else
1705             j = *ptr;
1706
1707           if (i == infinity)
1708             stride_for_teases = BM_tab[j];
1709
1710           BM_tab[j] = dirlen - i;
1711           /* A translation table is accompanied by its inverse -- see */
1712           /* comment following downcase_table for details */
1713           if (ch >= 0)
1714             {
1715               int starting_ch = ch;
1716               int starting_j = j;
1717
1718               while (1)
1719                 {
1720                   TRANSLATE (ch, inverse_trt, ch);
1721                   if (ch > 0400)
1722                     j = (ch & 0x3F) | 0200;
1723                   else
1724                     j = ch;
1725
1726                   /* For all the characters that map into CH,
1727                      set up simple_translate to map the last byte
1728                      into STARTING_J.  */
1729                   simple_translate[j] = starting_j;
1730                   if (ch == starting_ch)
1731                     break;
1732                   BM_tab[j] = dirlen - i;
1733                 }
1734             }
1735         }
1736       else
1737         {
1738           j = *ptr;
1739
1740           if (i == infinity)
1741             stride_for_teases = BM_tab[j];
1742           BM_tab[j] = dirlen - i;
1743         }
1744       /* stride_for_teases tells how much to stride if we get a */
1745       /* match on the far character but are subsequently */
1746       /* disappointed, by recording what the stride would have been */
1747       /* for that character if the last character had been */
1748       /* different. */
1749     }
1750   infinity = dirlen - infinity;
1751   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1752   /* loop invariant - POS_BYTE points at where last char (first
1753      char if reverse) of pattern would align in a possible match.  */
1754   while (n != 0)
1755     {
1756       int tail_end;
1757       unsigned char *tail_end_ptr;
1758
1759       /* It's been reported that some (broken) compiler thinks that
1760          Boolean expressions in an arithmetic context are unsigned.
1761          Using an explicit ?1:0 prevents this.  */
1762       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1763           < 0)
1764         return (n * (0 - direction));
1765       /* First we do the part we can by pointers (maybe nothing) */
1766       QUIT;
1767       pat = base_pat;
1768       limit = pos_byte - dirlen + direction;
1769       if (direction > 0)
1770         {
1771           limit = BUFFER_CEILING_OF (limit);
1772           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1773              can take on without hitting edge of buffer or the gap.  */
1774           limit = min (limit, pos_byte + 20000);
1775           limit = min (limit, lim_byte - 1);
1776         }
1777       else
1778         {
1779           limit = BUFFER_FLOOR_OF (limit);
1780           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1781              can take on without hitting edge of buffer or the gap.  */
1782           limit = max (limit, pos_byte - 20000);
1783           limit = max (limit, lim_byte);
1784         }
1785       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1786       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1787
1788       if ((limit - pos_byte) * direction > 20)
1789         {
1790           unsigned char *p2;
1791
1792           p_limit = BYTE_POS_ADDR (limit);
1793           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1794           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1795           while (1)             /* use one cursor setting as long as i can */
1796             {
1797               if (direction > 0) /* worth duplicating */
1798                 {
1799                   /* Use signed comparison if appropriate
1800                      to make cursor+infinity sure to be > p_limit.
1801                      Assuming that the buffer lies in a range of addresses
1802                      that are all "positive" (as ints) or all "negative",
1803                      either kind of comparison will work as long
1804                      as we don't step by infinity.  So pick the kind
1805                      that works when we do step by infinity.  */
1806                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1807                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1808                       cursor += BM_tab[*cursor];
1809                   else
1810                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1811                       cursor += BM_tab[*cursor];
1812                 }
1813               else
1814                 {
1815                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1816                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1817                       cursor += BM_tab[*cursor];
1818                   else
1819                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1820                       cursor += BM_tab[*cursor];
1821                 }
1822 /* If you are here, cursor is beyond the end of the searched region. */
1823 /* This can happen if you match on the far character of the pattern, */
1824 /* because the "stride" of that character is infinity, a number able */
1825 /* to throw you well beyond the end of the search.  It can also */
1826 /* happen if you fail to match within the permitted region and would */
1827 /* otherwise try a character beyond that region */
1828               if ((cursor - p_limit) * direction <= len_byte)
1829                 break;  /* a small overrun is genuine */
1830               cursor -= infinity; /* large overrun = hit */
1831               i = dirlen - direction;
1832               if (! NILP (trt))
1833                 {
1834                   while ((i -= direction) + direction != 0)
1835                     {
1836                       int ch;
1837                       cursor -= direction;
1838                       /* Translate only the last byte of a character.  */
1839                       if (! multibyte
1840                           || ((cursor == tail_end_ptr
1841                                || CHAR_HEAD_P (cursor[1]))
1842                               && (CHAR_HEAD_P (cursor[0])
1843                                   /* Check if this is the last byte of
1844                                      a translable character.  */
1845                                   || (translate_prev_byte1 == cursor[-1]
1846                                       && (CHAR_HEAD_P (translate_prev_byte1)
1847                                           || (translate_prev_byte2 == cursor[-2]
1848                                               && (CHAR_HEAD_P (translate_prev_byte2)
1849                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1850                         ch = simple_translate[*cursor];
1851                       else
1852                         ch = *cursor;
1853                       if (pat[i] != ch)
1854                         break;
1855                     }
1856                 }
1857               else
1858                 {
1859                   while ((i -= direction) + direction != 0)
1860                     {
1861                       cursor -= direction;
1862                       if (pat[i] != *cursor)
1863                         break;
1864                     }
1865                 }
1866               cursor += dirlen - i - direction; /* fix cursor */
1867               if (i + direction == 0)
1868                 {
1869                   int position;
1870
1871                   cursor -= direction;
1872
1873                   position = pos_byte + cursor - p2 + ((direction > 0)
1874                                                        ? 1 - len_byte : 0);
1875                   set_search_regs (position, len_byte);
1876
1877                   if ((n -= direction) != 0)
1878                     cursor += dirlen; /* to resume search */
1879                   else
1880                     return ((direction > 0)
1881                             ? search_regs.end[0] : search_regs.start[0]);
1882                 }
1883               else
1884                 cursor += stride_for_teases; /* <sigh> we lose -  */
1885             }
1886           pos_byte += cursor - p2;
1887         }
1888       else
1889         /* Now we'll pick up a clump that has to be done the hard */
1890         /* way because it covers a discontinuity */
1891         {
1892           limit = ((direction > 0)
1893                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1894                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1895           limit = ((direction > 0)
1896                    ? min (limit + len_byte, lim_byte - 1)
1897                    : max (limit - len_byte, lim_byte));
1898           /* LIMIT is now the last value POS_BYTE can have
1899              and still be valid for a possible match.  */
1900           while (1)
1901             {
1902               /* This loop can be coded for space rather than */
1903               /* speed because it will usually run only once. */
1904               /* (the reach is at most len + 21, and typically */
1905               /* does not exceed len) */
1906               while ((limit - pos_byte) * direction >= 0)
1907                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1908               /* now run the same tests to distinguish going off the */
1909               /* end, a match or a phony match. */
1910               if ((pos_byte - limit) * direction <= len_byte)
1911                 break;  /* ran off the end */
1912               /* Found what might be a match.
1913                  Set POS_BYTE back to last (first if reverse) pos.  */
1914               pos_byte -= infinity;
1915               i = dirlen - direction;
1916               while ((i -= direction) + direction != 0)
1917                 {
1918                   int ch;
1919                   unsigned char *ptr;
1920                   pos_byte -= direction;
1921                   ptr = BYTE_POS_ADDR (pos_byte);
1922                   /* Translate only the last byte of a character.  */
1923                   if (! multibyte
1924                       || ((ptr == tail_end_ptr
1925                            || CHAR_HEAD_P (ptr[1]))
1926                           && (CHAR_HEAD_P (ptr[0])
1927                               /* Check if this is the last byte of a
1928                                  translable character.  */
1929                               || (translate_prev_byte1 == ptr[-1]
1930                                   && (CHAR_HEAD_P (translate_prev_byte1)
1931                                       || (translate_prev_byte2 == ptr[-2]
1932                                           && (CHAR_HEAD_P (translate_prev_byte2)
1933                                               || translate_prev_byte3 == ptr[-3])))))))
1934                     ch = simple_translate[*ptr];
1935                   else
1936                     ch = *ptr;
1937                   if (pat[i] != ch)
1938                     break;
1939                 }
1940               /* Above loop has moved POS_BYTE part or all the way
1941                  back to the first pos (last pos if reverse).
1942                  Set it once again at the last (first if reverse) char.  */
1943               pos_byte += dirlen - i- direction;
1944               if (i + direction == 0)
1945                 {
1946                   int position;
1947                   pos_byte -= direction;
1948
1949                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1950
1951                   set_search_regs (position, len_byte);
1952
1953                   if ((n -= direction) != 0)
1954                     pos_byte += dirlen; /* to resume search */
1955                   else
1956                     return ((direction > 0)
1957                             ? search_regs.end[0] : search_regs.start[0]);
1958                 }
1959               else
1960                 pos_byte += stride_for_teases;
1961             }
1962           }
1963       /* We have done one clump.  Can we continue? */
1964       if ((lim_byte - pos_byte) * direction < 0)
1965         return ((0 - n) * direction);
1966     }
1967   return BYTE_TO_CHAR (pos_byte);
1968 }
1969
1970 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1971    for the overall match just found in the current buffer.
1972    Also clear out the match data for registers 1 and up.  */
1973
1974 static void
1975 set_search_regs (beg_byte, nbytes)
1976      int beg_byte, nbytes;
1977 {
1978   int i;
1979
1980   /* Make sure we have registers in which to store
1981      the match position.  */
1982   if (search_regs.num_regs == 0)
1983     {
1984       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1985       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1986       search_regs.num_regs = 2;
1987     }
1988
1989   /* Clear out the other registers.  */
1990   for (i = 1; i < search_regs.num_regs; i++)
1991     {
1992       search_regs.start[i] = -1;
1993       search_regs.end[i] = -1;
1994     }
1995
1996   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1997   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1998   XSETBUFFER (last_thing_searched, current_buffer);
1999 }
2000 \f
2001 /* Given a string of words separated by word delimiters,
2002   compute a regexp that matches those exact words
2003   separated by arbitrary punctuation.  */
2004
2005 static Lisp_Object
2006 wordify (string)
2007      Lisp_Object string;
2008 {
2009   register unsigned char *p, *o;
2010   register int i, i_byte, len, punct_count = 0, word_count = 0;
2011   Lisp_Object val;
2012   int prev_c = 0;
2013   int adjust;
2014
2015   CHECK_STRING (string);
2016   p = SDATA (string);
2017   len = SCHARS (string);
2018
2019   for (i = 0, i_byte = 0; i < len; )
2020     {
2021       int c;
2022
2023       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2024
2025       if (SYNTAX (c) != Sword)
2026         {
2027           punct_count++;
2028           if (i > 0 && SYNTAX (prev_c) == Sword)
2029             word_count++;
2030         }
2031
2032       prev_c = c;
2033     }
2034
2035   if (SYNTAX (prev_c) == Sword)
2036     word_count++;
2037   if (!word_count)
2038     return empty_string;
2039
2040   adjust = - punct_count + 5 * (word_count - 1) + 4;
2041   if (STRING_MULTIBYTE (string))
2042     val = make_uninit_multibyte_string (len + adjust,
2043                                         SBYTES (string)
2044                                         + adjust);
2045   else
2046     val = make_uninit_string (len + adjust);
2047
2048   o = SDATA (val);
2049   *o++ = '\\';
2050   *o++ = 'b';
2051   prev_c = 0;
2052
2053   for (i = 0, i_byte = 0; i < len; )
2054     {
2055       int c;
2056       int i_byte_orig = i_byte;
2057
2058       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2059
2060       if (SYNTAX (c) == Sword)
2061         {
2062           bcopy (SDATA (string) + i_byte_orig, o,
2063                  i_byte - i_byte_orig);
2064           o += i_byte - i_byte_orig;
2065         }
2066       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2067         {
2068           *o++ = '\\';
2069           *o++ = 'W';
2070           *o++ = '\\';
2071           *o++ = 'W';
2072           *o++ = '*';
2073         }
2074
2075       prev_c = c;
2076     }
2077
2078   *o++ = '\\';
2079   *o++ = 'b';
2080
2081   return val;
2082 }
2083 \f
2084 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2085        "MSearch backward: ",
2086        doc: /* Search backward from point for STRING.
2087 Set point to the beginning of the occurrence found, and return point.
2088 An optional second argument bounds the search; it is a buffer position.
2089 The match found must not extend before that position.
2090 Optional third argument, if t, means if fail just return nil (no error).
2091  If not nil and not t, position at limit of search and return nil.
2092 Optional fourth argument is repeat count--search for successive occurrences.
2093
2094 Search case-sensitivity is determined by the value of the variable
2095 `case-fold-search', which see.
2096
2097 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2098      (string, bound, noerror, count)
2099      Lisp_Object string, bound, noerror, count;
2100 {
2101   return search_command (string, bound, noerror, count, -1, 0, 0);
2102 }
2103
2104 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2105        doc: /* Search forward from point for STRING.
2106 Set point to the end of the occurrence found, and return point.
2107 An optional second argument bounds the search; it is a buffer position.
2108 The match found must not extend after that position.  nil is equivalent
2109   to (point-max).
2110 Optional third argument, if t, means if fail just return nil (no error).
2111   If not nil and not t, move to limit of search and return nil.
2112 Optional fourth argument is repeat count--search for successive occurrences.
2113
2114 Search case-sensitivity is determined by the value of the variable
2115 `case-fold-search', which see.
2116
2117 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2118      (string, bound, noerror, count)
2119      Lisp_Object string, bound, noerror, count;
2120 {
2121   return search_command (string, bound, noerror, count, 1, 0, 0);
2122 }
2123
2124 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2125        "sWord search backward: ",
2126        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2127 Set point to the beginning of the occurrence found, and return point.
2128 An optional second argument bounds the search; it is a buffer position.
2129 The match found must not extend before that position.
2130 Optional third argument, if t, means if fail just return nil (no error).
2131   If not nil and not t, move to limit of search and return nil.
2132 Optional fourth argument is repeat count--search for successive occurrences.  */)
2133      (string, bound, noerror, count)
2134      Lisp_Object string, bound, noerror, count;
2135 {
2136   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2137 }
2138
2139 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2140        "sWord search: ",
2141        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2142 Set point to the end of the occurrence found, and return point.
2143 An optional second argument bounds the search; it is a buffer position.
2144 The match found must not extend after that position.
2145 Optional third argument, if t, means if fail just return nil (no error).
2146   If not nil and not t, move to limit of search and return nil.
2147 Optional fourth argument is repeat count--search for successive occurrences.  */)
2148      (string, bound, noerror, count)
2149      Lisp_Object string, bound, noerror, count;
2150 {
2151   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2152 }
2153
2154 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2155        "sRE search backward: ",
2156        doc: /* Search backward from point for match for regular expression REGEXP.
2157 Set point to the beginning of the match, and return point.
2158 The match found is the one starting last in the buffer
2159 and yet ending before the origin of the search.
2160 An optional second argument bounds the search; it is a buffer position.
2161 The match found must start at or after that position.
2162 Optional third argument, if t, means if fail just return nil (no error).
2163   If not nil and not t, move to limit of search and return nil.
2164 Optional fourth argument is repeat count--search for successive occurrences.
2165 See also the functions `match-beginning', `match-end', `match-string',
2166 and `replace-match'.  */)
2167      (regexp, bound, noerror, count)
2168      Lisp_Object regexp, bound, noerror, count;
2169 {
2170   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2171 }
2172
2173 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2174        "sRE search: ",
2175        doc: /* Search forward from point for regular expression REGEXP.
2176 Set point to the end of the occurrence found, and return point.
2177 An optional second argument bounds the search; it is a buffer position.
2178 The match found must not extend after that position.
2179 Optional third argument, if t, means if fail just return nil (no error).
2180   If not nil and not t, move to limit of search and return nil.
2181 Optional fourth argument is repeat count--search for successive occurrences.
2182 See also the functions `match-beginning', `match-end', `match-string',
2183 and `replace-match'.  */)
2184      (regexp, bound, noerror, count)
2185      Lisp_Object regexp, bound, noerror, count;
2186 {
2187   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2188 }
2189
2190 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2191        "sPosix search backward: ",
2192        doc: /* Search backward from point for match for regular expression REGEXP.
2193 Find the longest match in accord with Posix regular expression rules.
2194 Set point to the beginning of the match, and return point.
2195 The match found is the one starting last in the buffer
2196 and yet ending before the origin of the search.
2197 An optional second argument bounds the search; it is a buffer position.
2198 The match found must start at or after that position.
2199 Optional third argument, if t, means if fail just return nil (no error).
2200   If not nil and not t, move to limit of search and return nil.
2201 Optional fourth argument is repeat count--search for successive occurrences.
2202 See also the functions `match-beginning', `match-end', `match-string',
2203 and `replace-match'.  */)
2204      (regexp, bound, noerror, count)
2205      Lisp_Object regexp, bound, noerror, count;
2206 {
2207   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2208 }
2209
2210 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2211        "sPosix search: ",
2212        doc: /* Search forward from point for regular expression REGEXP.
2213 Find the longest match in accord with Posix regular expression rules.
2214 Set point to the end of the occurrence found, and return point.
2215 An optional second argument bounds the search; it is a buffer position.
2216 The match found must not extend after that position.
2217 Optional third argument, if t, means if fail just return nil (no error).
2218   If not nil and not t, move to limit of search and return nil.
2219 Optional fourth argument is repeat count--search for successive occurrences.
2220 See also the functions `match-beginning', `match-end', `match-string',
2221 and `replace-match'.  */)
2222      (regexp, bound, noerror, count)
2223      Lisp_Object regexp, bound, noerror, count;
2224 {
2225   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2226 }
2227 \f
2228 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2229        doc: /* Replace text matched by last search with NEWTEXT.
2230 Leave point at the end of the replacement text.
2231
2232 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2233 Otherwise maybe capitalize the whole text, or maybe just word initials,
2234 based on the replaced text.
2235 If the replaced text has only capital letters
2236 and has at least one multiletter word, convert NEWTEXT to all caps.
2237 Otherwise if all words are capitalized in the replaced text,
2238 capitalize each word in NEWTEXT.
2239
2240 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2241 Otherwise treat `\\' as special:
2242   `\\&' in NEWTEXT means substitute original matched text.
2243   `\\N' means substitute what matched the Nth `\\(...\\)'.
2244        If Nth parens didn't match, substitute nothing.
2245   `\\\\' means insert one `\\'.
2246 Case conversion does not apply to these substitutions.
2247
2248 FIXEDCASE and LITERAL are optional arguments.
2249
2250 The optional fourth argument STRING can be a string to modify.
2251 This is meaningful when the previous match was done against STRING,
2252 using `string-match'.  When used this way, `replace-match'
2253 creates and returns a new string made by copying STRING and replacing
2254 the part of STRING that was matched.
2255
2256 The optional fifth argument SUBEXP specifies a subexpression;
2257 it says to replace just that subexpression with NEWTEXT,
2258 rather than replacing the entire matched text.
2259 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2260 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2261 NEWTEXT in place of subexp N.
2262 This is useful only after a regular expression search or match,
2263 since only regular expressions have distinguished subexpressions.  */)
2264      (newtext, fixedcase, literal, string, subexp)
2265      Lisp_Object newtext, fixedcase, literal, string, subexp;
2266 {
2267   enum { nochange, all_caps, cap_initial } case_action;
2268   register int pos, pos_byte;
2269   int some_multiletter_word;
2270   int some_lowercase;
2271   int some_uppercase;
2272   int some_nonuppercase_initial;
2273   register int c, prevc;
2274   int sub;
2275   int opoint, newpoint;
2276
2277   CHECK_STRING (newtext);
2278
2279   if (! NILP (string))
2280     CHECK_STRING (string);
2281
2282   case_action = nochange;       /* We tried an initialization */
2283                                 /* but some C compilers blew it */
2284
2285   if (search_regs.num_regs <= 0)
2286     error ("`replace-match' called before any match found");
2287
2288   if (NILP (subexp))
2289     sub = 0;
2290   else
2291     {
2292       CHECK_NUMBER (subexp);
2293       sub = XINT (subexp);
2294       if (sub < 0 || sub >= search_regs.num_regs)
2295         args_out_of_range (subexp, make_number (search_regs.num_regs));
2296     }
2297
2298   if (NILP (string))
2299     {
2300       if (search_regs.start[sub] < BEGV
2301           || search_regs.start[sub] > search_regs.end[sub]
2302           || search_regs.end[sub] > ZV)
2303         args_out_of_range (make_number (search_regs.start[sub]),
2304                            make_number (search_regs.end[sub]));
2305     }
2306   else
2307     {
2308       if (search_regs.start[sub] < 0
2309           || search_regs.start[sub] > search_regs.end[sub]
2310           || search_regs.end[sub] > SCHARS (string))
2311         args_out_of_range (make_number (search_regs.start[sub]),
2312                            make_number (search_regs.end[sub]));
2313     }
2314
2315   if (NILP (fixedcase))
2316     {
2317       /* Decide how to casify by examining the matched text. */
2318       int last;
2319
2320       pos = search_regs.start[sub];
2321       last = search_regs.end[sub];
2322
2323       if (NILP (string))
2324         pos_byte = CHAR_TO_BYTE (pos);
2325       else
2326         pos_byte = string_char_to_byte (string, pos);
2327
2328       prevc = '\n';
2329       case_action = all_caps;
2330
2331       /* some_multiletter_word is set nonzero if any original word
2332          is more than one letter long. */
2333       some_multiletter_word = 0;
2334       some_lowercase = 0;
2335       some_nonuppercase_initial = 0;
2336       some_uppercase = 0;
2337
2338       while (pos < last)
2339         {
2340           if (NILP (string))
2341             {
2342               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2343               INC_BOTH (pos, pos_byte);
2344             }
2345           else
2346             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2347
2348           if (LOWERCASEP (c))
2349             {
2350               /* Cannot be all caps if any original char is lower case */
2351
2352               some_lowercase = 1;
2353               if (SYNTAX (prevc) != Sword)
2354                 some_nonuppercase_initial = 1;
2355               else
2356                 some_multiletter_word = 1;
2357             }
2358           else if (UPPERCASEP (c))
2359             {
2360               some_uppercase = 1;
2361               if (SYNTAX (prevc) != Sword)
2362                 ;
2363               else
2364                 some_multiletter_word = 1;
2365             }
2366           else
2367             {
2368               /* If the initial is a caseless word constituent,
2369                  treat that like a lowercase initial.  */
2370               if (SYNTAX (prevc) != Sword)
2371                 some_nonuppercase_initial = 1;
2372             }
2373
2374           prevc = c;
2375         }
2376
2377       /* Convert to all caps if the old text is all caps
2378          and has at least one multiletter word.  */
2379       if (! some_lowercase && some_multiletter_word)
2380         case_action = all_caps;
2381       /* Capitalize each word, if the old text has all capitalized words.  */
2382       else if (!some_nonuppercase_initial && some_multiletter_word)
2383         case_action = cap_initial;
2384       else if (!some_nonuppercase_initial && some_uppercase)
2385         /* Should x -> yz, operating on X, give Yz or YZ?
2386            We'll assume the latter.  */
2387         case_action = all_caps;
2388       else
2389         case_action = nochange;
2390     }
2391
2392   /* Do replacement in a string.  */
2393   if (!NILP (string))
2394     {
2395       Lisp_Object before, after;
2396
2397       before = Fsubstring (string, make_number (0),
2398                            make_number (search_regs.start[sub]));
2399       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2400
2401       /* Substitute parts of the match into NEWTEXT
2402          if desired.  */
2403       if (NILP (literal))
2404         {
2405           int lastpos = 0;
2406           int lastpos_byte = 0;
2407           /* We build up the substituted string in ACCUM.  */
2408           Lisp_Object accum;
2409           Lisp_Object middle;
2410           int length = SBYTES (newtext);
2411
2412           accum = Qnil;
2413
2414           for (pos_byte = 0, pos = 0; pos_byte < length;)
2415             {
2416               int substart = -1;
2417               int subend = 0;
2418               int delbackslash = 0;
2419
2420               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2421
2422               if (c == '\\')
2423                 {
2424                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2425
2426                   if (c == '&')
2427                     {
2428                       substart = search_regs.start[sub];
2429                       subend = search_regs.end[sub];
2430                     }
2431                   else if (c >= '1' && c <= '9')
2432                     {
2433                       if (search_regs.start[c - '0'] >= 0
2434                           && c <= search_regs.num_regs + '0')
2435                         {
2436                           substart = search_regs.start[c - '0'];
2437                           subend = search_regs.end[c - '0'];
2438                         }
2439                       else
2440                         {
2441                           /* If that subexp did not match,
2442                              replace \\N with nothing.  */
2443                           substart = 0;
2444                           subend = 0;
2445                         }
2446                     }
2447                   else if (c == '\\')
2448                     delbackslash = 1;
2449                   else
2450                     error ("Invalid use of `\\' in replacement text");
2451                 }
2452               if (substart >= 0)
2453                 {
2454                   if (pos - 2 != lastpos)
2455                     middle = substring_both (newtext, lastpos,
2456                                              lastpos_byte,
2457                                              pos - 2, pos_byte - 2);
2458                   else
2459                     middle = Qnil;
2460                   accum = concat3 (accum, middle,
2461                                    Fsubstring (string,
2462                                                make_number (substart),
2463                                                make_number (subend)));
2464                   lastpos = pos;
2465                   lastpos_byte = pos_byte;
2466                 }
2467               else if (delbackslash)
2468                 {
2469                   middle = substring_both (newtext, lastpos,
2470                                            lastpos_byte,
2471                                            pos - 1, pos_byte - 1);
2472
2473                   accum = concat2 (accum, middle);
2474                   lastpos = pos;
2475                   lastpos_byte = pos_byte;
2476                 }
2477             }
2478
2479           if (pos != lastpos)
2480             middle = substring_both (newtext, lastpos,
2481                                      lastpos_byte,
2482                                      pos, pos_byte);
2483           else
2484             middle = Qnil;
2485
2486           newtext = concat2 (accum, middle);
2487         }
2488
2489       /* Do case substitution in NEWTEXT if desired.  */
2490       if (case_action == all_caps)
2491         newtext = Fupcase (newtext);
2492       else if (case_action == cap_initial)
2493         newtext = Fupcase_initials (newtext);
2494
2495       return concat3 (before, newtext, after);
2496     }
2497
2498   /* Record point, then move (quietly) to the start of the match.  */
2499   if (PT >= search_regs.end[sub])
2500     opoint = PT - ZV;
2501   else if (PT > search_regs.start[sub])
2502     opoint = search_regs.end[sub] - ZV;
2503   else
2504     opoint = PT;
2505
2506   /* If we want non-literal replacement,
2507      perform substitution on the replacement string.  */
2508   if (NILP (literal))
2509     {
2510       int length = SBYTES (newtext);
2511       unsigned char *substed;
2512       int substed_alloc_size, substed_len;
2513       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2514       int str_multibyte = STRING_MULTIBYTE (newtext);
2515       Lisp_Object rev_tbl;
2516       int really_changed = 0;
2517
2518       rev_tbl = Qnil;
2519
2520       substed_alloc_size = length * 2 + 100;
2521       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2522       substed_len = 0;
2523
2524       /* Go thru NEWTEXT, producing the actual text to insert in
2525          SUBSTED while adjusting multibyteness to that of the current
2526          buffer.  */
2527
2528       for (pos_byte = 0, pos = 0; pos_byte < length;)
2529         {
2530           unsigned char str[MAX_MULTIBYTE_LENGTH];
2531           unsigned char *add_stuff = NULL;
2532           int add_len = 0;
2533           int idx = -1;
2534
2535           if (str_multibyte)
2536             {
2537               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2538               if (!buf_multibyte)
2539                 c = multibyte_char_to_unibyte (c, rev_tbl);
2540             }
2541           else
2542             {
2543               /* Note that we don't have to increment POS.  */
2544               c = SREF (newtext, pos_byte++);
2545               if (buf_multibyte)
2546                 c = unibyte_char_to_multibyte (c);
2547             }
2548
2549           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2550              or set IDX to a match index, which means put that part
2551              of the buffer text into SUBSTED.  */
2552
2553           if (c == '\\')
2554             {
2555               really_changed = 1;
2556
2557               if (str_multibyte)
2558                 {
2559                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2560                                                       pos, pos_byte);
2561                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2562                     c = multibyte_char_to_unibyte (c, rev_tbl);
2563                 }
2564               else
2565                 {
2566                   c = SREF (newtext, pos_byte++);
2567                   if (buf_multibyte)
2568                     c = unibyte_char_to_multibyte (c);
2569                 }
2570
2571               if (c == '&')
2572                 idx = sub;
2573               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2574                 {
2575                   if (search_regs.start[c - '0'] >= 1)
2576                     idx = c - '0';
2577                 }
2578               else if (c == '\\')
2579                 add_len = 1, add_stuff = "\\";
2580               else
2581                 {
2582                   xfree (substed);
2583                   error ("Invalid use of `\\' in replacement text");
2584                 }
2585             }
2586           else
2587             {
2588               add_len = CHAR_STRING (c, str);
2589               add_stuff = str;
2590             }
2591
2592           /* If we want to copy part of a previous match,
2593              set up ADD_STUFF and ADD_LEN to point to it.  */
2594           if (idx >= 0)
2595             {
2596               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2597               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2598               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2599                 move_gap (search_regs.start[idx]);
2600               add_stuff = BYTE_POS_ADDR (begbyte);
2601             }
2602
2603           /* Now the stuff we want to add to SUBSTED
2604              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2605
2606           /* Make sure SUBSTED is big enough.  */
2607           if (substed_len + add_len >= substed_alloc_size)
2608             {
2609               substed_alloc_size = substed_len + add_len + 500;
2610               substed = (unsigned char *) xrealloc (substed,
2611                                                     substed_alloc_size + 1);
2612             }
2613
2614           /* Now add to the end of SUBSTED.  */
2615           if (add_stuff)
2616             {
2617               bcopy (add_stuff, substed + substed_len, add_len);
2618               substed_len += add_len;
2619             }
2620         }
2621
2622       if (really_changed)
2623         {
2624           if (buf_multibyte)
2625             {
2626               int nchars = multibyte_chars_in_text (substed, substed_len);
2627
2628               newtext = make_multibyte_string (substed, nchars, substed_len);
2629             }
2630           else
2631             newtext = make_unibyte_string (substed, substed_len);
2632         }
2633       xfree (substed);
2634     }
2635
2636   /* Replace the old text with the new in the cleanest possible way.  */
2637   replace_range (search_regs.start[sub], search_regs.end[sub],
2638                  newtext, 1, 0, 1);
2639   newpoint = search_regs.start[sub] + SCHARS (newtext);
2640
2641   if (case_action == all_caps)
2642     Fupcase_region (make_number (search_regs.start[sub]),
2643                     make_number (newpoint));
2644   else if (case_action == cap_initial)
2645     Fupcase_initials_region (make_number (search_regs.start[sub]),
2646                              make_number (newpoint));
2647
2648   /* Adjust search data for this change.  */
2649   {
2650     int oldend = search_regs.end[sub];
2651     int oldstart = search_regs.start[sub];
2652     int change = newpoint - search_regs.end[sub];
2653     int i;
2654
2655     for (i = 0; i < search_regs.num_regs; i++)
2656       {
2657         if (search_regs.start[i] >= oldend)
2658           search_regs.start[i] += change;
2659         else if (search_regs.start[i] > oldstart)
2660           search_regs.start[i] = oldstart;
2661         if (search_regs.end[i] >= oldend)
2662           search_regs.end[i] += change;
2663         else if (search_regs.end[i] > oldstart)
2664           search_regs.end[i] = oldstart;
2665       }
2666   }
2667
2668   /* Put point back where it was in the text.  */
2669   if (opoint <= 0)
2670     TEMP_SET_PT (opoint + ZV);
2671   else
2672     TEMP_SET_PT (opoint);
2673
2674   /* Now move point "officially" to the start of the inserted replacement.  */
2675   move_if_not_intangible (newpoint);
2676
2677   return Qnil;
2678 }
2679 \f
2680 static Lisp_Object
2681 match_limit (num, beginningp)
2682      Lisp_Object num;
2683      int beginningp;
2684 {
2685   register int n;
2686
2687   CHECK_NUMBER (num);
2688   n = XINT (num);
2689   if (n < 0)
2690     args_out_of_range (num, make_number (0));
2691   if (search_regs.num_regs <= 0)
2692     error ("No match data, because no search succeeded");
2693   if (n >= search_regs.num_regs
2694       || search_regs.start[n] < 0)
2695     return Qnil;
2696   return (make_number ((beginningp) ? search_regs.start[n]
2697                                     : search_regs.end[n]));
2698 }
2699
2700 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2701        doc: /* Return position of start of text matched by last search.
2702 SUBEXP, a number, specifies which parenthesized expression in the last
2703   regexp.
2704 Value is nil if SUBEXPth pair didn't match, or there were less than
2705   SUBEXP pairs.
2706 Zero means the entire text matched by the whole regexp or whole string.  */)
2707      (subexp)
2708      Lisp_Object subexp;
2709 {
2710   return match_limit (subexp, 1);
2711 }
2712
2713 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2714        doc: /* Return position of end of text matched by last search.
2715 SUBEXP, a number, specifies which parenthesized expression in the last
2716   regexp.
2717 Value is nil if SUBEXPth pair didn't match, or there were less than
2718   SUBEXP pairs.
2719 Zero means the entire text matched by the whole regexp or whole string.  */)
2720      (subexp)
2721      Lisp_Object subexp;
2722 {
2723   return match_limit (subexp, 0);
2724 }
2725
2726 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2727        doc: /* Return a list containing all info on what the last search matched.
2728 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2729 All the elements are markers or nil (nil if the Nth pair didn't match)
2730 if the last match was on a buffer; integers or nil if a string was matched.
2731 Use `store-match-data' to reinstate the data in this list.
2732
2733 If INTEGERS (the optional first argument) is non-nil, always use
2734 integers \(rather than markers) to represent buffer positions.  In
2735 this case, and if the last match was in a buffer, the buffer will get
2736 stored as one additional element at the end of the list.
2737
2738 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2739 enough to hold all the values, and if INTEGERS is non-nil, no consing
2740 is done.
2741
2742 If optional third arg RESEAT is non-nil, any previous markers on the
2743 REUSE list will be modified to point to nowhere.
2744
2745 Return value is undefined if the last search failed.  */)
2746   (integers, reuse, reseat)
2747      Lisp_Object integers, reuse, reseat;
2748 {
2749   Lisp_Object tail, prev;
2750   Lisp_Object *data;
2751   int i, len;
2752
2753   if (!NILP (reseat))
2754     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2755       if (MARKERP (XCAR (tail)))
2756         {
2757           unchain_marker (XMARKER (XCAR (tail)));
2758           XSETCAR (tail, Qnil);
2759         }
2760
2761   if (NILP (last_thing_searched))
2762     return Qnil;
2763
2764   prev = Qnil;
2765
2766   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2767                                  * sizeof (Lisp_Object));
2768
2769   len = 0;
2770   for (i = 0; i < search_regs.num_regs; i++)
2771     {
2772       int start = search_regs.start[i];
2773       if (start >= 0)
2774         {
2775           if (EQ (last_thing_searched, Qt)
2776               || ! NILP (integers))
2777             {
2778               XSETFASTINT (data[2 * i], start);
2779               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2780             }
2781           else if (BUFFERP (last_thing_searched))
2782             {
2783               data[2 * i] = Fmake_marker ();
2784               Fset_marker (data[2 * i],
2785                            make_number (start),
2786                            last_thing_searched);
2787               data[2 * i + 1] = Fmake_marker ();
2788               Fset_marker (data[2 * i + 1],
2789                            make_number (search_regs.end[i]),
2790                            last_thing_searched);
2791             }
2792           else
2793             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2794             abort ();
2795
2796           len = 2 * i + 2;
2797         }
2798       else
2799         data[2 * i] = data[2 * i + 1] = Qnil;
2800     }
2801
2802   if (BUFFERP (last_thing_searched) && !NILP (integers))
2803     {
2804       data[len] = last_thing_searched;
2805       len++;
2806     }
2807
2808   /* If REUSE is not usable, cons up the values and return them.  */
2809   if (! CONSP (reuse))
2810     return Flist (len, data);
2811
2812   /* If REUSE is a list, store as many value elements as will fit
2813      into the elements of REUSE.  */
2814   for (i = 0, tail = reuse; CONSP (tail);
2815        i++, tail = XCDR (tail))
2816     {
2817       if (i < len)
2818         XSETCAR (tail, data[i]);
2819       else
2820         XSETCAR (tail, Qnil);
2821       prev = tail;
2822     }
2823
2824   /* If we couldn't fit all value elements into REUSE,
2825      cons up the rest of them and add them to the end of REUSE.  */
2826   if (i < len)
2827     XSETCDR (prev, Flist (len - i, data + i));
2828
2829   return reuse;
2830 }
2831
2832 /* Internal usage only:
2833    If RESEAT is `evaporate', put the markers back on the free list
2834    immediately.  No other references to the markers must exist in this case,
2835    so it is used only internally on the unwind stack and save-match-data from
2836    Lisp.  */
2837
2838 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2839        doc: /* Set internal data on last search match from elements of LIST.
2840 LIST should have been created by calling `match-data' previously.
2841
2842 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2843     (list, reseat)
2844      register Lisp_Object list, reseat;
2845 {
2846   register int i;
2847   register Lisp_Object marker;
2848
2849   if (running_asynch_code)
2850     save_search_regs ();
2851
2852   if (!CONSP (list) && !NILP (list))
2853     list = wrong_type_argument (Qconsp, list);
2854
2855   /* Unless we find a marker with a buffer or an explicit buffer
2856      in LIST, assume that this match data came from a string.  */
2857   last_thing_searched = Qt;
2858
2859   /* Allocate registers if they don't already exist.  */
2860   {
2861     int length = XFASTINT (Flength (list)) / 2;
2862
2863     if (length > search_regs.num_regs)
2864       {
2865         if (search_regs.num_regs == 0)
2866           {
2867             search_regs.start
2868               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2869             search_regs.end
2870               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2871           }
2872         else
2873           {
2874             search_regs.start
2875               = (regoff_t *) xrealloc (search_regs.start,
2876                                        length * sizeof (regoff_t));
2877             search_regs.end
2878               = (regoff_t *) xrealloc (search_regs.end,
2879                                        length * sizeof (regoff_t));
2880           }
2881
2882         for (i = search_regs.num_regs; i < length; i++)
2883           search_regs.start[i] = -1;
2884
2885         search_regs.num_regs = length;
2886       }
2887
2888     for (i = 0; CONSP (list); i++)
2889       {
2890         marker = XCAR (list);
2891         if (BUFFERP (marker))
2892           {
2893             last_thing_searched = marker;
2894             break;
2895           }
2896         if (i >= length)
2897           break;
2898         if (NILP (marker))
2899           {
2900             search_regs.start[i] = -1;
2901             list = XCDR (list);
2902           }
2903         else
2904           {
2905             int from;
2906             Lisp_Object m;
2907
2908             m = marker;
2909             if (MARKERP (marker))
2910               {
2911                 if (XMARKER (marker)->buffer == 0)
2912                   XSETFASTINT (marker, 0);
2913                 else
2914                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2915               }
2916
2917             CHECK_NUMBER_COERCE_MARKER (marker);
2918             from = XINT (marker);
2919
2920             if (!NILP (reseat) && MARKERP (m))
2921               {
2922                 if (EQ (reseat, Qevaporate))
2923                   free_marker (m);
2924                 else
2925                   unchain_marker (XMARKER (m));
2926                 XSETCAR (list, Qnil);
2927               }
2928
2929             if ((list = XCDR (list), !CONSP (list)))
2930               break;
2931
2932             m = marker = XCAR (list);
2933
2934             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2935               XSETFASTINT (marker, 0);
2936
2937             CHECK_NUMBER_COERCE_MARKER (marker);
2938             search_regs.start[i] = from;
2939             search_regs.end[i] = XINT (marker);
2940
2941             if (!NILP (reseat) && MARKERP (m))
2942               {
2943                 if (EQ (reseat, Qevaporate))
2944                   free_marker (m);
2945                 else
2946                   unchain_marker (XMARKER (m));
2947                 XSETCAR (list, Qnil);
2948               }
2949           }
2950         list = XCDR (list);
2951       }
2952
2953     for (; i < search_regs.num_regs; i++)
2954       search_regs.start[i] = -1;
2955   }
2956
2957   return Qnil;
2958 }
2959
2960 /* If non-zero the match data have been saved in saved_search_regs
2961    during the execution of a sentinel or filter. */
2962 static int search_regs_saved;
2963 static struct re_registers saved_search_regs;
2964 static Lisp_Object saved_last_thing_searched;
2965
2966 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2967    if asynchronous code (filter or sentinel) is running. */
2968 static void
2969 save_search_regs ()
2970 {
2971   if (!search_regs_saved)
2972     {
2973       saved_search_regs.num_regs = search_regs.num_regs;
2974       saved_search_regs.start = search_regs.start;
2975       saved_search_regs.end = search_regs.end;
2976       saved_last_thing_searched = last_thing_searched;
2977       last_thing_searched = Qnil;
2978       search_regs.num_regs = 0;
2979       search_regs.start = 0;
2980       search_regs.end = 0;
2981
2982       search_regs_saved = 1;
2983     }
2984 }
2985
2986 /* Called upon exit from filters and sentinels. */
2987 void
2988 restore_search_regs ()
2989 {
2990   if (search_regs_saved)
2991     {
2992       if (search_regs.num_regs > 0)
2993         {
2994           xfree (search_regs.start);
2995           xfree (search_regs.end);
2996         }
2997       search_regs.num_regs = saved_search_regs.num_regs;
2998       search_regs.start = saved_search_regs.start;
2999       search_regs.end = saved_search_regs.end;
3000       last_thing_searched = saved_last_thing_searched;
3001       saved_last_thing_searched = Qnil;
3002       search_regs_saved = 0;
3003     }
3004 }
3005
3006 static Lisp_Object
3007 unwind_set_match_data (list)
3008      Lisp_Object list;
3009 {
3010   /* It is safe to free (evaporate) the markers immediately.  */
3011   return Fset_match_data (list, Qevaporate);
3012 }
3013
3014 /* Called to unwind protect the match data.  */
3015 void
3016 record_unwind_save_match_data ()
3017 {
3018   record_unwind_protect (unwind_set_match_data,
3019                          Fmatch_data (Qnil, Qnil, Qnil));
3020 }
3021
3022 /* Quote a string to inactivate reg-expr chars */
3023
3024 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3025        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3026      (string)
3027      Lisp_Object string;
3028 {
3029   register unsigned char *in, *out, *end;
3030   register unsigned char *temp;
3031   int backslashes_added = 0;
3032
3033   CHECK_STRING (string);
3034
3035   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3036
3037   /* Now copy the data into the new string, inserting escapes. */
3038
3039   in = SDATA (string);
3040   end = in + SBYTES (string);
3041   out = temp;
3042
3043   for (; in != end; in++)
3044     {
3045       if (*in == '[' || *in == ']'
3046           || *in == '*' || *in == '.' || *in == '\\'
3047           || *in == '?' || *in == '+'
3048           || *in == '^' || *in == '$')
3049         *out++ = '\\', backslashes_added++;
3050       *out++ = *in;
3051     }
3052
3053   return make_specified_string (temp,
3054                                 SCHARS (string) + backslashes_added,
3055                                 out - temp,
3056                                 STRING_MULTIBYTE (string));
3057 }
3058 \f
3059 void
3060 syms_of_search ()
3061 {
3062   register int i;
3063
3064   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3065     {
3066       searchbufs[i].buf.allocated = 100;
3067       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3068       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3069       searchbufs[i].regexp = Qnil;
3070       searchbufs[i].whitespace_regexp = Qnil;
3071       staticpro (&searchbufs[i].regexp);
3072       staticpro (&searchbufs[i].whitespace_regexp);
3073       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3074     }
3075   searchbuf_head = &searchbufs[0];
3076
3077   Qsearch_failed = intern ("search-failed");
3078   staticpro (&Qsearch_failed);
3079   Qinvalid_regexp = intern ("invalid-regexp");
3080   staticpro (&Qinvalid_regexp);
3081
3082   Fput (Qsearch_failed, Qerror_conditions,
3083         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3084   Fput (Qsearch_failed, Qerror_message,
3085         build_string ("Search failed"));
3086
3087   Fput (Qinvalid_regexp, Qerror_conditions,
3088         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3089   Fput (Qinvalid_regexp, Qerror_message,
3090         build_string ("Invalid regexp"));
3091
3092   last_thing_searched = Qnil;
3093   staticpro (&last_thing_searched);
3094
3095   saved_last_thing_searched = Qnil;
3096   staticpro (&saved_last_thing_searched);
3097
3098   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3099       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3100 Some commands use this for user-specified regexps.
3101 Spaces that occur inside character classes or repetition operators
3102 or other such regexp constructs are not replaced with this.
3103 A value of nil (which is the normal value) means treat spaces literally.  */);
3104   Vsearch_spaces_regexp = Qnil;
3105
3106   defsubr (&Slooking_at);
3107   defsubr (&Sposix_looking_at);
3108   defsubr (&Sstring_match);
3109   defsubr (&Sposix_string_match);
3110   defsubr (&Ssearch_forward);
3111   defsubr (&Ssearch_backward);
3112   defsubr (&Sword_search_forward);
3113   defsubr (&Sword_search_backward);
3114   defsubr (&Sre_search_forward);
3115   defsubr (&Sre_search_backward);
3116   defsubr (&Sposix_search_forward);
3117   defsubr (&Sposix_search_backward);
3118   defsubr (&Sreplace_match);
3119   defsubr (&Smatch_beginning);
3120   defsubr (&Smatch_end);
3121   defsubr (&Smatch_data);
3122   defsubr (&Sset_match_data);
3123   defsubr (&Sregexp_quote);
3124 }
3125
3126 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3127    (do not change this comment) */