src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
   3              Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   struct re_pattern_buffer buf;
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* error condition signaled when regexp compile_pattern fails */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
  86 Lisp_Object Vsearch_spaces_regexp;
  87
  88 static void set_search_regs ();
  89 static void save_search_regs ();
  90 static int simple_search ();
  91 static int boyer_moore ();
  92 static int search_buffer ();
  93
  94 static void
  95 matcher_overflow ()
  96 {
  97   error ("Stack overflow in regexp matcher");
  98 }
  99
 100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 101    PATTERN is the pattern to compile.
 102    CP is the place to put the result.
 103    TRANSLATE is a translation table for ignoring case, or nil for none.
 104    REGP is the structure that says where to store the "register"
 105    values that will result from matching this pattern.
 106    If it is 0, we should compile the pattern not to record any
 107    subexpression bounds.
 108    POSIX is nonzero if we want full backtracking (POSIX style)
 109    for this pattern.  0 means backtrack only enough to get a valid match.
 110    MULTIBYTE is nonzero if we want to handle multibyte characters in
 111    PATTERN.  0 means all multibyte characters are recognized just as
 112    sequences of binary data.
 113
 114    The behavior also depends on Vsearch_spaces_regexp.  */
 115
 116 static void
 117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 118      struct regexp_cache *cp;
 119      Lisp_Object pattern;
 120      Lisp_Object translate;
 121      struct re_registers *regp;
 122      int posix;
 123      int multibyte;
 124 {
 125   unsigned char *raw_pattern;
 126   int raw_pattern_size;
 127   char *val;
 128   reg_syntax_t old;
 129
 130   /* MULTIBYTE says whether the text to be searched is multibyte.
 131      We must convert PATTERN to match that, or we will not really
 132      find things right.  */
 133
 134   if (multibyte == STRING_MULTIBYTE (pattern))
 135     {
 136       raw_pattern = (unsigned char *) SDATA (pattern);
 137       raw_pattern_size = SBYTES (pattern);
 138     }
 139   else if (multibyte)
 140     {
 141       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 142                                                   SCHARS (pattern));
 143       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 144       copy_text (SDATA (pattern), raw_pattern,
 145                  SCHARS (pattern), 0, 1);
 146     }
 147   else
 148     {
 149       /* Converting multibyte to single-byte.
 150
 151          ??? Perhaps this conversion should be done in a special way
 152          by subtracting nonascii-insert-offset from each non-ASCII char,
 153          so that only the multibyte chars which really correspond to
 154          the chosen single-byte character set can possibly match.  */
 155       raw_pattern_size = SCHARS (pattern);
 156       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 157       copy_text (SDATA (pattern), raw_pattern,
 158                  SBYTES (pattern), 1, 0);
 159     }
 160
 161   cp->regexp = Qnil;
 162   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 163   cp->posix = posix;
 164   cp->buf.multibyte = multibyte;
 165   cp->whitespace_regexp = Vsearch_spaces_regexp;
 166   BLOCK_INPUT;
 167   old = re_set_syntax (RE_SYNTAX_EMACS
 168                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 169
 170   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 171                             : SDATA (Vsearch_spaces_regexp));
 172
 173   val = (char *) re_compile_pattern ((char *)raw_pattern,
 174                                      raw_pattern_size, &cp->buf);
 175
 176   re_set_whitespace_regexp (NULL);
 177
 178   re_set_syntax (old);
 179   UNBLOCK_INPUT;
 180   if (val)
 181     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 182
 183   cp->regexp = Fcopy_sequence (pattern);
 184 }
 185
 186 /* Shrink each compiled regexp buffer in the cache
 187    to the size actually used right now.
 188    This is called from garbage collection.  */
 189
 190 void
 191 shrink_regexp_cache ()
 192 {
 193   struct regexp_cache *cp;
 194
 195   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 196     {
 197       cp->buf.allocated = cp->buf.used;
 198       cp->buf.buffer
 199         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 200     }
 201 }
 202
 203 /* Compile a regexp if necessary, but first check to see if there's one in
 204    the cache.
 205    PATTERN is the pattern to compile.
 206    TRANSLATE is a translation table for ignoring case, or nil for none.
 207    REGP is the structure that says where to store the "register"
 208    values that will result from matching this pattern.
 209    If it is 0, we should compile the pattern not to record any
 210    subexpression bounds.
 211    POSIX is nonzero if we want full backtracking (POSIX style)
 212    for this pattern.  0 means backtrack only enough to get a valid match.  */
 213
 214 struct re_pattern_buffer *
 215 compile_pattern (pattern, regp, translate, posix, multibyte)
 216      Lisp_Object pattern;
 217      struct re_registers *regp;
 218      Lisp_Object translate;
 219      int posix, multibyte;
 220 {
 221   struct regexp_cache *cp, **cpp;
 222
 223   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 224     {
 225       cp = *cpp;
 226       /* Entries are initialized to nil, and may be set to nil by
 227          compile_pattern_1 if the pattern isn't valid.  Don't apply
 228          string accessors in those cases.  However, compile_pattern_1
 229          is only applied to the cache entry we pick here to reuse.  So
 230          nil should never appear before a non-nil entry.  */
 231       if (NILP (cp->regexp))
 232         goto compile_it;
 233       if (SCHARS (cp->regexp) == SCHARS (pattern)
 234           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 235           && !NILP (Fstring_equal (cp->regexp, pattern))
 236           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 237           && cp->posix == posix
 238           && cp->buf.multibyte == multibyte
 239           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 240         break;
 241
 242       /* If we're at the end of the cache, compile into the nil cell
 243          we found, or the last (least recently used) cell with a
 244          string value.  */
 245       if (cp->next == 0)
 246         {
 247         compile_it:
 248           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 249           break;
 250         }
 251     }
 252
 253   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 254      either because we found it in the cache or because we just compiled it.
 255      Move it to the front of the queue to mark it as most recently used.  */
 256   *cpp = cp->next;
 257   cp->next = searchbuf_head;
 258   searchbuf_head = cp;
 259
 260   /* Advise the searching functions about the space we have allocated
 261      for register data.  */
 262   if (regp)
 263     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 264
 265   return &cp->buf;
 266 }
 267
 268 /* Error condition used for failing searches */
 269 Lisp_Object Qsearch_failed;
 270
 271 Lisp_Object
 272 signal_failure (arg)
 273      Lisp_Object arg;
 274 {
 275   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 276   return Qnil;
 277 }
 278 \f
 279 static Lisp_Object
 280 looking_at_1 (string, posix)
 281      Lisp_Object string;
 282      int posix;
 283 {
 284   Lisp_Object val;
 285   unsigned char *p1, *p2;
 286   int s1, s2;
 287   register int i;
 288   struct re_pattern_buffer *bufp;
 289
 290   if (running_asynch_code)
 291     save_search_regs ();
 292
 293   CHECK_STRING (string);
 294   bufp = compile_pattern (string, &search_regs,
 295                           (!NILP (current_buffer->case_fold_search)
 296                            ? current_buffer->case_canon_table : Qnil),
 297                           posix,
 298                           !NILP (current_buffer->enable_multibyte_characters));
 299
 300   immediate_quit = 1;
 301   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 302
 303   /* Get pointers and sizes of the two strings
 304      that make up the visible portion of the buffer. */
 305
 306   p1 = BEGV_ADDR;
 307   s1 = GPT_BYTE - BEGV_BYTE;
 308   p2 = GAP_END_ADDR;
 309   s2 = ZV_BYTE - GPT_BYTE;
 310   if (s1 < 0)
 311     {
 312       p2 = p1;
 313       s2 = ZV_BYTE - BEGV_BYTE;
 314       s1 = 0;
 315     }
 316   if (s2 < 0)
 317     {
 318       s1 = ZV_BYTE - BEGV_BYTE;
 319       s2 = 0;
 320     }
 321
 322   re_match_object = Qnil;
 323
 324   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 325                   PT_BYTE - BEGV_BYTE, &search_regs,
 326                   ZV_BYTE - BEGV_BYTE);
 327   immediate_quit = 0;
 328
 329   if (i == -2)
 330     matcher_overflow ();
 331
 332   val = (0 <= i ? Qt : Qnil);
 333   if (i >= 0)
 334     for (i = 0; i < search_regs.num_regs; i++)
 335       if (search_regs.start[i] >= 0)
 336         {
 337           search_regs.start[i]
 338             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 339           search_regs.end[i]
 340             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 341         }
 342   XSETBUFFER (last_thing_searched, current_buffer);
 343   return val;
 344 }
 345
 346 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 347        doc: /* Return t if text after point matches regular expression REGEXP.
 348 This function modifies the match data that `match-beginning',
 349 `match-end' and `match-data' access; save and restore the match
 350 data if you want to preserve them.  */)
 351      (regexp)
 352      Lisp_Object regexp;
 353 {
 354   return looking_at_1 (regexp, 0);
 355 }
 356
 357 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 358        doc: /* Return t if text after point matches regular expression REGEXP.
 359 Find the longest match, in accord with Posix regular expression rules.
 360 This function modifies the match data that `match-beginning',
 361 `match-end' and `match-data' access; save and restore the match
 362 data if you want to preserve them.  */)
 363      (regexp)
 364      Lisp_Object regexp;
 365 {
 366   return looking_at_1 (regexp, 1);
 367 }
 368 \f
 369 static Lisp_Object
 370 string_match_1 (regexp, string, start, posix)
 371      Lisp_Object regexp, string, start;
 372      int posix;
 373 {
 374   int val;
 375   struct re_pattern_buffer *bufp;
 376   int pos, pos_byte;
 377   int i;
 378
 379   if (running_asynch_code)
 380     save_search_regs ();
 381
 382   CHECK_STRING (regexp);
 383   CHECK_STRING (string);
 384
 385   if (NILP (start))
 386     pos = 0, pos_byte = 0;
 387   else
 388     {
 389       int len = SCHARS (string);
 390
 391       CHECK_NUMBER (start);
 392       pos = XINT (start);
 393       if (pos < 0 && -pos <= len)
 394         pos = len + pos;
 395       else if (0 > pos || pos > len)
 396         args_out_of_range (string, start);
 397       pos_byte = string_char_to_byte (string, pos);
 398     }
 399
 400   bufp = compile_pattern (regexp, &search_regs,
 401                           (!NILP (current_buffer->case_fold_search)
 402                            ? current_buffer->case_canon_table : Qnil),
 403                           posix,
 404                           STRING_MULTIBYTE (string));
 405   immediate_quit = 1;
 406   re_match_object = string;
 407
 408   val = re_search (bufp, (char *) SDATA (string),
 409                    SBYTES (string), pos_byte,
 410                    SBYTES (string) - pos_byte,
 411                    &search_regs);
 412   immediate_quit = 0;
 413   last_thing_searched = Qt;
 414   if (val == -2)
 415     matcher_overflow ();
 416   if (val < 0) return Qnil;
 417
 418   for (i = 0; i < search_regs.num_regs; i++)
 419     if (search_regs.start[i] >= 0)
 420       {
 421         search_regs.start[i]
 422           = string_byte_to_char (string, search_regs.start[i]);
 423         search_regs.end[i]
 424           = string_byte_to_char (string, search_regs.end[i]);
 425       }
 426
 427   return make_number (string_byte_to_char (string, val));
 428 }
 429
 430 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 431        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 432 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 433 If third arg START is non-nil, start search at that index in STRING.
 434 For index of first char beyond the match, do (match-end 0).
 435 `match-end' and `match-beginning' also give indices of substrings
 436 matched by parenthesis constructs in the pattern.
 437
 438 You can use the function `match-string' to extract the substrings
 439 matched by the parenthesis constructions in REGEXP. */)
 440      (regexp, string, start)
 441      Lisp_Object regexp, string, start;
 442 {
 443   return string_match_1 (regexp, string, start, 0);
 444 }
 445
 446 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 447        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 448 Find the longest match, in accord with Posix regular expression rules.
 449 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 450 If third arg START is non-nil, start search at that index in STRING.
 451 For index of first char beyond the match, do (match-end 0).
 452 `match-end' and `match-beginning' also give indices of substrings
 453 matched by parenthesis constructs in the pattern.  */)
 454      (regexp, string, start)
 455      Lisp_Object regexp, string, start;
 456 {
 457   return string_match_1 (regexp, string, start, 1);
 458 }
 459
 460 /* Match REGEXP against STRING, searching all of STRING,
 461    and return the index of the match, or negative on failure.
 462    This does not clobber the match data.  */
 463
 464 int
 465 fast_string_match (regexp, string)
 466      Lisp_Object regexp, string;
 467 {
 468   int val;
 469   struct re_pattern_buffer *bufp;
 470
 471   bufp = compile_pattern (regexp, 0, Qnil,
 472                           0, STRING_MULTIBYTE (string));
 473   immediate_quit = 1;
 474   re_match_object = string;
 475
 476   val = re_search (bufp, (char *) SDATA (string),
 477                    SBYTES (string), 0,
 478                    SBYTES (string), 0);
 479   immediate_quit = 0;
 480   return val;
 481 }
 482
 483 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 484    and return the index of the match, or negative on failure.
 485    This does not clobber the match data.
 486    We assume that STRING contains single-byte characters.  */
 487
 488 extern Lisp_Object Vascii_downcase_table;
 489
 490 int
 491 fast_c_string_match_ignore_case (regexp, string)
 492      Lisp_Object regexp;
 493      const char *string;
 494 {
 495   int val;
 496   struct re_pattern_buffer *bufp;
 497   int len = strlen (string);
 498
 499   regexp = string_make_unibyte (regexp);
 500   re_match_object = Qt;
 501   bufp = compile_pattern (regexp, 0,
 502                           Vascii_canon_table, 0,
 503                           0);
 504   immediate_quit = 1;
 505   val = re_search (bufp, string, len, 0, len, 0);
 506   immediate_quit = 0;
 507   return val;
 508 }
 509
 510 /* Like fast_string_match but ignore case.  */
 511
 512 int
 513 fast_string_match_ignore_case (regexp, string)
 514      Lisp_Object regexp, string;
 515 {
 516   int val;
 517   struct re_pattern_buffer *bufp;
 518
 519   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 520                           0, STRING_MULTIBYTE (string));
 521   immediate_quit = 1;
 522   re_match_object = string;
 523
 524   val = re_search (bufp, (char *) SDATA (string),
 525                    SBYTES (string), 0,
 526                    SBYTES (string), 0);
 527   immediate_quit = 0;
 528   return val;
 529 }
 530 \f
 531 /* The newline cache: remembering which sections of text have no newlines.  */
 532
 533 /* If the user has requested newline caching, make sure it's on.
 534    Otherwise, make sure it's off.
 535    This is our cheezy way of associating an action with the change of
 536    state of a buffer-local variable.  */
 537 static void
 538 newline_cache_on_off (buf)
 539      struct buffer *buf;
 540 {
 541   if (NILP (buf->cache_long_line_scans))
 542     {
 543       /* It should be off.  */
 544       if (buf->newline_cache)
 545         {
 546           free_region_cache (buf->newline_cache);
 547           buf->newline_cache = 0;
 548         }
 549     }
 550   else
 551     {
 552       /* It should be on.  */
 553       if (buf->newline_cache == 0)
 554         buf->newline_cache = new_region_cache ();
 555     }
 556 }
 557
 558 \f
 559 /* Search for COUNT instances of the character TARGET between START and END.
 560
 561    If COUNT is positive, search forwards; END must be >= START.
 562    If COUNT is negative, search backwards for the -COUNTth instance;
 563       END must be <= START.
 564    If COUNT is zero, do anything you please; run rogue, for all I care.
 565
 566    If END is zero, use BEGV or ZV instead, as appropriate for the
 567    direction indicated by COUNT.
 568
 569    If we find COUNT instances, set *SHORTAGE to zero, and return the
 570    position past the COUNTth match.  Note that for reverse motion
 571    this is not the same as the usual convention for Emacs motion commands.
 572
 573    If we don't find COUNT instances before reaching END, set *SHORTAGE
 574    to the number of TARGETs left unfound, and return END.
 575
 576    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 577    except when inside redisplay.  */
 578
 579 int
 580 scan_buffer (target, start, end, count, shortage, allow_quit)
 581      register int target;
 582      int start, end;
 583      int count;
 584      int *shortage;
 585      int allow_quit;
 586 {
 587   struct region_cache *newline_cache;
 588   int direction;
 589
 590   if (count > 0)
 591     {
 592       direction = 1;
 593       if (! end) end = ZV;
 594     }
 595   else
 596     {
 597       direction = -1;
 598       if (! end) end = BEGV;
 599     }
 600
 601   newline_cache_on_off (current_buffer);
 602   newline_cache = current_buffer->newline_cache;
 603
 604   if (shortage != 0)
 605     *shortage = 0;
 606
 607   immediate_quit = allow_quit;
 608
 609   if (count > 0)
 610     while (start != end)
 611       {
 612         /* Our innermost scanning loop is very simple; it doesn't know
 613            about gaps, buffer ends, or the newline cache.  ceiling is
 614            the position of the last character before the next such
 615            obstacle --- the last character the dumb search loop should
 616            examine.  */
 617         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 618         int start_byte = CHAR_TO_BYTE (start);
 619         int tem;
 620
 621         /* If we're looking for a newline, consult the newline cache
 622            to see where we can avoid some scanning.  */
 623         if (target == '\n' && newline_cache)
 624           {
 625             int next_change;
 626             immediate_quit = 0;
 627             while (region_cache_forward
 628                    (current_buffer, newline_cache, start_byte, &next_change))
 629               start_byte = next_change;
 630             immediate_quit = allow_quit;
 631
 632             /* START should never be after END.  */
 633             if (start_byte > ceiling_byte)
 634               start_byte = ceiling_byte;
 635
 636             /* Now the text after start is an unknown region, and
 637                next_change is the position of the next known region. */
 638             ceiling_byte = min (next_change - 1, ceiling_byte);
 639           }
 640
 641         /* The dumb loop can only scan text stored in contiguous
 642            bytes. BUFFER_CEILING_OF returns the last character
 643            position that is contiguous, so the ceiling is the
 644            position after that.  */
 645         tem = BUFFER_CEILING_OF (start_byte);
 646         ceiling_byte = min (tem, ceiling_byte);
 647
 648         {
 649           /* The termination address of the dumb loop.  */
 650           register unsigned char *ceiling_addr
 651             = BYTE_POS_ADDR (ceiling_byte) + 1;
 652           register unsigned char *cursor
 653             = BYTE_POS_ADDR (start_byte);
 654           unsigned char *base = cursor;
 655
 656           while (cursor < ceiling_addr)
 657             {
 658               unsigned char *scan_start = cursor;
 659
 660               /* The dumb loop.  */
 661               while (*cursor != target && ++cursor < ceiling_addr)
 662                 ;
 663
 664               /* If we're looking for newlines, cache the fact that
 665                  the region from start to cursor is free of them. */
 666               if (target == '\n' && newline_cache)
 667                 know_region_cache (current_buffer, newline_cache,
 668                                    start_byte + scan_start - base,
 669                                    start_byte + cursor - base);
 670
 671               /* Did we find the target character?  */
 672               if (cursor < ceiling_addr)
 673                 {
 674                   if (--count == 0)
 675                     {
 676                       immediate_quit = 0;
 677                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 678                     }
 679                   cursor++;
 680                 }
 681             }
 682
 683           start = BYTE_TO_CHAR (start_byte + cursor - base);
 684         }
 685       }
 686   else
 687     while (start > end)
 688       {
 689         /* The last character to check before the next obstacle.  */
 690         int ceiling_byte = CHAR_TO_BYTE (end);
 691         int start_byte = CHAR_TO_BYTE (start);
 692         int tem;
 693
 694         /* Consult the newline cache, if appropriate.  */
 695         if (target == '\n' && newline_cache)
 696           {
 697             int next_change;
 698             immediate_quit = 0;
 699             while (region_cache_backward
 700                    (current_buffer, newline_cache, start_byte, &next_change))
 701               start_byte = next_change;
 702             immediate_quit = allow_quit;
 703
 704             /* Start should never be at or before end.  */
 705             if (start_byte <= ceiling_byte)
 706               start_byte = ceiling_byte + 1;
 707
 708             /* Now the text before start is an unknown region, and
 709                next_change is the position of the next known region. */
 710             ceiling_byte = max (next_change, ceiling_byte);
 711           }
 712
 713         /* Stop scanning before the gap.  */
 714         tem = BUFFER_FLOOR_OF (start_byte - 1);
 715         ceiling_byte = max (tem, ceiling_byte);
 716
 717         {
 718           /* The termination address of the dumb loop.  */
 719           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 720           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 721           unsigned char *base = cursor;
 722
 723           while (cursor >= ceiling_addr)
 724             {
 725               unsigned char *scan_start = cursor;
 726
 727               while (*cursor != target && --cursor >= ceiling_addr)
 728                 ;
 729
 730               /* If we're looking for newlines, cache the fact that
 731                  the region from after the cursor to start is free of them.  */
 732               if (target == '\n' && newline_cache)
 733                 know_region_cache (current_buffer, newline_cache,
 734                                    start_byte + cursor - base,
 735                                    start_byte + scan_start - base);
 736
 737               /* Did we find the target character?  */
 738               if (cursor >= ceiling_addr)
 739                 {
 740                   if (++count >= 0)
 741                     {
 742                       immediate_quit = 0;
 743                       return BYTE_TO_CHAR (start_byte + cursor - base);
 744                     }
 745                   cursor--;
 746                 }
 747             }
 748
 749           start = BYTE_TO_CHAR (start_byte + cursor - base);
 750         }
 751       }
 752
 753   immediate_quit = 0;
 754   if (shortage != 0)
 755     *shortage = count * direction;
 756   return start;
 757 }
 758 \f
 759 /* Search for COUNT instances of a line boundary, which means either a
 760    newline or (if selective display enabled) a carriage return.
 761    Start at START.  If COUNT is negative, search backwards.
 762
 763    We report the resulting position by calling TEMP_SET_PT_BOTH.
 764
 765    If we find COUNT instances. we position after (always after,
 766    even if scanning backwards) the COUNTth match, and return 0.
 767
 768    If we don't find COUNT instances before reaching the end of the
 769    buffer (or the beginning, if scanning backwards), we return
 770    the number of line boundaries left unfound, and position at
 771    the limit we bumped up against.
 772
 773    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 774    except in special cases.  */
 775
 776 int
 777 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 778      int start, start_byte;
 779      int limit, limit_byte;
 780      register int count;
 781      int allow_quit;
 782 {
 783   int direction = ((count > 0) ? 1 : -1);
 784
 785   register unsigned char *cursor;
 786   unsigned char *base;
 787
 788   register int ceiling;
 789   register unsigned char *ceiling_addr;
 790
 791   int old_immediate_quit = immediate_quit;
 792
 793   /* The code that follows is like scan_buffer
 794      but checks for either newline or carriage return.  */
 795
 796   if (allow_quit)
 797     immediate_quit++;
 798
 799   start_byte = CHAR_TO_BYTE (start);
 800
 801   if (count > 0)
 802     {
 803       while (start_byte < limit_byte)
 804         {
 805           ceiling =  BUFFER_CEILING_OF (start_byte);
 806           ceiling = min (limit_byte - 1, ceiling);
 807           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 808           base = (cursor = BYTE_POS_ADDR (start_byte));
 809           while (1)
 810             {
 811               while (*cursor != '\n' && ++cursor != ceiling_addr)
 812                 ;
 813
 814               if (cursor != ceiling_addr)
 815                 {
 816                   if (--count == 0)
 817                     {
 818                       immediate_quit = old_immediate_quit;
 819                       start_byte = start_byte + cursor - base + 1;
 820                       start = BYTE_TO_CHAR (start_byte);
 821                       TEMP_SET_PT_BOTH (start, start_byte);
 822                       return 0;
 823                     }
 824                   else
 825                     if (++cursor == ceiling_addr)
 826                       break;
 827                 }
 828               else
 829                 break;
 830             }
 831           start_byte += cursor - base;
 832         }
 833     }
 834   else
 835     {
 836       while (start_byte > limit_byte)
 837         {
 838           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 839           ceiling = max (limit_byte, ceiling);
 840           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 841           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 842           while (1)
 843             {
 844               while (--cursor != ceiling_addr && *cursor != '\n')
 845                 ;
 846
 847               if (cursor != ceiling_addr)
 848                 {
 849                   if (++count == 0)
 850                     {
 851                       immediate_quit = old_immediate_quit;
 852                       /* Return the position AFTER the match we found.  */
 853                       start_byte = start_byte + cursor - base + 1;
 854                       start = BYTE_TO_CHAR (start_byte);
 855                       TEMP_SET_PT_BOTH (start, start_byte);
 856                       return 0;
 857                     }
 858                 }
 859               else
 860                 break;
 861             }
 862           /* Here we add 1 to compensate for the last decrement
 863              of CURSOR, which took it past the valid range.  */
 864           start_byte += cursor - base + 1;
 865         }
 866     }
 867
 868   TEMP_SET_PT_BOTH (limit, limit_byte);
 869   immediate_quit = old_immediate_quit;
 870
 871   return count * direction;
 872 }
 873
 874 int
 875 find_next_newline_no_quit (from, cnt)
 876      register int from, cnt;
 877 {
 878   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 879 }
 880
 881 /* Like find_next_newline, but returns position before the newline,
 882    not after, and only search up to TO.  This isn't just
 883    find_next_newline (...)-1, because you might hit TO.  */
 884
 885 int
 886 find_before_next_newline (from, to, cnt)
 887      int from, to, cnt;
 888 {
 889   int shortage;
 890   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 891
 892   if (shortage == 0)
 893     pos--;
 894
 895   return pos;
 896 }
 897 \f
 898 /* Subroutines of Lisp buffer search functions. */
 899
 900 static Lisp_Object
 901 search_command (string, bound, noerror, count, direction, RE, posix)
 902      Lisp_Object string, bound, noerror, count;
 903      int direction;
 904      int RE;
 905      int posix;
 906 {
 907   register int np;
 908   int lim, lim_byte;
 909   int n = direction;
 910
 911   if (!NILP (count))
 912     {
 913       CHECK_NUMBER (count);
 914       n *= XINT (count);
 915     }
 916
 917   CHECK_STRING (string);
 918   if (NILP (bound))
 919     {
 920       if (n > 0)
 921         lim = ZV, lim_byte = ZV_BYTE;
 922       else
 923         lim = BEGV, lim_byte = BEGV_BYTE;
 924     }
 925   else
 926     {
 927       CHECK_NUMBER_COERCE_MARKER (bound);
 928       lim = XINT (bound);
 929       if (n > 0 ? lim < PT : lim > PT)
 930         error ("Invalid search bound (wrong side of point)");
 931       if (lim > ZV)
 932         lim = ZV, lim_byte = ZV_BYTE;
 933       else if (lim < BEGV)
 934         lim = BEGV, lim_byte = BEGV_BYTE;
 935       else
 936         lim_byte = CHAR_TO_BYTE (lim);
 937     }
 938
 939   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 940                       (!NILP (current_buffer->case_fold_search)
 941                        ? current_buffer->case_canon_table
 942                        : Qnil),
 943                       (!NILP (current_buffer->case_fold_search)
 944                        ? current_buffer->case_eqv_table
 945                        : Qnil),
 946                       posix);
 947   if (np <= 0)
 948     {
 949       if (NILP (noerror))
 950         return signal_failure (string);
 951       if (!EQ (noerror, Qt))
 952         {
 953           if (lim < BEGV || lim > ZV)
 954             abort ();
 955           SET_PT_BOTH (lim, lim_byte);
 956           return Qnil;
 957 #if 0 /* This would be clean, but maybe programs depend on
 958          a value of nil here.  */
 959           np = lim;
 960 #endif
 961         }
 962       else
 963         return Qnil;
 964     }
 965
 966   if (np < BEGV || np > ZV)
 967     abort ();
 968
 969   SET_PT (np);
 970
 971   return make_number (np);
 972 }
 973 \f
 974 /* Return 1 if REGEXP it matches just one constant string.  */
 975
 976 static int
 977 trivial_regexp_p (regexp)
 978      Lisp_Object regexp;
 979 {
 980   int len = SBYTES (regexp);
 981   unsigned char *s = SDATA (regexp);
 982   while (--len >= 0)
 983     {
 984       switch (*s++)
 985         {
 986         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 987           return 0;
 988         case '\\':
 989           if (--len < 0)
 990             return 0;
 991           switch (*s++)
 992             {
 993             case '|': case '(': case ')': case '`': case '\'': case 'b':
 994             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 995             case 'S': case '=': case '{': case '}': case '_':
 996             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 997             case '1': case '2': case '3': case '4': case '5':
 998             case '6': case '7': case '8': case '9':
 999               return 0;
1000             }
1001         }
1002     }
1003   return 1;
1004 }
1005
1006 /* Search for the n'th occurrence of STRING in the current buffer,
1007    starting at position POS and stopping at position LIM,
1008    treating STRING as a literal string if RE is false or as
1009    a regular expression if RE is true.
1010
1011    If N is positive, searching is forward and LIM must be greater than POS.
1012    If N is negative, searching is backward and LIM must be less than POS.
1013
1014    Returns -x if x occurrences remain to be found (x > 0),
1015    or else the position at the beginning of the Nth occurrence
1016    (if searching backward) or the end (if searching forward).
1017
1018    POSIX is nonzero if we want full backtracking (POSIX style)
1019    for this pattern.  0 means backtrack only enough to get a valid match.  */
1020
1021 #define TRANSLATE(out, trt, d)                  \
1022 do                                              \
1023   {                                             \
1024     if (! NILP (trt))                           \
1025       {                                         \
1026         Lisp_Object temp;                       \
1027         temp = Faref (trt, make_number (d));    \
1028         if (INTEGERP (temp))                    \
1029           out = XINT (temp);                    \
1030         else                                    \
1031           out = d;                              \
1032       }                                         \
1033     else                                        \
1034       out = d;                                  \
1035   }                                             \
1036 while (0)
1037
1038 static int
1039 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1040                RE, trt, inverse_trt, posix)
1041      Lisp_Object string;
1042      int pos;
1043      int pos_byte;
1044      int lim;
1045      int lim_byte;
1046      int n;
1047      int RE;
1048      Lisp_Object trt;
1049      Lisp_Object inverse_trt;
1050      int posix;
1051 {
1052   int len = SCHARS (string);
1053   int len_byte = SBYTES (string);
1054   register int i;
1055
1056   if (running_asynch_code)
1057     save_search_regs ();
1058
1059   /* Searching 0 times means don't move.  */
1060   /* Null string is found at starting position.  */
1061   if (len == 0 || n == 0)
1062     {
1063       set_search_regs (pos_byte, 0);
1064       return pos;
1065     }
1066
1067   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1068     {
1069       unsigned char *p1, *p2;
1070       int s1, s2;
1071       struct re_pattern_buffer *bufp;
1072
1073       bufp = compile_pattern (string, &search_regs, trt, posix,
1074                               !NILP (current_buffer->enable_multibyte_characters));
1075
1076       immediate_quit = 1;       /* Quit immediately if user types ^G,
1077                                    because letting this function finish
1078                                    can take too long. */
1079       QUIT;                     /* Do a pending quit right away,
1080                                    to avoid paradoxical behavior */
1081       /* Get pointers and sizes of the two strings
1082          that make up the visible portion of the buffer. */
1083
1084       p1 = BEGV_ADDR;
1085       s1 = GPT_BYTE - BEGV_BYTE;
1086       p2 = GAP_END_ADDR;
1087       s2 = ZV_BYTE - GPT_BYTE;
1088       if (s1 < 0)
1089         {
1090           p2 = p1;
1091           s2 = ZV_BYTE - BEGV_BYTE;
1092           s1 = 0;
1093         }
1094       if (s2 < 0)
1095         {
1096           s1 = ZV_BYTE - BEGV_BYTE;
1097           s2 = 0;
1098         }
1099       re_match_object = Qnil;
1100
1101       while (n < 0)
1102         {
1103           int val;
1104           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106                              &search_regs,
1107                              /* Don't allow match past current point */
1108                              pos_byte - BEGV_BYTE);
1109           if (val == -2)
1110             {
1111               matcher_overflow ();
1112             }
1113           if (val >= 0)
1114             {
1115               pos_byte = search_regs.start[0] + BEGV_BYTE;
1116               for (i = 0; i < search_regs.num_regs; i++)
1117                 if (search_regs.start[i] >= 0)
1118                   {
1119                     search_regs.start[i]
1120                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1121                     search_regs.end[i]
1122                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1123                   }
1124               XSETBUFFER (last_thing_searched, current_buffer);
1125               /* Set pos to the new position. */
1126               pos = search_regs.start[0];
1127             }
1128           else
1129             {
1130               immediate_quit = 0;
1131               return (n);
1132             }
1133           n++;
1134         }
1135       while (n > 0)
1136         {
1137           int val;
1138           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1139                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1140                              &search_regs,
1141                              lim_byte - BEGV_BYTE);
1142           if (val == -2)
1143             {
1144               matcher_overflow ();
1145             }
1146           if (val >= 0)
1147             {
1148               pos_byte = search_regs.end[0] + BEGV_BYTE;
1149               for (i = 0; i < search_regs.num_regs; i++)
1150                 if (search_regs.start[i] >= 0)
1151                   {
1152                     search_regs.start[i]
1153                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1154                     search_regs.end[i]
1155                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1156                   }
1157               XSETBUFFER (last_thing_searched, current_buffer);
1158               pos = search_regs.end[0];
1159             }
1160           else
1161             {
1162               immediate_quit = 0;
1163               return (0 - n);
1164             }
1165           n--;
1166         }
1167       immediate_quit = 0;
1168       return (pos);
1169     }
1170   else                          /* non-RE case */
1171     {
1172       unsigned char *raw_pattern, *pat;
1173       int raw_pattern_size;
1174       int raw_pattern_size_byte;
1175       unsigned char *patbuf;
1176       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1177       unsigned char *base_pat = SDATA (string);
1178       /* Set to nozero if we find a non-ASCII char that need
1179          translation.  */
1180       int charset_base = 0;
1181       int boyer_moore_ok = 1;
1182
1183       /* MULTIBYTE says whether the text to be searched is multibyte.
1184          We must convert PATTERN to match that, or we will not really
1185          find things right.  */
1186
1187       if (multibyte == STRING_MULTIBYTE (string))
1188         {
1189           raw_pattern = (unsigned char *) SDATA (string);
1190           raw_pattern_size = SCHARS (string);
1191           raw_pattern_size_byte = SBYTES (string);
1192         }
1193       else if (multibyte)
1194         {
1195           raw_pattern_size = SCHARS (string);
1196           raw_pattern_size_byte
1197             = count_size_as_multibyte (SDATA (string),
1198                                        raw_pattern_size);
1199           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1200           copy_text (SDATA (string), raw_pattern,
1201                      SCHARS (string), 0, 1);
1202         }
1203       else
1204         {
1205           /* Converting multibyte to single-byte.
1206
1207              ??? Perhaps this conversion should be done in a special way
1208              by subtracting nonascii-insert-offset from each non-ASCII char,
1209              so that only the multibyte chars which really correspond to
1210              the chosen single-byte character set can possibly match.  */
1211           raw_pattern_size = SCHARS (string);
1212           raw_pattern_size_byte = SCHARS (string);
1213           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1214           copy_text (SDATA (string), raw_pattern,
1215                      SBYTES (string), 1, 0);
1216         }
1217
1218       /* Copy and optionally translate the pattern.  */
1219       len = raw_pattern_size;
1220       len_byte = raw_pattern_size_byte;
1221       patbuf = (unsigned char *) alloca (len_byte);
1222       pat = patbuf;
1223       base_pat = raw_pattern;
1224       if (multibyte)
1225         {
1226           /* Fill patbuf by translated characters in STRING while
1227              checking if we can use boyer-moore search.  If TRT is
1228              non-nil, we can use boyer-moore search only if TRT can be
1229              represented by the byte array of 256 elements.  For that,
1230              all non-ASCII case-equivalents of all case-senstive
1231              characters in STRING must belong to the same charset and
1232              row.  */
1233
1234           while (--len >= 0)
1235             {
1236               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1237               int c, translated, inverse;
1238               int in_charlen, charlen;
1239
1240               /* If we got here and the RE flag is set, it's because we're
1241                  dealing with a regexp known to be trivial, so the backslash
1242                  just quotes the next character.  */
1243               if (RE && *base_pat == '\\')
1244                 {
1245                   len--;
1246                   raw_pattern_size--;
1247                   len_byte--;
1248                   base_pat++;
1249                 }
1250
1251               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1252
1253               if (NILP (trt))
1254                 {
1255                   str = base_pat;
1256                   charlen = in_charlen;
1257                 }
1258               else
1259                 {
1260                   /* Translate the character.  */
1261                   TRANSLATE (translated, trt, c);
1262                   charlen = CHAR_STRING (translated, str_base);
1263                   str = str_base;
1264
1265                   /* Check if C has any other case-equivalents.  */
1266                   TRANSLATE (inverse, inverse_trt, c);
1267                   /* If so, check if we can use boyer-moore.  */
1268                   if (c != inverse && boyer_moore_ok)
1269                     {
1270                       /* Check if all equivalents belong to the same
1271                          charset & row.  Note that the check of C
1272                          itself is done by the last iteration.  Note
1273                          also that we don't have to check ASCII
1274                          characters because boyer-moore search can
1275                          always handle their translation.  */
1276                       while (1)
1277                         {
1278                           if (! ASCII_BYTE_P (inverse))
1279                             {
1280                               if (SINGLE_BYTE_CHAR_P (inverse))
1281                                 {
1282                                   /* Boyer-moore search can't handle a
1283                                      translation of an eight-bit
1284                                      character.  */
1285                                   boyer_moore_ok = 0;
1286                                   break;
1287                                 }
1288                               else if (charset_base == 0)
1289                                 charset_base = inverse & ~CHAR_FIELD3_MASK;
1290                               else if ((inverse & ~CHAR_FIELD3_MASK)
1291                                        != charset_base)
1292                                 {
1293                                   boyer_moore_ok = 0;
1294                                   break;
1295                                 }
1296                             }
1297                           if (c == inverse)
1298                             break;
1299                           TRANSLATE (inverse, inverse_trt, inverse);
1300                         }
1301                     }
1302                 }
1303
1304               /* Store this character into the translated pattern.  */
1305               bcopy (str, pat, charlen);
1306               pat += charlen;
1307               base_pat += in_charlen;
1308               len_byte -= in_charlen;
1309             }
1310         }
1311       else
1312         {
1313           /* Unibyte buffer.  */
1314           charset_base = 0;
1315           while (--len >= 0)
1316             {
1317               int c, translated;
1318
1319               /* If we got here and the RE flag is set, it's because we're
1320                  dealing with a regexp known to be trivial, so the backslash
1321                  just quotes the next character.  */
1322               if (RE && *base_pat == '\\')
1323                 {
1324                   len--;
1325                   raw_pattern_size--;
1326                   base_pat++;
1327                 }
1328               c = *base_pat++;
1329               TRANSLATE (translated, trt, c);
1330               *pat++ = translated;
1331             }
1332         }
1333
1334       len_byte = pat - patbuf;
1335       len = raw_pattern_size;
1336       pat = base_pat = patbuf;
1337
1338       if (boyer_moore_ok)
1339         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1340                             pos, pos_byte, lim, lim_byte,
1341                             charset_base);
1342       else
1343         return simple_search (n, pat, len, len_byte, trt,
1344                               pos, pos_byte, lim, lim_byte);
1345     }
1346 }
1347 \f
1348 /* Do a simple string search N times for the string PAT,
1349    whose length is LEN/LEN_BYTE,
1350    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1351    TRT is the translation table.
1352
1353    Return the character position where the match is found.
1354    Otherwise, if M matches remained to be found, return -M.
1355
1356    This kind of search works regardless of what is in PAT and
1357    regardless of what is in TRT.  It is used in cases where
1358    boyer_moore cannot work.  */
1359
1360 static int
1361 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1362      int n;
1363      unsigned char *pat;
1364      int len, len_byte;
1365      Lisp_Object trt;
1366      int pos, pos_byte;
1367      int lim, lim_byte;
1368 {
1369   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1370   int forward = n > 0;
1371
1372   if (lim > pos && multibyte)
1373     while (n > 0)
1374       {
1375         while (1)
1376           {
1377             /* Try matching at position POS.  */
1378             int this_pos = pos;
1379             int this_pos_byte = pos_byte;
1380             int this_len = len;
1381             int this_len_byte = len_byte;
1382             unsigned char *p = pat;
1383             if (pos + len > lim)
1384               goto stop;
1385
1386             while (this_len > 0)
1387               {
1388                 int charlen, buf_charlen;
1389                 int pat_ch, buf_ch;
1390
1391                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1392                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1393                                                  ZV_BYTE - this_pos_byte,
1394                                                  buf_charlen);
1395                 TRANSLATE (buf_ch, trt, buf_ch);
1396
1397                 if (buf_ch != pat_ch)
1398                   break;
1399
1400                 this_len_byte -= charlen;
1401                 this_len--;
1402                 p += charlen;
1403
1404                 this_pos_byte += buf_charlen;
1405                 this_pos++;
1406               }
1407
1408             if (this_len == 0)
1409               {
1410                 pos += len;
1411                 pos_byte += len_byte;
1412                 break;
1413               }
1414
1415             INC_BOTH (pos, pos_byte);
1416           }
1417
1418         n--;
1419       }
1420   else if (lim > pos)
1421     while (n > 0)
1422       {
1423         while (1)
1424           {
1425             /* Try matching at position POS.  */
1426             int this_pos = pos;
1427             int this_len = len;
1428             unsigned char *p = pat;
1429
1430             if (pos + len > lim)
1431               goto stop;
1432
1433             while (this_len > 0)
1434               {
1435                 int pat_ch = *p++;
1436                 int buf_ch = FETCH_BYTE (this_pos);
1437                 TRANSLATE (buf_ch, trt, buf_ch);
1438
1439                 if (buf_ch != pat_ch)
1440                   break;
1441
1442                 this_len--;
1443                 this_pos++;
1444               }
1445
1446             if (this_len == 0)
1447               {
1448                 pos += len;
1449                 break;
1450               }
1451
1452             pos++;
1453           }
1454
1455         n--;
1456       }
1457   /* Backwards search.  */
1458   else if (lim < pos && multibyte)
1459     while (n < 0)
1460       {
1461         while (1)
1462           {
1463             /* Try matching at position POS.  */
1464             int this_pos = pos - len;
1465             int this_pos_byte = pos_byte - len_byte;
1466             int this_len = len;
1467             int this_len_byte = len_byte;
1468             unsigned char *p = pat;
1469
1470             if (pos - len < lim)
1471               goto stop;
1472
1473             while (this_len > 0)
1474               {
1475                 int charlen, buf_charlen;
1476                 int pat_ch, buf_ch;
1477
1478                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1479                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1480                                                  ZV_BYTE - this_pos_byte,
1481                                                  buf_charlen);
1482                 TRANSLATE (buf_ch, trt, buf_ch);
1483
1484                 if (buf_ch != pat_ch)
1485                   break;
1486
1487                 this_len_byte -= charlen;
1488                 this_len--;
1489                 p += charlen;
1490                 this_pos_byte += buf_charlen;
1491                 this_pos++;
1492               }
1493
1494             if (this_len == 0)
1495               {
1496                 pos -= len;
1497                 pos_byte -= len_byte;
1498                 break;
1499               }
1500
1501             DEC_BOTH (pos, pos_byte);
1502           }
1503
1504         n++;
1505       }
1506   else if (lim < pos)
1507     while (n < 0)
1508       {
1509         while (1)
1510           {
1511             /* Try matching at position POS.  */
1512             int this_pos = pos - len;
1513             int this_len = len;
1514             unsigned char *p = pat;
1515
1516             if (pos - len < lim)
1517               goto stop;
1518
1519             while (this_len > 0)
1520               {
1521                 int pat_ch = *p++;
1522                 int buf_ch = FETCH_BYTE (this_pos);
1523                 TRANSLATE (buf_ch, trt, buf_ch);
1524
1525                 if (buf_ch != pat_ch)
1526                   break;
1527                 this_len--;
1528                 this_pos++;
1529               }
1530
1531             if (this_len == 0)
1532               {
1533                 pos -= len;
1534                 break;
1535               }
1536
1537             pos--;
1538           }
1539
1540         n++;
1541       }
1542
1543  stop:
1544   if (n == 0)
1545     {
1546       if (forward)
1547         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1548       else
1549         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1550
1551       return pos;
1552     }
1553   else if (n > 0)
1554     return -n;
1555   else
1556     return n;
1557 }
1558 \f
1559 /* Do Boyer-Moore search N times for the string BASE_PAT,
1560    whose length is LEN/LEN_BYTE,
1561    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1562    DIRECTION says which direction we search in.
1563    TRT and INVERSE_TRT are translation tables.
1564    Characters in PAT are already translated by TRT.
1565
1566    This kind of search works if all the characters in BASE_PAT that
1567    have nontrivial translation are the same aside from the last byte.
1568    This makes it possible to translate just the last byte of a
1569    character, and do so after just a simple test of the context.
1570    CHARSET_BASE is nonzero iff there is such a non-ASCII character.
1571
1572    If that criterion is not satisfied, do not call this function.  */
1573
1574 static int
1575 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1576              pos, pos_byte, lim, lim_byte, charset_base)
1577      int n;
1578      unsigned char *base_pat;
1579      int len, len_byte;
1580      Lisp_Object trt;
1581      Lisp_Object inverse_trt;
1582      int pos, pos_byte;
1583      int lim, lim_byte;
1584      int charset_base;
1585 {
1586   int direction = ((n > 0) ? 1 : -1);
1587   register int dirlen;
1588   int infinity, limit, stride_for_teases = 0;
1589   register int *BM_tab;
1590   int *BM_tab_base;
1591   register unsigned char *cursor, *p_limit;
1592   register int i, j;
1593   unsigned char *pat, *pat_end;
1594   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1595
1596   unsigned char simple_translate[0400];
1597   /* These are set to the preceding bytes of a byte to be translated
1598      if charset_base is nonzero.  As the maximum byte length of a
1599      multibyte character is 4, we have to check at most three previous
1600      bytes.  */
1601   int translate_prev_byte1 = 0;
1602   int translate_prev_byte2 = 0;
1603   int translate_prev_byte3 = 0;
1604
1605 #ifdef C_ALLOCA
1606   int BM_tab_space[0400];
1607   BM_tab = &BM_tab_space[0];
1608 #else
1609   BM_tab = (int *) alloca (0400 * sizeof (int));
1610 #endif
1611   /* The general approach is that we are going to maintain that we know */
1612   /* the first (closest to the present position, in whatever direction */
1613   /* we're searching) character that could possibly be the last */
1614   /* (furthest from present position) character of a valid match.  We */
1615   /* advance the state of our knowledge by looking at that character */
1616   /* and seeing whether it indeed matches the last character of the */
1617   /* pattern.  If it does, we take a closer look.  If it does not, we */
1618   /* move our pointer (to putative last characters) as far as is */
1619   /* logically possible.  This amount of movement, which I call a */
1620   /* stride, will be the length of the pattern if the actual character */
1621   /* appears nowhere in the pattern, otherwise it will be the distance */
1622   /* from the last occurrence of that character to the end of the */
1623   /* pattern. */
1624   /* As a coding trick, an enormous stride is coded into the table for */
1625   /* characters that match the last character.  This allows use of only */
1626   /* a single test, a test for having gone past the end of the */
1627   /* permissible match region, to test for both possible matches (when */
1628   /* the stride goes past the end immediately) and failure to */
1629   /* match (where you get nudged past the end one stride at a time). */
1630
1631   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1632   /* is determined only by the last character of the putative match. */
1633   /* If that character does not match, we will stride the proper */
1634   /* distance to propose a match that superimposes it on the last */
1635   /* instance of a character that matches it (per trt), or misses */
1636   /* it entirely if there is none. */
1637
1638   dirlen = len_byte * direction;
1639   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1640
1641   /* Record position after the end of the pattern.  */
1642   pat_end = base_pat + len_byte;
1643   /* BASE_PAT points to a character that we start scanning from.
1644      It is the first character in a forward search,
1645      the last character in a backward search.  */
1646   if (direction < 0)
1647     base_pat = pat_end - 1;
1648
1649   BM_tab_base = BM_tab;
1650   BM_tab += 0400;
1651   j = dirlen;           /* to get it in a register */
1652   /* A character that does not appear in the pattern induces a */
1653   /* stride equal to the pattern length. */
1654   while (BM_tab_base != BM_tab)
1655     {
1656       *--BM_tab = j;
1657       *--BM_tab = j;
1658       *--BM_tab = j;
1659       *--BM_tab = j;
1660     }
1661
1662   /* We use this for translation, instead of TRT itself.
1663      We fill this in to handle the characters that actually
1664      occur in the pattern.  Others don't matter anyway!  */
1665   bzero (simple_translate, sizeof simple_translate);
1666   for (i = 0; i < 0400; i++)
1667     simple_translate[i] = i;
1668
1669   if (charset_base)
1670     {
1671       /* Setup translate_prev_byte1/2/3 from CHARSET_BASE.  Only a
1672          byte following them are the target of translation.  */
1673       int sample_char = charset_base | 0x20;
1674       unsigned char str[MAX_MULTIBYTE_LENGTH];
1675       int len = CHAR_STRING (sample_char, str);
1676
1677       translate_prev_byte1 = str[len - 2];
1678       if (len > 2)
1679         {
1680           translate_prev_byte2 = str[len - 3];
1681           if (len > 3)
1682             translate_prev_byte3 = str[len - 4];
1683         }
1684     }
1685
1686   i = 0;
1687   while (i != infinity)
1688     {
1689       unsigned char *ptr = base_pat + i;
1690       i += direction;
1691       if (i == dirlen)
1692         i = infinity;
1693       if (! NILP (trt))
1694         {
1695           /* If the byte currently looking at is the last of a
1696              character to check case-equivalents, set CH to that
1697              character.  An ASCII character and a non-ASCII character
1698              matching with CHARSET_BASE are to be checked.  */
1699           int ch = -1;
1700
1701           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1702             ch = *ptr;
1703           else if (charset_base
1704                    && (pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1]))
1705             {
1706               unsigned char *charstart = ptr - 1;
1707
1708               while (! (CHAR_HEAD_P (*charstart)))
1709                 charstart--;
1710               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1711               if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1712                 ch = -1;
1713             }
1714
1715           if (ch > 0400)
1716             j = ((unsigned char) ch) | 0200;
1717           else
1718             j = *ptr;
1719
1720           if (i == infinity)
1721             stride_for_teases = BM_tab[j];
1722
1723           BM_tab[j] = dirlen - i;
1724           /* A translation table is accompanied by its inverse -- see */
1725           /* comment following downcase_table for details */
1726           if (ch >= 0)
1727             {
1728               int starting_ch = ch;
1729               int starting_j = j;
1730
1731               while (1)
1732                 {
1733                   TRANSLATE (ch, inverse_trt, ch);
1734                   if (ch > 0400)
1735                     j = ((unsigned char) ch) | 0200;
1736                   else
1737                     j = (unsigned char) ch;
1738
1739                   /* For all the characters that map into CH,
1740                      set up simple_translate to map the last byte
1741                      into STARTING_J.  */
1742                   simple_translate[j] = starting_j;
1743                   if (ch == starting_ch)
1744                     break;
1745                   BM_tab[j] = dirlen - i;
1746                 }
1747             }
1748         }
1749       else
1750         {
1751           j = *ptr;
1752
1753           if (i == infinity)
1754             stride_for_teases = BM_tab[j];
1755           BM_tab[j] = dirlen - i;
1756         }
1757       /* stride_for_teases tells how much to stride if we get a */
1758       /* match on the far character but are subsequently */
1759       /* disappointed, by recording what the stride would have been */
1760       /* for that character if the last character had been */
1761       /* different. */
1762     }
1763   infinity = dirlen - infinity;
1764   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1765   /* loop invariant - POS_BYTE points at where last char (first
1766      char if reverse) of pattern would align in a possible match.  */
1767   while (n != 0)
1768     {
1769       int tail_end;
1770       unsigned char *tail_end_ptr;
1771
1772       /* It's been reported that some (broken) compiler thinks that
1773          Boolean expressions in an arithmetic context are unsigned.
1774          Using an explicit ?1:0 prevents this.  */
1775       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1776           < 0)
1777         return (n * (0 - direction));
1778       /* First we do the part we can by pointers (maybe nothing) */
1779       QUIT;
1780       pat = base_pat;
1781       limit = pos_byte - dirlen + direction;
1782       if (direction > 0)
1783         {
1784           limit = BUFFER_CEILING_OF (limit);
1785           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1786              can take on without hitting edge of buffer or the gap.  */
1787           limit = min (limit, pos_byte + 20000);
1788           limit = min (limit, lim_byte - 1);
1789         }
1790       else
1791         {
1792           limit = BUFFER_FLOOR_OF (limit);
1793           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1794              can take on without hitting edge of buffer or the gap.  */
1795           limit = max (limit, pos_byte - 20000);
1796           limit = max (limit, lim_byte);
1797         }
1798       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1799       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1800
1801       if ((limit - pos_byte) * direction > 20)
1802         {
1803           unsigned char *p2;
1804
1805           p_limit = BYTE_POS_ADDR (limit);
1806           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1807           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1808           while (1)             /* use one cursor setting as long as i can */
1809             {
1810               if (direction > 0) /* worth duplicating */
1811                 {
1812                   /* Use signed comparison if appropriate
1813                      to make cursor+infinity sure to be > p_limit.
1814                      Assuming that the buffer lies in a range of addresses
1815                      that are all "positive" (as ints) or all "negative",
1816                      either kind of comparison will work as long
1817                      as we don't step by infinity.  So pick the kind
1818                      that works when we do step by infinity.  */
1819                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1820                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1821                       cursor += BM_tab[*cursor];
1822                   else
1823                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1824                       cursor += BM_tab[*cursor];
1825                 }
1826               else
1827                 {
1828                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1829                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1830                       cursor += BM_tab[*cursor];
1831                   else
1832                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1833                       cursor += BM_tab[*cursor];
1834                 }
1835 /* If you are here, cursor is beyond the end of the searched region. */
1836 /* This can happen if you match on the far character of the pattern, */
1837 /* because the "stride" of that character is infinity, a number able */
1838 /* to throw you well beyond the end of the search.  It can also */
1839 /* happen if you fail to match within the permitted region and would */
1840 /* otherwise try a character beyond that region */
1841               if ((cursor - p_limit) * direction <= len_byte)
1842                 break;  /* a small overrun is genuine */
1843               cursor -= infinity; /* large overrun = hit */
1844               i = dirlen - direction;
1845               if (! NILP (trt))
1846                 {
1847                   while ((i -= direction) + direction != 0)
1848                     {
1849                       int ch;
1850                       cursor -= direction;
1851                       /* Translate only the last byte of a character.  */
1852                       if (! multibyte
1853                           || ((cursor == tail_end_ptr
1854                                || CHAR_HEAD_P (cursor[1]))
1855                               && (CHAR_HEAD_P (cursor[0])
1856                                   /* Check if this is the last byte of
1857                                      a translable character.  */
1858                                   || (translate_prev_byte1 == cursor[-1]
1859                                       && (CHAR_HEAD_P (translate_prev_byte1)
1860                                           || (translate_prev_byte2 == cursor[-2]
1861                                               && (CHAR_HEAD_P (translate_prev_byte2)
1862                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1863                         ch = simple_translate[*cursor];
1864                       else
1865                         ch = *cursor;
1866                       if (pat[i] != ch)
1867                         break;
1868                     }
1869                 }
1870               else
1871                 {
1872                   while ((i -= direction) + direction != 0)
1873                     {
1874                       cursor -= direction;
1875                       if (pat[i] != *cursor)
1876                         break;
1877                     }
1878                 }
1879               cursor += dirlen - i - direction; /* fix cursor */
1880               if (i + direction == 0)
1881                 {
1882                   int position;
1883
1884                   cursor -= direction;
1885
1886                   position = pos_byte + cursor - p2 + ((direction > 0)
1887                                                        ? 1 - len_byte : 0);
1888                   set_search_regs (position, len_byte);
1889
1890                   if ((n -= direction) != 0)
1891                     cursor += dirlen; /* to resume search */
1892                   else
1893                     return ((direction > 0)
1894                             ? search_regs.end[0] : search_regs.start[0]);
1895                 }
1896               else
1897                 cursor += stride_for_teases; /* <sigh> we lose -  */
1898             }
1899           pos_byte += cursor - p2;
1900         }
1901       else
1902         /* Now we'll pick up a clump that has to be done the hard */
1903         /* way because it covers a discontinuity */
1904         {
1905           limit = ((direction > 0)
1906                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1907                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1908           limit = ((direction > 0)
1909                    ? min (limit + len_byte, lim_byte - 1)
1910                    : max (limit - len_byte, lim_byte));
1911           /* LIMIT is now the last value POS_BYTE can have
1912              and still be valid for a possible match.  */
1913           while (1)
1914             {
1915               /* This loop can be coded for space rather than */
1916               /* speed because it will usually run only once. */
1917               /* (the reach is at most len + 21, and typically */
1918               /* does not exceed len) */
1919               while ((limit - pos_byte) * direction >= 0)
1920                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1921               /* now run the same tests to distinguish going off the */
1922               /* end, a match or a phony match. */
1923               if ((pos_byte - limit) * direction <= len_byte)
1924                 break;  /* ran off the end */
1925               /* Found what might be a match.
1926                  Set POS_BYTE back to last (first if reverse) pos.  */
1927               pos_byte -= infinity;
1928               i = dirlen - direction;
1929               while ((i -= direction) + direction != 0)
1930                 {
1931                   int ch;
1932                   unsigned char *ptr;
1933                   pos_byte -= direction;
1934                   ptr = BYTE_POS_ADDR (pos_byte);
1935                   /* Translate only the last byte of a character.  */
1936                   if (! multibyte
1937                       || ((ptr == tail_end_ptr
1938                            || CHAR_HEAD_P (ptr[1]))
1939                           && (CHAR_HEAD_P (ptr[0])
1940                               /* Check if this is the last byte of a
1941                                  translable character.  */
1942                               || (translate_prev_byte1 == ptr[-1]
1943                                   && (CHAR_HEAD_P (translate_prev_byte1)
1944                                       || (translate_prev_byte2 == ptr[-2]
1945                                           && (CHAR_HEAD_P (translate_prev_byte2)
1946                                               || translate_prev_byte3 == ptr[-3])))))))
1947                     ch = simple_translate[*ptr];
1948                   else
1949                     ch = *ptr;
1950                   if (pat[i] != ch)
1951                     break;
1952                 }
1953               /* Above loop has moved POS_BYTE part or all the way
1954                  back to the first pos (last pos if reverse).
1955                  Set it once again at the last (first if reverse) char.  */
1956               pos_byte += dirlen - i- direction;
1957               if (i + direction == 0)
1958                 {
1959                   int position;
1960                   pos_byte -= direction;
1961
1962                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1963
1964                   set_search_regs (position, len_byte);
1965
1966                   if ((n -= direction) != 0)
1967                     pos_byte += dirlen; /* to resume search */
1968                   else
1969                     return ((direction > 0)
1970                             ? search_regs.end[0] : search_regs.start[0]);
1971                 }
1972               else
1973                 pos_byte += stride_for_teases;
1974             }
1975           }
1976       /* We have done one clump.  Can we continue? */
1977       if ((lim_byte - pos_byte) * direction < 0)
1978         return ((0 - n) * direction);
1979     }
1980   return BYTE_TO_CHAR (pos_byte);
1981 }
1982
1983 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1984    for the overall match just found in the current buffer.
1985    Also clear out the match data for registers 1 and up.  */
1986
1987 static void
1988 set_search_regs (beg_byte, nbytes)
1989      int beg_byte, nbytes;
1990 {
1991   int i;
1992
1993   /* Make sure we have registers in which to store
1994      the match position.  */
1995   if (search_regs.num_regs == 0)
1996     {
1997       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1998       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1999       search_regs.num_regs = 2;
2000     }
2001
2002   /* Clear out the other registers.  */
2003   for (i = 1; i < search_regs.num_regs; i++)
2004     {
2005       search_regs.start[i] = -1;
2006       search_regs.end[i] = -1;
2007     }
2008
2009   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2010   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2011   XSETBUFFER (last_thing_searched, current_buffer);
2012 }
2013 \f
2014 /* Given a string of words separated by word delimiters,
2015   compute a regexp that matches those exact words
2016   separated by arbitrary punctuation.  */
2017
2018 static Lisp_Object
2019 wordify (string)
2020      Lisp_Object string;
2021 {
2022   register unsigned char *p, *o;
2023   register int i, i_byte, len, punct_count = 0, word_count = 0;
2024   Lisp_Object val;
2025   int prev_c = 0;
2026   int adjust;
2027
2028   CHECK_STRING (string);
2029   p = SDATA (string);
2030   len = SCHARS (string);
2031
2032   for (i = 0, i_byte = 0; i < len; )
2033     {
2034       int c;
2035
2036       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2037
2038       if (SYNTAX (c) != Sword)
2039         {
2040           punct_count++;
2041           if (i > 0 && SYNTAX (prev_c) == Sword)
2042             word_count++;
2043         }
2044
2045       prev_c = c;
2046     }
2047
2048   if (SYNTAX (prev_c) == Sword)
2049     word_count++;
2050   if (!word_count)
2051     return empty_string;
2052
2053   adjust = - punct_count + 5 * (word_count - 1) + 4;
2054   if (STRING_MULTIBYTE (string))
2055     val = make_uninit_multibyte_string (len + adjust,
2056                                         SBYTES (string)
2057                                         + adjust);
2058   else
2059     val = make_uninit_string (len + adjust);
2060
2061   o = SDATA (val);
2062   *o++ = '\\';
2063   *o++ = 'b';
2064   prev_c = 0;
2065
2066   for (i = 0, i_byte = 0; i < len; )
2067     {
2068       int c;
2069       int i_byte_orig = i_byte;
2070
2071       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2072
2073       if (SYNTAX (c) == Sword)
2074         {
2075           bcopy (SDATA (string) + i_byte_orig, o,
2076                  i_byte - i_byte_orig);
2077           o += i_byte - i_byte_orig;
2078         }
2079       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2080         {
2081           *o++ = '\\';
2082           *o++ = 'W';
2083           *o++ = '\\';
2084           *o++ = 'W';
2085           *o++ = '*';
2086         }
2087
2088       prev_c = c;
2089     }
2090
2091   *o++ = '\\';
2092   *o++ = 'b';
2093
2094   return val;
2095 }
2096 \f
2097 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2098        "MSearch backward: ",
2099        doc: /* Search backward from point for STRING.
2100 Set point to the beginning of the occurrence found, and return point.
2101 An optional second argument bounds the search; it is a buffer position.
2102 The match found must not extend before that position.
2103 Optional third argument, if t, means if fail just return nil (no error).
2104  If not nil and not t, position at limit of search and return nil.
2105 Optional fourth argument is repeat count--search for successive occurrences.
2106
2107 Search case-sensitivity is determined by the value of the variable
2108 `case-fold-search', which see.
2109
2110 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2111      (string, bound, noerror, count)
2112      Lisp_Object string, bound, noerror, count;
2113 {
2114   return search_command (string, bound, noerror, count, -1, 0, 0);
2115 }
2116
2117 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2118        doc: /* Search forward from point for STRING.
2119 Set point to the end of the occurrence found, and return point.
2120 An optional second argument bounds the search; it is a buffer position.
2121 The match found must not extend after that position.  nil is equivalent
2122   to (point-max).
2123 Optional third argument, if t, means if fail just return nil (no error).
2124   If not nil and not t, move to limit of search and return nil.
2125 Optional fourth argument is repeat count--search for successive occurrences.
2126
2127 Search case-sensitivity is determined by the value of the variable
2128 `case-fold-search', which see.
2129
2130 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2131      (string, bound, noerror, count)
2132      Lisp_Object string, bound, noerror, count;
2133 {
2134   return search_command (string, bound, noerror, count, 1, 0, 0);
2135 }
2136
2137 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2138        "sWord search backward: ",
2139        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2140 Set point to the beginning of the occurrence found, and return point.
2141 An optional second argument bounds the search; it is a buffer position.
2142 The match found must not extend before that position.
2143 Optional third argument, if t, means if fail just return nil (no error).
2144   If not nil and not t, move to limit of search and return nil.
2145 Optional fourth argument is repeat count--search for successive occurrences.  */)
2146      (string, bound, noerror, count)
2147      Lisp_Object string, bound, noerror, count;
2148 {
2149   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2150 }
2151
2152 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2153        "sWord search: ",
2154        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2155 Set point to the end of the occurrence found, and return point.
2156 An optional second argument bounds the search; it is a buffer position.
2157 The match found must not extend after that position.
2158 Optional third argument, if t, means if fail just return nil (no error).
2159   If not nil and not t, move to limit of search and return nil.
2160 Optional fourth argument is repeat count--search for successive occurrences.  */)
2161      (string, bound, noerror, count)
2162      Lisp_Object string, bound, noerror, count;
2163 {
2164   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2165 }
2166
2167 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2168        "sRE search backward: ",
2169        doc: /* Search backward from point for match for regular expression REGEXP.
2170 Set point to the beginning of the match, and return point.
2171 The match found is the one starting last in the buffer
2172 and yet ending before the origin of the search.
2173 An optional second argument bounds the search; it is a buffer position.
2174 The match found must start at or after that position.
2175 Optional third argument, if t, means if fail just return nil (no error).
2176   If not nil and not t, move to limit of search and return nil.
2177 Optional fourth argument is repeat count--search for successive occurrences.
2178 See also the functions `match-beginning', `match-end', `match-string',
2179 and `replace-match'.  */)
2180      (regexp, bound, noerror, count)
2181      Lisp_Object regexp, bound, noerror, count;
2182 {
2183   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2184 }
2185
2186 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2187        "sRE search: ",
2188        doc: /* Search forward from point for regular expression REGEXP.
2189 Set point to the end of the occurrence found, and return point.
2190 An optional second argument bounds the search; it is a buffer position.
2191 The match found must not extend after that position.
2192 Optional third argument, if t, means if fail just return nil (no error).
2193   If not nil and not t, move to limit of search and return nil.
2194 Optional fourth argument is repeat count--search for successive occurrences.
2195 See also the functions `match-beginning', `match-end', `match-string',
2196 and `replace-match'.  */)
2197      (regexp, bound, noerror, count)
2198      Lisp_Object regexp, bound, noerror, count;
2199 {
2200   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2201 }
2202
2203 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2204        "sPosix search backward: ",
2205        doc: /* Search backward from point for match for regular expression REGEXP.
2206 Find the longest match in accord with Posix regular expression rules.
2207 Set point to the beginning of the match, and return point.
2208 The match found is the one starting last in the buffer
2209 and yet ending before the origin of the search.
2210 An optional second argument bounds the search; it is a buffer position.
2211 The match found must start at or after that position.
2212 Optional third argument, if t, means if fail just return nil (no error).
2213   If not nil and not t, move to limit of search and return nil.
2214 Optional fourth argument is repeat count--search for successive occurrences.
2215 See also the functions `match-beginning', `match-end', `match-string',
2216 and `replace-match'.  */)
2217      (regexp, bound, noerror, count)
2218      Lisp_Object regexp, bound, noerror, count;
2219 {
2220   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2221 }
2222
2223 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2224        "sPosix search: ",
2225        doc: /* Search forward from point for regular expression REGEXP.
2226 Find the longest match in accord with Posix regular expression rules.
2227 Set point to the end of the occurrence found, and return point.
2228 An optional second argument bounds the search; it is a buffer position.
2229 The match found must not extend after that position.
2230 Optional third argument, if t, means if fail just return nil (no error).
2231   If not nil and not t, move to limit of search and return nil.
2232 Optional fourth argument is repeat count--search for successive occurrences.
2233 See also the functions `match-beginning', `match-end', `match-string',
2234 and `replace-match'.  */)
2235      (regexp, bound, noerror, count)
2236      Lisp_Object regexp, bound, noerror, count;
2237 {
2238   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2239 }
2240 \f
2241 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2242        doc: /* Replace text matched by last search with NEWTEXT.
2243 Leave point at the end of the replacement text.
2244
2245 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2246 Otherwise maybe capitalize the whole text, or maybe just word initials,
2247 based on the replaced text.
2248 If the replaced text has only capital letters
2249 and has at least one multiletter word, convert NEWTEXT to all caps.
2250 Otherwise if all words are capitalized in the replaced text,
2251 capitalize each word in NEWTEXT.
2252
2253 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2254 Otherwise treat `\\' as special:
2255   `\\&' in NEWTEXT means substitute original matched text.
2256   `\\N' means substitute what matched the Nth `\\(...\\)'.
2257        If Nth parens didn't match, substitute nothing.
2258   `\\\\' means insert one `\\'.
2259 Case conversion does not apply to these substitutions.
2260
2261 FIXEDCASE and LITERAL are optional arguments.
2262
2263 The optional fourth argument STRING can be a string to modify.
2264 This is meaningful when the previous match was done against STRING,
2265 using `string-match'.  When used this way, `replace-match'
2266 creates and returns a new string made by copying STRING and replacing
2267 the part of STRING that was matched.
2268
2269 The optional fifth argument SUBEXP specifies a subexpression;
2270 it says to replace just that subexpression with NEWTEXT,
2271 rather than replacing the entire matched text.
2272 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2273 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2274 NEWTEXT in place of subexp N.
2275 This is useful only after a regular expression search or match,
2276 since only regular expressions have distinguished subexpressions.  */)
2277      (newtext, fixedcase, literal, string, subexp)
2278      Lisp_Object newtext, fixedcase, literal, string, subexp;
2279 {
2280   enum { nochange, all_caps, cap_initial } case_action;
2281   register int pos, pos_byte;
2282   int some_multiletter_word;
2283   int some_lowercase;
2284   int some_uppercase;
2285   int some_nonuppercase_initial;
2286   register int c, prevc;
2287   int sub;
2288   int opoint, newpoint;
2289
2290   CHECK_STRING (newtext);
2291
2292   if (! NILP (string))
2293     CHECK_STRING (string);
2294
2295   case_action = nochange;       /* We tried an initialization */
2296                                 /* but some C compilers blew it */
2297
2298   if (search_regs.num_regs <= 0)
2299     error ("`replace-match' called before any match found");
2300
2301   if (NILP (subexp))
2302     sub = 0;
2303   else
2304     {
2305       CHECK_NUMBER (subexp);
2306       sub = XINT (subexp);
2307       if (sub < 0 || sub >= search_regs.num_regs)
2308         args_out_of_range (subexp, make_number (search_regs.num_regs));
2309     }
2310
2311   if (NILP (string))
2312     {
2313       if (search_regs.start[sub] < BEGV
2314           || search_regs.start[sub] > search_regs.end[sub]
2315           || search_regs.end[sub] > ZV)
2316         args_out_of_range (make_number (search_regs.start[sub]),
2317                            make_number (search_regs.end[sub]));
2318     }
2319   else
2320     {
2321       if (search_regs.start[sub] < 0
2322           || search_regs.start[sub] > search_regs.end[sub]
2323           || search_regs.end[sub] > SCHARS (string))
2324         args_out_of_range (make_number (search_regs.start[sub]),
2325                            make_number (search_regs.end[sub]));
2326     }
2327
2328   if (NILP (fixedcase))
2329     {
2330       /* Decide how to casify by examining the matched text. */
2331       int last;
2332
2333       pos = search_regs.start[sub];
2334       last = search_regs.end[sub];
2335
2336       if (NILP (string))
2337         pos_byte = CHAR_TO_BYTE (pos);
2338       else
2339         pos_byte = string_char_to_byte (string, pos);
2340
2341       prevc = '\n';
2342       case_action = all_caps;
2343
2344       /* some_multiletter_word is set nonzero if any original word
2345          is more than one letter long. */
2346       some_multiletter_word = 0;
2347       some_lowercase = 0;
2348       some_nonuppercase_initial = 0;
2349       some_uppercase = 0;
2350
2351       while (pos < last)
2352         {
2353           if (NILP (string))
2354             {
2355               c = FETCH_CHAR (pos_byte);
2356               INC_BOTH (pos, pos_byte);
2357             }
2358           else
2359             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2360
2361           if (LOWERCASEP (c))
2362             {
2363               /* Cannot be all caps if any original char is lower case */
2364
2365               some_lowercase = 1;
2366               if (SYNTAX (prevc) != Sword)
2367                 some_nonuppercase_initial = 1;
2368               else
2369                 some_multiletter_word = 1;
2370             }
2371           else if (!NOCASEP (c))
2372             {
2373               some_uppercase = 1;
2374               if (SYNTAX (prevc) != Sword)
2375                 ;
2376               else
2377                 some_multiletter_word = 1;
2378             }
2379           else
2380             {
2381               /* If the initial is a caseless word constituent,
2382                  treat that like a lowercase initial.  */
2383               if (SYNTAX (prevc) != Sword)
2384                 some_nonuppercase_initial = 1;
2385             }
2386
2387           prevc = c;
2388         }
2389
2390       /* Convert to all caps if the old text is all caps
2391          and has at least one multiletter word.  */
2392       if (! some_lowercase && some_multiletter_word)
2393         case_action = all_caps;
2394       /* Capitalize each word, if the old text has all capitalized words.  */
2395       else if (!some_nonuppercase_initial && some_multiletter_word)
2396         case_action = cap_initial;
2397       else if (!some_nonuppercase_initial && some_uppercase)
2398         /* Should x -> yz, operating on X, give Yz or YZ?
2399            We'll assume the latter.  */
2400         case_action = all_caps;
2401       else
2402         case_action = nochange;
2403     }
2404
2405   /* Do replacement in a string.  */
2406   if (!NILP (string))
2407     {
2408       Lisp_Object before, after;
2409
2410       before = Fsubstring (string, make_number (0),
2411                            make_number (search_regs.start[sub]));
2412       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2413
2414       /* Substitute parts of the match into NEWTEXT
2415          if desired.  */
2416       if (NILP (literal))
2417         {
2418           int lastpos = 0;
2419           int lastpos_byte = 0;
2420           /* We build up the substituted string in ACCUM.  */
2421           Lisp_Object accum;
2422           Lisp_Object middle;
2423           int length = SBYTES (newtext);
2424
2425           accum = Qnil;
2426
2427           for (pos_byte = 0, pos = 0; pos_byte < length;)
2428             {
2429               int substart = -1;
2430               int subend = 0;
2431               int delbackslash = 0;
2432
2433               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2434
2435               if (c == '\\')
2436                 {
2437                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2438
2439                   if (c == '&')
2440                     {
2441                       substart = search_regs.start[sub];
2442                       subend = search_regs.end[sub];
2443                     }
2444                   else if (c >= '1' && c <= '9')
2445                     {
2446                       if (search_regs.start[c - '0'] >= 0
2447                           && c <= search_regs.num_regs + '0')
2448                         {
2449                           substart = search_regs.start[c - '0'];
2450                           subend = search_regs.end[c - '0'];
2451                         }
2452                       else
2453                         {
2454                           /* If that subexp did not match,
2455                              replace \\N with nothing.  */
2456                           substart = 0;
2457                           subend = 0;
2458                         }
2459                     }
2460                   else if (c == '\\')
2461                     delbackslash = 1;
2462                   else
2463                     error ("Invalid use of `\\' in replacement text");
2464                 }
2465               if (substart >= 0)
2466                 {
2467                   if (pos - 2 != lastpos)
2468                     middle = substring_both (newtext, lastpos,
2469                                              lastpos_byte,
2470                                              pos - 2, pos_byte - 2);
2471                   else
2472                     middle = Qnil;
2473                   accum = concat3 (accum, middle,
2474                                    Fsubstring (string,
2475                                                make_number (substart),
2476                                                make_number (subend)));
2477                   lastpos = pos;
2478                   lastpos_byte = pos_byte;
2479                 }
2480               else if (delbackslash)
2481                 {
2482                   middle = substring_both (newtext, lastpos,
2483                                            lastpos_byte,
2484                                            pos - 1, pos_byte - 1);
2485
2486                   accum = concat2 (accum, middle);
2487                   lastpos = pos;
2488                   lastpos_byte = pos_byte;
2489                 }
2490             }
2491
2492           if (pos != lastpos)
2493             middle = substring_both (newtext, lastpos,
2494                                      lastpos_byte,
2495                                      pos, pos_byte);
2496           else
2497             middle = Qnil;
2498
2499           newtext = concat2 (accum, middle);
2500         }
2501
2502       /* Do case substitution in NEWTEXT if desired.  */
2503       if (case_action == all_caps)
2504         newtext = Fupcase (newtext);
2505       else if (case_action == cap_initial)
2506         newtext = Fupcase_initials (newtext);
2507
2508       return concat3 (before, newtext, after);
2509     }
2510
2511   /* Record point, then move (quietly) to the start of the match.  */
2512   if (PT >= search_regs.end[sub])
2513     opoint = PT - ZV;
2514   else if (PT > search_regs.start[sub])
2515     opoint = search_regs.end[sub] - ZV;
2516   else
2517     opoint = PT;
2518
2519   /* If we want non-literal replacement,
2520      perform substitution on the replacement string.  */
2521   if (NILP (literal))
2522     {
2523       int length = SBYTES (newtext);
2524       unsigned char *substed;
2525       int substed_alloc_size, substed_len;
2526       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2527       int str_multibyte = STRING_MULTIBYTE (newtext);
2528       Lisp_Object rev_tbl;
2529       int really_changed = 0;
2530
2531       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2532                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2533                                           make_number (0))
2534                 : Qnil);
2535
2536       substed_alloc_size = length * 2 + 100;
2537       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2538       substed_len = 0;
2539
2540       /* Go thru NEWTEXT, producing the actual text to insert in
2541          SUBSTED while adjusting multibyteness to that of the current
2542          buffer.  */
2543
2544       for (pos_byte = 0, pos = 0; pos_byte < length;)
2545         {
2546           unsigned char str[MAX_MULTIBYTE_LENGTH];
2547           unsigned char *add_stuff = NULL;
2548           int add_len = 0;
2549           int idx = -1;
2550
2551           if (str_multibyte)
2552             {
2553               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2554               if (!buf_multibyte)
2555                 c = multibyte_char_to_unibyte (c, rev_tbl);
2556             }
2557           else
2558             {
2559               /* Note that we don't have to increment POS.  */
2560               c = SREF (newtext, pos_byte++);
2561               if (buf_multibyte)
2562                 c = unibyte_char_to_multibyte (c);
2563             }
2564
2565           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2566              or set IDX to a match index, which means put that part
2567              of the buffer text into SUBSTED.  */
2568
2569           if (c == '\\')
2570             {
2571               really_changed = 1;
2572
2573               if (str_multibyte)
2574                 {
2575                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2576                                                       pos, pos_byte);
2577                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2578                     c = multibyte_char_to_unibyte (c, rev_tbl);
2579                 }
2580               else
2581                 {
2582                   c = SREF (newtext, pos_byte++);
2583                   if (buf_multibyte)
2584                     c = unibyte_char_to_multibyte (c);
2585                 }
2586
2587               if (c == '&')
2588                 idx = sub;
2589               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2590                 {
2591                   if (search_regs.start[c - '0'] >= 1)
2592                     idx = c - '0';
2593                 }
2594               else if (c == '\\')
2595                 add_len = 1, add_stuff = "\\";
2596               else
2597                 {
2598                   xfree (substed);
2599                   error ("Invalid use of `\\' in replacement text");
2600                 }
2601             }
2602           else
2603             {
2604               add_len = CHAR_STRING (c, str);
2605               add_stuff = str;
2606             }
2607
2608           /* If we want to copy part of a previous match,
2609              set up ADD_STUFF and ADD_LEN to point to it.  */
2610           if (idx >= 0)
2611             {
2612               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2613               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2614               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2615                 move_gap (search_regs.start[idx]);
2616               add_stuff = BYTE_POS_ADDR (begbyte);
2617             }
2618
2619           /* Now the stuff we want to add to SUBSTED
2620              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2621
2622           /* Make sure SUBSTED is big enough.  */
2623           if (substed_len + add_len >= substed_alloc_size)
2624             {
2625               substed_alloc_size = substed_len + add_len + 500;
2626               substed = (unsigned char *) xrealloc (substed,
2627                                                     substed_alloc_size + 1);
2628             }
2629
2630           /* Now add to the end of SUBSTED.  */
2631           if (add_stuff)
2632             {
2633               bcopy (add_stuff, substed + substed_len, add_len);
2634               substed_len += add_len;
2635             }
2636         }
2637
2638       if (really_changed)
2639         {
2640           if (buf_multibyte)
2641             {
2642               int nchars = multibyte_chars_in_text (substed, substed_len);
2643
2644               newtext = make_multibyte_string (substed, nchars, substed_len);
2645             }
2646           else
2647             newtext = make_unibyte_string (substed, substed_len);
2648         }
2649       xfree (substed);
2650     }
2651
2652   /* Replace the old text with the new in the cleanest possible way.  */
2653   replace_range (search_regs.start[sub], search_regs.end[sub],
2654                  newtext, 1, 0, 1);
2655   newpoint = search_regs.start[sub] + SCHARS (newtext);
2656
2657   if (case_action == all_caps)
2658     Fupcase_region (make_number (search_regs.start[sub]),
2659                     make_number (newpoint));
2660   else if (case_action == cap_initial)
2661     Fupcase_initials_region (make_number (search_regs.start[sub]),
2662                              make_number (newpoint));
2663
2664   /* Adjust search data for this change.  */
2665   {
2666     int oldend = search_regs.end[sub];
2667     int oldstart = search_regs.start[sub];
2668     int change = newpoint - search_regs.end[sub];
2669     int i;
2670
2671     for (i = 0; i < search_regs.num_regs; i++)
2672       {
2673         if (search_regs.start[i] >= oldend)
2674           search_regs.start[i] += change;
2675         else if (search_regs.start[i] > oldstart)
2676           search_regs.start[i] = oldstart;
2677         if (search_regs.end[i] >= oldend)
2678           search_regs.end[i] += change;
2679         else if (search_regs.end[i] > oldstart)
2680           search_regs.end[i] = oldstart;
2681       }
2682   }
2683
2684   /* Put point back where it was in the text.  */
2685   if (opoint <= 0)
2686     TEMP_SET_PT (opoint + ZV);
2687   else
2688     TEMP_SET_PT (opoint);
2689
2690   /* Now move point "officially" to the start of the inserted replacement.  */
2691   move_if_not_intangible (newpoint);
2692
2693   return Qnil;
2694 }
2695 \f
2696 static Lisp_Object
2697 match_limit (num, beginningp)
2698      Lisp_Object num;
2699      int beginningp;
2700 {
2701   register int n;
2702
2703   CHECK_NUMBER (num);
2704   n = XINT (num);
2705   if (n < 0)
2706     args_out_of_range (num, make_number (0));
2707   if (search_regs.num_regs <= 0)
2708     error ("No match data, because no search succeeded");
2709   if (n >= search_regs.num_regs
2710       || search_regs.start[n] < 0)
2711     return Qnil;
2712   return (make_number ((beginningp) ? search_regs.start[n]
2713                                     : search_regs.end[n]));
2714 }
2715
2716 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2717        doc: /* Return position of start of text matched by last search.
2718 SUBEXP, a number, specifies which parenthesized expression in the last
2719   regexp.
2720 Value is nil if SUBEXPth pair didn't match, or there were less than
2721   SUBEXP pairs.
2722 Zero means the entire text matched by the whole regexp or whole string.  */)
2723      (subexp)
2724      Lisp_Object subexp;
2725 {
2726   return match_limit (subexp, 1);
2727 }
2728
2729 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2730        doc: /* Return position of end of text matched by last search.
2731 SUBEXP, a number, specifies which parenthesized expression in the last
2732   regexp.
2733 Value is nil if SUBEXPth pair didn't match, or there were less than
2734   SUBEXP pairs.
2735 Zero means the entire text matched by the whole regexp or whole string.  */)
2736      (subexp)
2737      Lisp_Object subexp;
2738 {
2739   return match_limit (subexp, 0);
2740 }
2741
2742 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2743        doc: /* Return a list containing all info on what the last search matched.
2744 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2745 All the elements are markers or nil (nil if the Nth pair didn't match)
2746 if the last match was on a buffer; integers or nil if a string was matched.
2747 Use `store-match-data' to reinstate the data in this list.
2748
2749 If INTEGERS (the optional first argument) is non-nil, always use
2750 integers \(rather than markers) to represent buffer positions.  In
2751 this case, and if the last match was in a buffer, the buffer will get
2752 stored as one additional element at the end of the list.
2753
2754 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2755 enough to hold all the values, and if INTEGERS is non-nil, no consing
2756 is done.
2757
2758 If optional third arg RESEAT is non-nil, any previous markers on the
2759 REUSE list will be modified to point to nowhere.
2760
2761 Return value is undefined if the last search failed.  */)
2762   (integers, reuse, reseat)
2763      Lisp_Object integers, reuse, reseat;
2764 {
2765   Lisp_Object tail, prev;
2766   Lisp_Object *data;
2767   int i, len;
2768
2769   if (!NILP (reseat))
2770     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2771       if (MARKERP (XCAR (tail)))
2772         {
2773           unchain_marker (XMARKER (XCAR (tail)));
2774           XSETCAR (tail, Qnil);
2775         }
2776
2777   if (NILP (last_thing_searched))
2778     return Qnil;
2779
2780   prev = Qnil;
2781
2782   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2783                                  * sizeof (Lisp_Object));
2784
2785   len = 0;
2786   for (i = 0; i < search_regs.num_regs; i++)
2787     {
2788       int start = search_regs.start[i];
2789       if (start >= 0)
2790         {
2791           if (EQ (last_thing_searched, Qt)
2792               || ! NILP (integers))
2793             {
2794               XSETFASTINT (data[2 * i], start);
2795               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2796             }
2797           else if (BUFFERP (last_thing_searched))
2798             {
2799               data[2 * i] = Fmake_marker ();
2800               Fset_marker (data[2 * i],
2801                            make_number (start),
2802                            last_thing_searched);
2803               data[2 * i + 1] = Fmake_marker ();
2804               Fset_marker (data[2 * i + 1],
2805                            make_number (search_regs.end[i]),
2806                            last_thing_searched);
2807             }
2808           else
2809             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2810             abort ();
2811
2812           len = 2 * i + 2;
2813         }
2814       else
2815         data[2 * i] = data[2 * i + 1] = Qnil;
2816     }
2817
2818   if (BUFFERP (last_thing_searched) && !NILP (integers))
2819     {
2820       data[len] = last_thing_searched;
2821       len++;
2822     }
2823
2824   /* If REUSE is not usable, cons up the values and return them.  */
2825   if (! CONSP (reuse))
2826     return Flist (len, data);
2827
2828   /* If REUSE is a list, store as many value elements as will fit
2829      into the elements of REUSE.  */
2830   for (i = 0, tail = reuse; CONSP (tail);
2831        i++, tail = XCDR (tail))
2832     {
2833       if (i < len)
2834         XSETCAR (tail, data[i]);
2835       else
2836         XSETCAR (tail, Qnil);
2837       prev = tail;
2838     }
2839
2840   /* If we couldn't fit all value elements into REUSE,
2841      cons up the rest of them and add them to the end of REUSE.  */
2842   if (i < len)
2843     XSETCDR (prev, Flist (len - i, data + i));
2844
2845   return reuse;
2846 }
2847
2848 /* Internal usage only:
2849    If RESEAT is `evaporate', put the markers back on the free list
2850    immediately.  No other references to the markers must exist in this case,
2851    so it is used only internally on the unwind stack and save-match-data from
2852    Lisp.  */
2853
2854 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2855        doc: /* Set internal data on last search match from elements of LIST.
2856 LIST should have been created by calling `match-data' previously.
2857
2858 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2859     (list, reseat)
2860      register Lisp_Object list, reseat;
2861 {
2862   register int i;
2863   register Lisp_Object marker;
2864
2865   if (running_asynch_code)
2866     save_search_regs ();
2867
2868   if (!CONSP (list) && !NILP (list))
2869     list = wrong_type_argument (Qconsp, list);
2870
2871   /* Unless we find a marker with a buffer or an explicit buffer
2872      in LIST, assume that this match data came from a string.  */
2873   last_thing_searched = Qt;
2874
2875   /* Allocate registers if they don't already exist.  */
2876   {
2877     int length = XFASTINT (Flength (list)) / 2;
2878
2879     if (length > search_regs.num_regs)
2880       {
2881         if (search_regs.num_regs == 0)
2882           {
2883             search_regs.start
2884               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2885             search_regs.end
2886               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2887           }
2888         else
2889           {
2890             search_regs.start
2891               = (regoff_t *) xrealloc (search_regs.start,
2892                                        length * sizeof (regoff_t));
2893             search_regs.end
2894               = (regoff_t *) xrealloc (search_regs.end,
2895                                        length * sizeof (regoff_t));
2896           }
2897
2898         for (i = search_regs.num_regs; i < length; i++)
2899           search_regs.start[i] = -1;
2900
2901         search_regs.num_regs = length;
2902       }
2903
2904     for (i = 0; CONSP (list); i++)
2905       {
2906         marker = XCAR (list);
2907         if (BUFFERP (marker))
2908           {
2909             last_thing_searched = marker;
2910             break;
2911           }
2912         if (i >= length)
2913           break;
2914         if (NILP (marker))
2915           {
2916             search_regs.start[i] = -1;
2917             list = XCDR (list);
2918           }
2919         else
2920           {
2921             int from;
2922             Lisp_Object m;
2923
2924             m = marker;
2925             if (MARKERP (marker))
2926               {
2927                 if (XMARKER (marker)->buffer == 0)
2928                   XSETFASTINT (marker, 0);
2929                 else
2930                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2931               }
2932
2933             CHECK_NUMBER_COERCE_MARKER (marker);
2934             from = XINT (marker);
2935
2936             if (!NILP (reseat) && MARKERP (m))
2937               {
2938                 if (EQ (reseat, Qevaporate))
2939                   free_marker (m);
2940                 else
2941                   unchain_marker (XMARKER (m));
2942                 XSETCAR (list, Qnil);
2943               }
2944
2945             if ((list = XCDR (list), !CONSP (list)))
2946               break;
2947
2948             m = marker = XCAR (list);
2949
2950             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2951               XSETFASTINT (marker, 0);
2952
2953             CHECK_NUMBER_COERCE_MARKER (marker);
2954             search_regs.start[i] = from;
2955             search_regs.end[i] = XINT (marker);
2956
2957             if (!NILP (reseat) && MARKERP (m))
2958               {
2959                 if (EQ (reseat, Qevaporate))
2960                   free_marker (m);
2961                 else
2962                   unchain_marker (XMARKER (m));
2963                 XSETCAR (list, Qnil);
2964               }
2965           }
2966         list = XCDR (list);
2967       }
2968
2969     for (; i < search_regs.num_regs; i++)
2970       search_regs.start[i] = -1;
2971   }
2972
2973   return Qnil;
2974 }
2975
2976 /* If non-zero the match data have been saved in saved_search_regs
2977    during the execution of a sentinel or filter. */
2978 static int search_regs_saved;
2979 static struct re_registers saved_search_regs;
2980 static Lisp_Object saved_last_thing_searched;
2981
2982 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2983    if asynchronous code (filter or sentinel) is running. */
2984 static void
2985 save_search_regs ()
2986 {
2987   if (!search_regs_saved)
2988     {
2989       saved_search_regs.num_regs = search_regs.num_regs;
2990       saved_search_regs.start = search_regs.start;
2991       saved_search_regs.end = search_regs.end;
2992       saved_last_thing_searched = last_thing_searched;
2993       last_thing_searched = Qnil;
2994       search_regs.num_regs = 0;
2995       search_regs.start = 0;
2996       search_regs.end = 0;
2997
2998       search_regs_saved = 1;
2999     }
3000 }
3001
3002 /* Called upon exit from filters and sentinels. */
3003 void
3004 restore_search_regs ()
3005 {
3006   if (search_regs_saved)
3007     {
3008       if (search_regs.num_regs > 0)
3009         {
3010           xfree (search_regs.start);
3011           xfree (search_regs.end);
3012         }
3013       search_regs.num_regs = saved_search_regs.num_regs;
3014       search_regs.start = saved_search_regs.start;
3015       search_regs.end = saved_search_regs.end;
3016       last_thing_searched = saved_last_thing_searched;
3017       saved_last_thing_searched = Qnil;
3018       search_regs_saved = 0;
3019     }
3020 }
3021
3022 static Lisp_Object
3023 unwind_set_match_data (list)
3024      Lisp_Object list;
3025 {
3026   /* It is safe to free (evaporate) the markers immediately.  */
3027   return Fset_match_data (list, Qevaporate);
3028 }
3029
3030 /* Called to unwind protect the match data.  */
3031 void
3032 record_unwind_save_match_data ()
3033 {
3034   record_unwind_protect (unwind_set_match_data,
3035                          Fmatch_data (Qnil, Qnil, Qnil));
3036 }
3037
3038 /* Quote a string to inactivate reg-expr chars */
3039
3040 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3041        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3042      (string)
3043      Lisp_Object string;
3044 {
3045   register unsigned char *in, *out, *end;
3046   register unsigned char *temp;
3047   int backslashes_added = 0;
3048
3049   CHECK_STRING (string);
3050
3051   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3052
3053   /* Now copy the data into the new string, inserting escapes. */
3054
3055   in = SDATA (string);
3056   end = in + SBYTES (string);
3057   out = temp;
3058
3059   for (; in != end; in++)
3060     {
3061       if (*in == '[' || *in == ']'
3062           || *in == '*' || *in == '.' || *in == '\\'
3063           || *in == '?' || *in == '+'
3064           || *in == '^' || *in == '$')
3065         *out++ = '\\', backslashes_added++;
3066       *out++ = *in;
3067     }
3068
3069   return make_specified_string (temp,
3070                                 SCHARS (string) + backslashes_added,
3071                                 out - temp,
3072                                 STRING_MULTIBYTE (string));
3073 }
3074 \f
3075 void
3076 syms_of_search ()
3077 {
3078   register int i;
3079
3080   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3081     {
3082       searchbufs[i].buf.allocated = 100;
3083       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3084       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3085       searchbufs[i].regexp = Qnil;
3086       searchbufs[i].whitespace_regexp = Qnil;
3087       staticpro (&searchbufs[i].regexp);
3088       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3089     }
3090   searchbuf_head = &searchbufs[0];
3091
3092   Qsearch_failed = intern ("search-failed");
3093   staticpro (&Qsearch_failed);
3094   Qinvalid_regexp = intern ("invalid-regexp");
3095   staticpro (&Qinvalid_regexp);
3096
3097   Fput (Qsearch_failed, Qerror_conditions,
3098         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3099   Fput (Qsearch_failed, Qerror_message,
3100         build_string ("Search failed"));
3101
3102   Fput (Qinvalid_regexp, Qerror_conditions,
3103         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3104   Fput (Qinvalid_regexp, Qerror_message,
3105         build_string ("Invalid regexp"));
3106
3107   last_thing_searched = Qnil;
3108   staticpro (&last_thing_searched);
3109
3110   saved_last_thing_searched = Qnil;
3111   staticpro (&saved_last_thing_searched);
3112
3113   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3114       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3115 Some commands use this for user-specified regexps.
3116 Spaces that occur inside character classes or repetition operators
3117 or other such regexp constructs are not replaced with this.
3118 A value of nil (which is the normal value) means treat spaces literally.  */);
3119   Vsearch_spaces_regexp = Qnil;
3120
3121   defsubr (&Slooking_at);
3122   defsubr (&Sposix_looking_at);
3123   defsubr (&Sstring_match);
3124   defsubr (&Sposix_string_match);
3125   defsubr (&Ssearch_forward);
3126   defsubr (&Ssearch_backward);
3127   defsubr (&Sword_search_forward);
3128   defsubr (&Sword_search_backward);
3129   defsubr (&Sre_search_forward);
3130   defsubr (&Sre_search_backward);
3131   defsubr (&Sposix_search_forward);
3132   defsubr (&Sposix_search_backward);
3133   defsubr (&Sreplace_match);
3134   defsubr (&Smatch_beginning);
3135   defsubr (&Smatch_end);
3136   defsubr (&Smatch_data);
3137   defsubr (&Sset_match_data);
3138   defsubr (&Sregexp_quote);
3139 }
3140
3141 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3142    (do not change this comment) */