src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007, 2008, 2009
   4                  Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "character.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   /* Syntax table for which the regexp applies.  We need this because
  46      of character classes.  If this is t, then the compiled pattern is valid
  47      for any syntax-table.  */
  48   Lisp_Object syntax_table;
  49   struct re_pattern_buffer buf;
  50   char fastmap[0400];
  51   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  52   char posix;
  53 };
  54
/* The cache slots themselves: a fixed pool of REGEXP_CACHE_SIZE
   regexp_cache structures.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list; points to the most recently used buffer.
   compile_pattern moves the slot it returns to this position.  */
struct regexp_cache *searchbuf_head;


/* Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when compiling a regexp fails
   (raised by compile_pattern_1).  */

Lisp_Object Qinvalid_regexp;

/* Error condition used for failing searches.  */
Lisp_Object Qsearch_failed;

/* When this is a string, compile_pattern_1 passes its text to
   re_set_whitespace_regexp for the duration of a compilation; for any
   other value it passes NULL instead.  The string in effect is also
   remembered in each cache slot, and compile_pattern reuses a slot
   only if that remembered value is `equal' to the current one.  */
Lisp_Object Vsearch_spaces_regexp;

/* If non-nil, the match data will not be changed during call to
   searching or matching functions.  This variable is for internal use
   only.  */
Lisp_Object Vinhibit_changing_match_data;

/* Forward declarations of functions local to this file.  They are
   old-style (unprototyped) declarations, matching the K&R definitions
   used throughout.  matcher_overflow never returns.  */
static void set_search_regs ();
static void save_search_regs ();
static int simple_search ();
static int boyer_moore ();
static int search_buffer ();
static void matcher_overflow () NO_RETURN;
 106
 107 static void
 108 matcher_overflow ()
 109 {
 110   error ("Stack overflow in regexp matcher");
 111 }
 112
 113 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 114    PATTERN is the pattern to compile.
 115    CP is the place to put the result.
 116    TRANSLATE is a translation table for ignoring case, or nil for none.
 117    REGP is the structure that says where to store the "register"
 118    values that will result from matching this pattern.
 119    If it is 0, we should compile the pattern not to record any
 120    subexpression bounds.
 121    POSIX is nonzero if we want full backtracking (POSIX style)
 122    for this pattern.  0 means backtrack only enough to get a valid match.
 123
 124    The behavior also depends on Vsearch_spaces_regexp.  */
 125
 126 static void
 127 compile_pattern_1 (cp, pattern, translate, regp, posix)
 128      struct regexp_cache *cp;
 129      Lisp_Object pattern;
 130      Lisp_Object translate;
 131      struct re_registers *regp;
 132      int posix;
 133 {
 134   char *val;
 135   reg_syntax_t old;
 136
 137   cp->regexp = Qnil;
 138   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 139   cp->posix = posix;
 140   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 141   cp->buf.charset_unibyte = charset_unibyte;
 142   if (STRINGP (Vsearch_spaces_regexp))
 143     cp->whitespace_regexp = Vsearch_spaces_regexp;
 144   else
 145     cp->whitespace_regexp = Qnil;
 146
 147   /* rms: I think BLOCK_INPUT is not needed here any more,
 148      because regex.c defines malloc to call xmalloc.
 149      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 150      So let's turn it off.  */
 151   /*  BLOCK_INPUT;  */
 152   old = re_set_syntax (RE_SYNTAX_EMACS
 153                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 154
 155   if (STRINGP (Vsearch_spaces_regexp))
 156     re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
 157   else
 158     re_set_whitespace_regexp (NULL);
 159
 160   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 161                                      SBYTES (pattern), &cp->buf);
 162
 163   /* If the compiled pattern hard codes some of the contents of the
 164      syntax-table, it can only be reused with *this* syntax table.  */
 165   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 166
 167   re_set_whitespace_regexp (NULL);
 168
 169   re_set_syntax (old);
 170   /* UNBLOCK_INPUT;  */
 171   if (val)
 172     xsignal1 (Qinvalid_regexp, build_string (val));
 173
 174   cp->regexp = Fcopy_sequence (pattern);
 175 }
 176
 177 /* Shrink each compiled regexp buffer in the cache
 178    to the size actually used right now.
 179    This is called from garbage collection.  */
 180
 181 void
 182 shrink_regexp_cache ()
 183 {
 184   struct regexp_cache *cp;
 185
 186   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 187     {
 188       cp->buf.allocated = cp->buf.used;
 189       cp->buf.buffer
 190         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 191     }
 192 }
 193
 194 /* Clear the regexp cache w.r.t. a particular syntax table,
 195    because it was changed.
 196    There is no danger of memory leak here because re_compile_pattern
 197    automagically manages the memory in each re_pattern_buffer struct,
 198    based on its `allocated' and `buffer' values.  */
 199 void
 200 clear_regexp_cache ()
 201 {
 202   int i;
 203
 204   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 205     /* It's tempting to compare with the syntax-table we've actually changd,
 206        but it's not sufficient because char-table inheritance mewans that
 207        modifying one syntax-table can change others at the same time.  */
 208     if (!EQ (searchbufs[i].syntax_table, Qt))
 209       searchbufs[i].regexp = Qnil;
 210 }
 211
 212 /* Compile a regexp if necessary, but first check to see if there's one in
 213    the cache.
 214    PATTERN is the pattern to compile.
 215    TRANSLATE is a translation table for ignoring case, or nil for none.
 216    REGP is the structure that says where to store the "register"
 217    values that will result from matching this pattern.
 218    If it is 0, we should compile the pattern not to record any
 219    subexpression bounds.
 220    POSIX is nonzero if we want full backtracking (POSIX style)
 221    for this pattern.  0 means backtrack only enough to get a valid match.  */
 222
 223 struct re_pattern_buffer *
 224 compile_pattern (pattern, regp, translate, posix, multibyte)
 225      Lisp_Object pattern;
 226      struct re_registers *regp;
 227      Lisp_Object translate;
 228      int posix, multibyte;
 229 {
 230   struct regexp_cache *cp, **cpp;
 231
 232   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 233     {
 234       cp = *cpp;
 235       /* Entries are initialized to nil, and may be set to nil by
 236          compile_pattern_1 if the pattern isn't valid.  Don't apply
 237          string accessors in those cases.  However, compile_pattern_1
 238          is only applied to the cache entry we pick here to reuse.  So
 239          nil should never appear before a non-nil entry.  */
 240       if (NILP (cp->regexp))
 241         goto compile_it;
 242       if (SCHARS (cp->regexp) == SCHARS (pattern)
 243           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 244           && !NILP (Fstring_equal (cp->regexp, pattern))
 245           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 246           && cp->posix == posix
 247           && (EQ (cp->syntax_table, Qt)
 248               || EQ (cp->syntax_table, current_buffer->syntax_table))
 249           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 250           && cp->buf.charset_unibyte == charset_unibyte)
 251         break;
 252
 253       /* If we're at the end of the cache, compile into the nil cell
 254          we found, or the last (least recently used) cell with a
 255          string value.  */
 256       if (cp->next == 0)
 257         {
 258         compile_it:
 259           compile_pattern_1 (cp, pattern, translate, regp, posix);
 260           break;
 261         }
 262     }
 263
 264   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 265      either because we found it in the cache or because we just compiled it.
 266      Move it to the front of the queue to mark it as most recently used.  */
 267   *cpp = cp->next;
 268   cp->next = searchbuf_head;
 269   searchbuf_head = cp;
 270
 271   /* Advise the searching functions about the space we have allocated
 272      for register data.  */
 273   if (regp)
 274     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 275
 276   /* The compiled pattern can be used both for mulitbyte and unibyte
 277      target.  But, we have to tell which the pattern is used for. */
 278   cp->buf.target_multibyte = multibyte;
 279
 280   return &cp->buf;
 281 }
 282
 283 \f
 284 static Lisp_Object
 285 looking_at_1 (string, posix)
 286      Lisp_Object string;
 287      int posix;
 288 {
 289   Lisp_Object val;
 290   unsigned char *p1, *p2;
 291   int s1, s2;
 292   register int i;
 293   struct re_pattern_buffer *bufp;
 294
 295   if (running_asynch_code)
 296     save_search_regs ();
 297
 298   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 299   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 300     = current_buffer->case_eqv_table;
 301
 302   CHECK_STRING (string);
 303   bufp = compile_pattern (string,
 304                           (NILP (Vinhibit_changing_match_data)
 305                            ? &search_regs : NULL),
 306                           (!NILP (current_buffer->case_fold_search)
 307                            ? current_buffer->case_canon_table : Qnil),
 308                           posix,
 309                           !NILP (current_buffer->enable_multibyte_characters));
 310
 311   immediate_quit = 1;
 312   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 313
 314   /* Get pointers and sizes of the two strings
 315      that make up the visible portion of the buffer. */
 316
 317   p1 = BEGV_ADDR;
 318   s1 = GPT_BYTE - BEGV_BYTE;
 319   p2 = GAP_END_ADDR;
 320   s2 = ZV_BYTE - GPT_BYTE;
 321   if (s1 < 0)
 322     {
 323       p2 = p1;
 324       s2 = ZV_BYTE - BEGV_BYTE;
 325       s1 = 0;
 326     }
 327   if (s2 < 0)
 328     {
 329       s1 = ZV_BYTE - BEGV_BYTE;
 330       s2 = 0;
 331     }
 332
 333   re_match_object = Qnil;
 334
 335   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 336                   PT_BYTE - BEGV_BYTE,
 337                   (NILP (Vinhibit_changing_match_data)
 338                    ? &search_regs : NULL),
 339                   ZV_BYTE - BEGV_BYTE);
 340   immediate_quit = 0;
 341
 342   if (i == -2)
 343     matcher_overflow ();
 344
 345   val = (0 <= i ? Qt : Qnil);
 346   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 347     for (i = 0; i < search_regs.num_regs; i++)
 348       if (search_regs.start[i] >= 0)
 349         {
 350           search_regs.start[i]
 351             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 352           search_regs.end[i]
 353             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 354         }
 355
 356   /* Set last_thing_searched only when match data is changed.  */
 357   if (NILP (Vinhibit_changing_match_data))
 358     XSETBUFFER (last_thing_searched, current_buffer);
 359
 360   return val;
 361 }
 362
 363 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 364        doc: /* Return t if text after point matches regular expression REGEXP.
 365 This function modifies the match data that `match-beginning',
 366 `match-end' and `match-data' access; save and restore the match
 367 data if you want to preserve them.  */)
 368      (regexp)
 369      Lisp_Object regexp;
 370 {
 371   return looking_at_1 (regexp, 0);
 372 }
 373
 374 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 375        doc: /* Return t if text after point matches regular expression REGEXP.
 376 Find the longest match, in accord with Posix regular expression rules.
 377 This function modifies the match data that `match-beginning',
 378 `match-end' and `match-data' access; save and restore the match
 379 data if you want to preserve them.  */)
 380      (regexp)
 381      Lisp_Object regexp;
 382 {
 383   return looking_at_1 (regexp, 1);
 384 }
 385 \f
 386 static Lisp_Object
 387 string_match_1 (regexp, string, start, posix)
 388      Lisp_Object regexp, string, start;
 389      int posix;
 390 {
 391   int val;
 392   struct re_pattern_buffer *bufp;
 393   int pos, pos_byte;
 394   int i;
 395
 396   if (running_asynch_code)
 397     save_search_regs ();
 398
 399   CHECK_STRING (regexp);
 400   CHECK_STRING (string);
 401
 402   if (NILP (start))
 403     pos = 0, pos_byte = 0;
 404   else
 405     {
 406       int len = SCHARS (string);
 407
 408       CHECK_NUMBER (start);
 409       pos = XINT (start);
 410       if (pos < 0 && -pos <= len)
 411         pos = len + pos;
 412       else if (0 > pos || pos > len)
 413         args_out_of_range (string, start);
 414       pos_byte = string_char_to_byte (string, pos);
 415     }
 416
 417   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 418   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 419     = current_buffer->case_eqv_table;
 420
 421   bufp = compile_pattern (regexp,
 422                           (NILP (Vinhibit_changing_match_data)
 423                            ? &search_regs : NULL),
 424                           (!NILP (current_buffer->case_fold_search)
 425                            ? current_buffer->case_canon_table : Qnil),
 426                           posix,
 427                           STRING_MULTIBYTE (string));
 428   immediate_quit = 1;
 429   re_match_object = string;
 430
 431   val = re_search (bufp, (char *) SDATA (string),
 432                    SBYTES (string), pos_byte,
 433                    SBYTES (string) - pos_byte,
 434                    (NILP (Vinhibit_changing_match_data)
 435                     ? &search_regs : NULL));
 436   immediate_quit = 0;
 437
 438   /* Set last_thing_searched only when match data is changed.  */
 439   if (NILP (Vinhibit_changing_match_data))
 440     last_thing_searched = Qt;
 441
 442   if (val == -2)
 443     matcher_overflow ();
 444   if (val < 0) return Qnil;
 445
 446   if (NILP (Vinhibit_changing_match_data))
 447     for (i = 0; i < search_regs.num_regs; i++)
 448       if (search_regs.start[i] >= 0)
 449         {
 450           search_regs.start[i]
 451             = string_byte_to_char (string, search_regs.start[i]);
 452           search_regs.end[i]
 453             = string_byte_to_char (string, search_regs.end[i]);
 454         }
 455
 456   return make_number (string_byte_to_char (string, val));
 457 }
 458
 459 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 460        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 461 Matching ignores case if `case-fold-search' is non-nil.
 462 If third arg START is non-nil, start search at that index in STRING.
 463 For index of first char beyond the match, do (match-end 0).
 464 `match-end' and `match-beginning' also give indices of substrings
 465 matched by parenthesis constructs in the pattern.
 466
 467 You can use the function `match-string' to extract the substrings
 468 matched by the parenthesis constructions in REGEXP. */)
 469      (regexp, string, start)
 470      Lisp_Object regexp, string, start;
 471 {
 472   return string_match_1 (regexp, string, start, 0);
 473 }
 474
 475 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 476        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 477 Find the longest match, in accord with Posix regular expression rules.
 478 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 479 If third arg START is non-nil, start search at that index in STRING.
 480 For index of first char beyond the match, do (match-end 0).
 481 `match-end' and `match-beginning' also give indices of substrings
 482 matched by parenthesis constructs in the pattern.  */)
 483      (regexp, string, start)
 484      Lisp_Object regexp, string, start;
 485 {
 486   return string_match_1 (regexp, string, start, 1);
 487 }
 488
 489 /* Match REGEXP against STRING, searching all of STRING,
 490    and return the index of the match, or negative on failure.
 491    This does not clobber the match data.  */
 492
 493 int
 494 fast_string_match (regexp, string)
 495      Lisp_Object regexp, string;
 496 {
 497   int val;
 498   struct re_pattern_buffer *bufp;
 499
 500   bufp = compile_pattern (regexp, 0, Qnil,
 501                           0, STRING_MULTIBYTE (string));
 502   immediate_quit = 1;
 503   re_match_object = string;
 504
 505   val = re_search (bufp, (char *) SDATA (string),
 506                    SBYTES (string), 0,
 507                    SBYTES (string), 0);
 508   immediate_quit = 0;
 509   return val;
 510 }
 511
 512 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 513    and return the index of the match, or negative on failure.
 514    This does not clobber the match data.
 515    We assume that STRING contains single-byte characters.  */
 516
 517 extern Lisp_Object Vascii_downcase_table;
 518
 519 int
 520 fast_c_string_match_ignore_case (regexp, string)
 521      Lisp_Object regexp;
 522      const char *string;
 523 {
 524   int val;
 525   struct re_pattern_buffer *bufp;
 526   int len = strlen (string);
 527
 528   regexp = string_make_unibyte (regexp);
 529   re_match_object = Qt;
 530   bufp = compile_pattern (regexp, 0,
 531                           Vascii_canon_table, 0,
 532                           0);
 533   immediate_quit = 1;
 534   val = re_search (bufp, string, len, 0, len, 0);
 535   immediate_quit = 0;
 536   return val;
 537 }
 538
 539 /* Like fast_string_match but ignore case.  */
 540
 541 int
 542 fast_string_match_ignore_case (regexp, string)
 543      Lisp_Object regexp, string;
 544 {
 545   int val;
 546   struct re_pattern_buffer *bufp;
 547
 548   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 549                           0, STRING_MULTIBYTE (string));
 550   immediate_quit = 1;
 551   re_match_object = string;
 552
 553   val = re_search (bufp, (char *) SDATA (string),
 554                    SBYTES (string), 0,
 555                    SBYTES (string), 0);
 556   immediate_quit = 0;
 557   return val;
 558 }
 559 \f
 560 /* Match REGEXP atainst the characters after POS to LIMIT, and return
 561    the number of matched characters.  If STRING is non-nil, match
 562    against the characters in it.  In that case, POS and LIMIT are
 563    indices into the string.  This function doesn't modify the match
 564    data.  */
 565
 566 EMACS_INT
 567 fast_looking_at (regexp, pos, pos_byte, limit, limit_byte, string)
 568      Lisp_Object regexp;
 569      EMACS_INT pos, pos_byte, limit, limit_byte;
 570      Lisp_Object string;
 571 {
 572   int multibyte;
 573   struct re_pattern_buffer *buf;
 574   unsigned char *p1, *p2;
 575   int s1, s2;
 576   EMACS_INT len;
 577
 578   if (STRINGP (string))
 579     {
 580       if (pos_byte < 0)
 581         pos_byte = string_char_to_byte (string, pos);
 582       if (limit_byte < 0)
 583         limit_byte = string_char_to_byte (string, limit);
 584       p1 = NULL;
 585       s1 = 0;
 586       p2 = SDATA (string);
 587       s2 = SBYTES (string);
 588       re_match_object = string;
 589       multibyte = STRING_MULTIBYTE (string);
 590     }
 591   else
 592     {
 593       if (pos_byte < 0)
 594         pos_byte = CHAR_TO_BYTE (pos);
 595       if (limit_byte < 0)
 596         limit_byte = CHAR_TO_BYTE (limit);
 597       pos_byte -= BEGV_BYTE;
 598       limit_byte -= BEGV_BYTE;
 599       p1 = BEGV_ADDR;
 600       s1 = GPT_BYTE - BEGV_BYTE;
 601       p2 = GAP_END_ADDR;
 602       s2 = ZV_BYTE - GPT_BYTE;
 603       if (s1 < 0)
 604         {
 605           p2 = p1;
 606           s2 = ZV_BYTE - BEGV_BYTE;
 607           s1 = 0;
 608         }
 609       if (s2 < 0)
 610         {
 611           s1 = ZV_BYTE - BEGV_BYTE;
 612           s2 = 0;
 613         }
 614       re_match_object = Qnil;
 615       multibyte = ! NILP (current_buffer->enable_multibyte_characters);
 616     }
 617
 618   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 619   immediate_quit = 1;
 620   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 621                     pos_byte, NULL, limit_byte);
 622   immediate_quit = 0;
 623
 624   return len;
 625 }
 626
 627 \f
 628 /* The newline cache: remembering which sections of text have no newlines.  */
 629
 630 /* If the user has requested newline caching, make sure it's on.
 631    Otherwise, make sure it's off.
 632    This is our cheezy way of associating an action with the change of
 633    state of a buffer-local variable.  */
 634 static void
 635 newline_cache_on_off (buf)
 636      struct buffer *buf;
 637 {
 638   if (NILP (buf->cache_long_line_scans))
 639     {
 640       /* It should be off.  */
 641       if (buf->newline_cache)
 642         {
 643           free_region_cache (buf->newline_cache);
 644           buf->newline_cache = 0;
 645         }
 646     }
 647   else
 648     {
 649       /* It should be on.  */
 650       if (buf->newline_cache == 0)
 651         buf->newline_cache = new_region_cache ();
 652     }
 653 }
 654
 655 \f
/* Search for COUNT instances of the character TARGET between START and END.
   START and END are character positions in the current buffer; the
   value returned is a character position too.

   If COUNT is positive, search forwards; END must be >= START.
   If COUNT is negative, search backwards for the -COUNTth instance;
      END must be <= START.
   If COUNT is zero, do anything you please; run rogue, for all I care.
   (As written, a zero COUNT takes the backward branch.)

   If END is zero, use BEGV or ZV instead, as appropriate for the
   direction indicated by COUNT.

   If we find COUNT instances, set *SHORTAGE to zero, and return the
   position past the COUNTth match.  Note that for reverse motion
   this is not the same as the usual convention for Emacs motion commands.

   If we don't find COUNT instances before reaching END, set *SHORTAGE
   to the number of TARGETs left unfound, and return END.
   SHORTAGE may be a null pointer, in which case nothing is stored.

   When TARGET is a newline and the buffer has a newline cache, the
   cache is consulted to skip regions already known to hold no
   newlines, and every stretch scanned here is recorded in it.

   If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
   except when inside redisplay.  immediate_quit is always zero again
   on return.  */

int
scan_buffer (target, start, end, count, shortage, allow_quit)
     register int target;
     int start, end;
     int count;
     int *shortage;
     int allow_quit;
{
  struct region_cache *newline_cache;
  /* +1 when scanning forward, -1 when scanning backward; used only
     to make the shortage reported at the end non-negative.  */
  int direction;

  if (count > 0)
    {
      direction = 1;
      if (! end) end = ZV;
    }
  else
    {
      direction = -1;
      if (! end) end = BEGV;
    }

  /* Create or discard the newline cache according to the buffer's
     current setting before looking at it.  */
  newline_cache_on_off (current_buffer);
  newline_cache = current_buffer->newline_cache;

  if (shortage != 0)
    *shortage = 0;

  immediate_quit = allow_quit;

  if (count > 0)
    while (start != end)
      {
        /* Our innermost scanning loop is very simple; it doesn't know
           about gaps, buffer ends, or the newline cache.  ceiling is
           the position of the last character before the next such
           obstacle --- the last character the dumb search loop should
           examine.  */
        int ceiling_byte = CHAR_TO_BYTE (end) - 1;
        int start_byte = CHAR_TO_BYTE (start);
        int tem;

        /* If we're looking for a newline, consult the newline cache
           to see where we can avoid some scanning.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            /* immediate_quit is switched off while the cache is being
               queried and restored right afterwards.  */
            immediate_quit = 0;
            while (region_cache_forward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* START should never be after END.  */
            if (start_byte > ceiling_byte)
              start_byte = ceiling_byte;

            /* Now the text after start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = min (next_change - 1, ceiling_byte);
          }

        /* The dumb loop can only scan text stored in contiguous
           bytes. BUFFER_CEILING_OF returns the last character
           position that is contiguous, so the ceiling is the
           position after that.  */
        tem = BUFFER_CEILING_OF (start_byte);
        ceiling_byte = min (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr
            = BYTE_POS_ADDR (ceiling_byte) + 1;
          register unsigned char *cursor
            = BYTE_POS_ADDR (start_byte);
          /* Address corresponding to start_byte, so that
             start_byte + cursor - base is the byte position of cursor.  */
          unsigned char *base = cursor;

          while (cursor < ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              /* The dumb loop.  */
              while (*cursor != target && ++cursor < ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from start to cursor is free of them. */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + scan_start - base,
                                   start_byte + cursor - base);

              /* Did we find the target character?  */
              if (cursor < ceiling_addr)
                {
                  if (--count == 0)
                    {
                      immediate_quit = 0;
                      /* The + 1 makes the result the position just
                         past the matched character.  */
                      return BYTE_TO_CHAR (start_byte + cursor - base + 1);
                    }
                  cursor++;
                }
            }

          /* This stretch is exhausted; resume the outer loop from
             where the scan stopped.  */
          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }
  else
    while (start > end)
      {
        /* The last character to check before the next obstacle.  */
        int ceiling_byte = CHAR_TO_BYTE (end);
        int start_byte = CHAR_TO_BYTE (start);
        int tem;

        /* Consult the newline cache, if appropriate.  */
        if (target == '\n' && newline_cache)
          {
            int next_change;
            /* As in the forward case, immediate_quit is off while
               the cache is being queried.  */
            immediate_quit = 0;
            while (region_cache_backward
                   (current_buffer, newline_cache, start_byte, &next_change))
              start_byte = next_change;
            immediate_quit = allow_quit;

            /* Start should never be at or before end.  */
            if (start_byte <= ceiling_byte)
              start_byte = ceiling_byte + 1;

            /* Now the text before start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = max (next_change, ceiling_byte);
          }

        /* Stop scanning before the gap.  */
        tem = BUFFER_FLOOR_OF (start_byte - 1);
        ceiling_byte = max (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
          /* Address corresponding to start_byte - 1, so that
             start_byte + cursor - base is the byte position just
             after the character cursor points at.  */
          unsigned char *base = cursor;

          while (cursor >= ceiling_addr)
            {
              unsigned char *scan_start = cursor;

              while (*cursor != target && --cursor >= ceiling_addr)
                ;

              /* If we're looking for newlines, cache the fact that
                 the region from after the cursor to start is free of them.  */
              if (target == '\n' && newline_cache)
                know_region_cache (current_buffer, newline_cache,
                                   start_byte + cursor - base,
                                   start_byte + scan_start - base);

              /* Did we find the target character?  */
              if (cursor >= ceiling_addr)
                {
                  /* COUNT is negative (or zero) here and climbs
                     towards zero with every match.  */
                  if (++count >= 0)
                    {
                      immediate_quit = 0;
                      /* Because base corresponds to start_byte - 1,
                         this is the position just past the matched
                         character.  */
                      return BYTE_TO_CHAR (start_byte + cursor - base);
                    }
                  cursor--;
                }
            }

          /* This stretch is exhausted; resume the outer loop from
             where the scan stopped.  */
          start = BYTE_TO_CHAR (start_byte + cursor - base);
        }
      }

  /* Ran into END before finding enough matches: report how many are
     still missing.  */
  immediate_quit = 0;
  if (shortage != 0)
    *shortage = count * direction;
  return start;
}
 855 \f
 856 /* Search for COUNT instances of a line boundary.  The scanning loops
 857    below test only for newline ('\n'); they make no carriage-return check.
 858    Start at START.  If COUNT is negative, search backwards.
 859
 860    We report the resulting position by calling TEMP_SET_PT_BOTH.
 861
 862    If we find COUNT instances, we position after (always after,
 863    even if scanning backwards) the COUNTth match, and return 0.
 864
 865    If we don't find COUNT instances before reaching LIMIT_BYTE
 866    (scanning forward or backward), we return the number of line
 867    boundaries left unfound, and position at the limit we bumped up
 868    against (LIMIT/LIMIT_BYTE).
 869
 870    If ALLOW_QUIT is non-zero, increment immediate_quit while scanning.
 871    That's good to do except in special cases.  immediate_quit is put
        back to its value on entry before every return.

        The START_BYTE argument is overwritten with CHAR_TO_BYTE (START)
        before it is used, so the value passed in has no effect.  LIMIT is
        used only for the final TEMP_SET_PT_BOTH; the scan itself is
        bounded by LIMIT_BYTE.  */
 872
 873 int
 874 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 875      int start, start_byte;
 876      int limit, limit_byte;
 877      register int count;
 878      int allow_quit;
 879 {
       /* +1 when scanning forward, -1 otherwise (a COUNT of zero takes the
          backward branch).  Multiplied into the leftover COUNT for the
          return value.  */
 880   int direction = ((count > 0) ? 1 : -1);
 881
       /* CURSOR walks the raw buffer text; BASE remembers where the
          current pass began so that pointer distance can be turned back
          into a byte position.  */
 882   register unsigned char *cursor;
 883   unsigned char *base;
 884
       /* CEILING is the byte position bounding the current pass and
          CEILING_ADDR the address at which the inner loop must stop.  */
 885   register int ceiling;
 886   register unsigned char *ceiling_addr;
 887
 888   int old_immediate_quit = immediate_quit;
 889
 890   /* The code that follows is like scan_buffer
 891      but looks for newline only and sets point itself.  */
 892
 893   if (allow_quit)
 894     immediate_quit++;
 895
       /* Derive the byte position from START; the caller's START_BYTE is
          discarded here.  */
 896   start_byte = CHAR_TO_BYTE (start);
 897
 898   if (count > 0)
 899     {
           /* Forward scan.  Each outer iteration handles one stretch of
              text bounded by BUFFER_CEILING_OF (presumably the end of a
              contiguous run, so the pointer scan never crosses the gap --
              compare the "Stop scanning before the gap" step in
              scan_buffer) and by LIMIT_BYTE - 1.  */
 900       while (start_byte < limit_byte)
 901         {
 902           ceiling =  BUFFER_CEILING_OF (start_byte);
 903           ceiling = min (limit_byte - 1, ceiling);
               /* One past the last byte this pass may examine.  */
 904           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 905           base = (cursor = BYTE_POS_ADDR (start_byte));
 906           while (1)
 907             {
                   /* Advance to the next '\n' or to CEILING_ADDR,
                      whichever comes first.  */
 908               while (*cursor != '\n' && ++cursor != ceiling_addr)
 909                 ;
 910
 911               if (cursor != ceiling_addr)
 912                 {
                       /* CURSOR is on a newline.  */
 913                   if (--count == 0)
 914                     {
                           /* That was the last one wanted: set point just
                              after it and report success.  */
 915                       immediate_quit = old_immediate_quit;
 916                       start_byte = start_byte + cursor - base + 1;
 917                       start = BYTE_TO_CHAR (start_byte);
 918                       TEMP_SET_PT_BOTH (start, start_byte);
 919                       return 0;
 920                     }
 921                   else
                         /* More to find: step over this newline; if that
                            exhausts the stretch, go compute the next one.  */
 922                     if (++cursor == ceiling_addr)
 923                       break;
 924                 }
 925               else
 926                 break;
 927             }
               /* Resume at the byte where this pass stopped.  */
 928           start_byte += cursor - base;
 929         }
 930     }
 931   else
 932     {
           /* Backward scan, mirroring the loop above.  BUFFER_FLOOR_OF
              and LIMIT_BYTE give the lowest byte position to examine.  */
 933       while (start_byte > limit_byte)
 934         {
 935           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 936           ceiling = max (limit_byte, ceiling);
               /* One before the lowest byte this pass may examine.  */
 937           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
               /* Start one past the byte before START_BYTE, because the
                  inner loop pre-decrements CURSOR before each test.  */
 938           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 939           while (1)
 940             {
 941               while (--cursor != ceiling_addr && *cursor != '\n')
 942                 ;
 943
 944               if (cursor != ceiling_addr)
 945                 {
                       /* CURSOR is on a newline.  COUNT is negative here
                          and climbs toward zero.  */
 946                   if (++count == 0)
 947                     {
 948                       immediate_quit = old_immediate_quit;
 949                       /* Return the position AFTER the match we found.  */
 950                       start_byte = start_byte + cursor - base + 1;
 951                       start = BYTE_TO_CHAR (start_byte);
 952                       TEMP_SET_PT_BOTH (start, start_byte);
 953                       return 0;
 954                     }
 955                 }
 956               else
 957                 break;
 958             }
 959           /* Here we add 1 to compensate for the last decrement
 960              of CURSOR, which took it past the valid range.  */
 961           start_byte += cursor - base + 1;
 962         }
 963     }
 964
       /* Ran into the limit before finding COUNT newlines: leave point at
          the limit and report how many were left unfound.  */
 965   TEMP_SET_PT_BOTH (limit, limit_byte);
 966   immediate_quit = old_immediate_quit;
 967
 968   return count * direction;
 969 }
 970
 971 int
 972 find_next_newline_no_quit (from, cnt)
 973      register int from, cnt;
 974 {
       /* Thin wrapper around scan_buffer: look for CNT occurrences of
          '\n' starting from FROM and return whatever scan_buffer returns.
          The third argument is 0, the shortage pointer is null (so no
          shortage is reported) and the last argument is 0 -- presumably
          scan_buffer's end bound and allow_quit flag respectively, which
          would explain the "_no_quit" name; confirm against scan_buffer's
          parameter list, which is not visible from here.  */
 975   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 976 }
 977
 /* Variant of find_next_newline that yields the position just before
    the newline rather than just after it, and that searches no
    further than TO.  Subtracting 1 from find_next_newline's result
    would not be equivalent: when the scan stops at TO without having
    found the newline, there is nothing to step back over.  */

int
find_before_next_newline (from, to, cnt)
     int from, to, cnt;
{
  int unfound;
  int stop = scan_buffer ('\n', from, to, cnt, &unfound, 1);

  /* A zero shortage means every requested newline was found, so step
     back over the last one; otherwise return STOP untouched.  */
  return unfound == 0 ? stop - 1 : stop;
}
 994 \f
 995 /* Subroutines of Lisp buffer search functions. */
 996
 997 static Lisp_Object
 998 search_command (string, bound, noerror, count, direction, RE, posix)
 999      Lisp_Object string, bound, noerror, count;
1000      int direction;
1001      int RE;
1002      int posix;
1003 {
1004   register int np;
1005   int lim, lim_byte;
1006   int n = direction;
1007
1008   if (!NILP (count))
1009     {
1010       CHECK_NUMBER (count);
1011       n *= XINT (count);
1012     }
1013
1014   CHECK_STRING (string);
1015   if (NILP (bound))
1016     {
1017       if (n > 0)
1018         lim = ZV, lim_byte = ZV_BYTE;
1019       else
1020         lim = BEGV, lim_byte = BEGV_BYTE;
1021     }
1022   else
1023     {
1024       CHECK_NUMBER_COERCE_MARKER (bound);
1025       lim = XINT (bound);
1026       if (n > 0 ? lim < PT : lim > PT)
1027         error ("Invalid search bound (wrong side of point)");
1028       if (lim > ZV)
1029         lim = ZV, lim_byte = ZV_BYTE;
1030       else if (lim < BEGV)
1031         lim = BEGV, lim_byte = BEGV_BYTE;
1032       else
1033         lim_byte = CHAR_TO_BYTE (lim);
1034     }
1035
1036   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1037   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
1038     = current_buffer->case_eqv_table;
1039
1040   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1041                       (!NILP (current_buffer->case_fold_search)
1042                        ? current_buffer->case_canon_table
1043                        : Qnil),
1044                       (!NILP (current_buffer->case_fold_search)
1045                        ? current_buffer->case_eqv_table
1046                        : Qnil),
1047                       posix);
1048   if (np <= 0)
1049     {
1050       if (NILP (noerror))
1051         xsignal1 (Qsearch_failed, string);
1052
1053       if (!EQ (noerror, Qt))
1054         {
1055           if (lim < BEGV || lim > ZV)
1056             abort ();
1057           SET_PT_BOTH (lim, lim_byte);
1058           return Qnil;
1059 #if 0 /* This would be clean, but maybe programs depend on
1060          a value of nil here.  */
1061           np = lim;
1062 #endif
1063         }
1064       else
1065         return Qnil;
1066     }
1067
1068   if (np < BEGV || np > ZV)
1069     abort ();
1070
1071   SET_PT (np);
1072
1073   return make_number (np);
1074 }
1075 \f
1076 /* Return 1 if REGEXP it matches just one constant string.  */
1077
1078 static int
1079 trivial_regexp_p (regexp)
1080      Lisp_Object regexp;
1081 {
1082   int len = SBYTES (regexp);
1083   unsigned char *s = SDATA (regexp);
1084   while (--len >= 0)
1085     {
1086       switch (*s++)
1087         {
1088         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1089           return 0;
1090         case '\\':
1091           if (--len < 0)
1092             return 0;
1093           switch (*s++)
1094             {
1095             case '|': case '(': case ')': case '`': case '\'': case 'b':
1096             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1097             case 'S': case '=': case '{': case '}': case '_':
1098             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1099             case '1': case '2': case '3': case '4': case '5':
1100             case '6': case '7': case '8': case '9':
1101               return 0;
1102             }
1103         }
1104     }
1105   return 1;
1106 }
1107
1108 /* search_buffer: search for the n'th occurrence of STRING in the current buffer,
1109    starting at position POS and stopping at position LIM,
1110    treating STRING as a literal string if RE is false or as
1111    a regular expression if RE is true.
1112
1113    If N is positive, searching is forward and LIM must be greater than POS.
1114    If N is negative, searching is backward and LIM must be less than POS.
1115
1116    Returns -x if x occurrences remain to be found (x > 0),
1117    or else the position at the beginning of the Nth occurrence
1118    (if searching backward) or the end (if searching forward).
1119
1120    POSIX is nonzero if we want full backtracking (POSIX style)
1121    for this pattern.  0 means backtrack only enough to get a valid match.  */
1122
/* Set OUT to the translation of character code D under table TRT.
   If TRT is nil, or if its entry for D is not an integer, OUT is set
   to D itself.

   D is evaluated exactly once, so an argument with side effects is
   safe, and OUT may be the same lvalue as D.  D must be convertible
   to int.  TRT is evaluated twice.  The temporaries carry a trailing
   underscore so that they cannot capture a caller's variable.  */

#define TRANSLATE(out, trt, d)                                  \
do                                                              \
  {                                                             \
    int translate_in_ = (d);                                    \
    if (! NILP (trt))                                           \
      {                                                         \
        Lisp_Object translate_tem_;                             \
        translate_tem_ = Faref (trt, make_number (translate_in_)); \
        if (INTEGERP (translate_tem_))                          \
          (out) = XINT (translate_tem_);                        \
        else                                                    \
          (out) = translate_in_;                                \
      }                                                         \
    else                                                        \
      (out) = translate_in_;                                    \
  }                                                             \
while (0)
1139
1140 /* Only used in search_buffer, to record the end position of the match
1141    when searching regexps and SEARCH_REGS should not be changed
1142    (i.e. Vinhibit_changing_match_data is non-nil).  */
1143 static struct re_registers search_regs_1;
1144
     /* Worker for search_command.  The calling contract (meaning of N,
        LIM, RE, POSIX and of the return value) is given in the comment
        that precedes the TRANSLATE macro above.  In addition:

        TRT and INVERSE_TRT are translation tables, or nil for no
        translation; search_command passes the buffer's case_canon_table
        and case_eqv_table when case_fold_search is non-nil.

        A regexp that is not trivial (or any regexp while
        Vsearch_spaces_regexp is non-nil) is compiled and run through
        re_search_2.  Everything else is treated as a literal string and
        handed to boyer_moore when the translated pattern qualifies, or to
        simple_search otherwise.  */
1145 static int
1146 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1147                RE, trt, inverse_trt, posix)
1148      Lisp_Object string;
1149      int pos;
1150      int pos_byte;
1151      int lim;
1152      int lim_byte;
1153      int n;
1154      int RE;
1155      Lisp_Object trt;
1156      Lisp_Object inverse_trt;
1157      int posix;
1158 {
1159   int len = SCHARS (string);
1160   int len_byte = SBYTES (string);
1161   register int i;
1162
1163   if (running_asynch_code)
1164     save_search_regs ();
1165
1166   /* Searching 0 times means don't move.  */
1167   /* Null string is found at starting position.  */
1168   if (len == 0 || n == 0)
1169     {
1170       set_search_regs (pos_byte, 0);
1171       return pos;
1172     }
1173
       /* Use the regexp engine unless the "regexp" is really a constant
          string and no whitespace regexp is in effect.  */
1174   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1175     {
1176       unsigned char *p1, *p2;
1177       int s1, s2;
1178       struct re_pattern_buffer *bufp;
1179
           /* Registers go to search_regs normally, or to the private
              search_regs_1 when match data must not be changed.  */
1180       bufp = compile_pattern (string,
1181                               (NILP (Vinhibit_changing_match_data)
1182                                ? &search_regs : &search_regs_1),
1183                               trt, posix,
1184                               !NILP (current_buffer->enable_multibyte_characters));
1185
1186       immediate_quit = 1;       /* Quit immediately if user types ^G,
1187                                    because letting this function finish
1188                                    can take too long. */
1189       QUIT;                     /* Do a pending quit right away,
1190                                    to avoid paradoxical behavior */
1191       /* Get pointers and sizes of the two strings
1192          that make up the visible portion of the buffer. */
1193
1194       p1 = BEGV_ADDR;
1195       s1 = GPT_BYTE - BEGV_BYTE;
1196       p2 = GAP_END_ADDR;
1197       s2 = ZV_BYTE - GPT_BYTE;
           /* Negative S1: the gap lies before the visible portion, so all
              of it is in the second string.  */
1198       if (s1 < 0)
1199         {
1200           p2 = p1;
1201           s2 = ZV_BYTE - BEGV_BYTE;
1202           s1 = 0;
1203         }
           /* Negative S2: the gap lies after the visible portion, so all
              of it is in the first string.  */
1204       if (s2 < 0)
1205         {
1206           s1 = ZV_BYTE - BEGV_BYTE;
1207           s2 = 0;
1208         }
1209       re_match_object = Qnil;
1210
           /* Backward search: one re_search_2 call per remaining match.
              Start and range are byte offsets relative to BEGV_BYTE; the
              range LIM_BYTE - POS_BYTE is negative when LIM precedes POS,
              as the contract requires for negative N.  */
1211       while (n < 0)
1212         {
1213           int val;
1214           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1215                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1216                              (NILP (Vinhibit_changing_match_data)
1217                               ? &search_regs : &search_regs_1),
1218                              /* Don't allow match past current point */
1219                              pos_byte - BEGV_BYTE);
1220           if (val == -2)
1221             {
1222               matcher_overflow ();
1223             }
1224           if (val >= 0)
1225             {
1226               if (NILP (Vinhibit_changing_match_data))
1227                 {
                       /* Continue from the start of this match, then turn
                          every recorded register from a byte offset into a
                          character position.  */
1228                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1229                   for (i = 0; i < search_regs.num_regs; i++)
1230                     if (search_regs.start[i] >= 0)
1231                       {
1232                         search_regs.start[i]
1233                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1234                         search_regs.end[i]
1235                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1236                       }
1237                   XSETBUFFER (last_thing_searched, current_buffer);
1238                   /* Set pos to the new position. */
1239                   pos = search_regs.start[0];
1240                 }
1241               else
1242                 {
                       /* Match data is frozen: only the private register
                          set is consulted, and it is left in byte offsets.  */
1243                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1244                   /* Set pos to the new position.  */
1245                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1246                 }
1247             }
1248           else
1249             {
                   /* No match: N (negative) is already minus the number of
                      occurrences still unfound.  */
1250               immediate_quit = 0;
1251               return (n);
1252             }
1253           n++;
1254         }
           /* Forward search; same structure, but continue from the END
              of each match and never match beyond LIM_BYTE.  */
1255       while (n > 0)
1256         {
1257           int val;
1258           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1259                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1260                              (NILP (Vinhibit_changing_match_data)
1261                               ? &search_regs : &search_regs_1),
1262                              lim_byte - BEGV_BYTE);
1263           if (val == -2)
1264             {
1265               matcher_overflow ();
1266             }
1267           if (val >= 0)
1268             {
1269               if (NILP (Vinhibit_changing_match_data))
1270                 {
1271                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1272                   for (i = 0; i < search_regs.num_regs; i++)
1273                     if (search_regs.start[i] >= 0)
1274                       {
1275                         search_regs.start[i]
1276                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1277                         search_regs.end[i]
1278                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1279                       }
1280                   XSETBUFFER (last_thing_searched, current_buffer);
1281                   pos = search_regs.end[0];
1282                 }
1283               else
1284                 {
1285                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1286                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1287                 }
1288             }
1289           else
1290             {
                   /* No match: report minus the number still unfound.  */
1291               immediate_quit = 0;
1292               return (0 - n);
1293             }
1294           n--;
1295         }
1296       immediate_quit = 0;
1297       return (pos);
1298     }
1299   else                          /* non-RE case */
1300     {
1301       unsigned char *raw_pattern, *pat;
1302       int raw_pattern_size;
1303       int raw_pattern_size_byte;
1304       unsigned char *patbuf;
1305       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1306       unsigned char *base_pat;
1307       /* Set to positive if we find a non-ASCII char that needs
1308          translation.  Otherwise set to zero later.  */
1309       int char_base = -1;
1310       int boyer_moore_ok = 1;
1311
1312       /* MULTIBYTE says whether the text to be searched is multibyte.
1313          We must convert PATTERN to match that, or we will not really
1314          find things right.  */
1315
1316       if (multibyte == STRING_MULTIBYTE (string))
1317         {
               /* Same representation as the buffer: use STRING's own
                  bytes, no copy needed.  */
1318           raw_pattern = (unsigned char *) SDATA (string);
1319           raw_pattern_size = SCHARS (string);
1320           raw_pattern_size_byte = SBYTES (string);
1321         }
1322       else if (multibyte)
1323         {
               /* Unibyte STRING, multibyte buffer: build a multibyte copy
                  on the stack.  */
1324           raw_pattern_size = SCHARS (string);
1325           raw_pattern_size_byte
1326             = count_size_as_multibyte (SDATA (string),
1327                                        raw_pattern_size);
1328           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1329           copy_text (SDATA (string), raw_pattern,
1330                      SCHARS (string), 0, 1);
1331         }
1332       else
1333         {
1334           /* Converting multibyte to single-byte.
1335
1336              ??? Perhaps this conversion should be done in a special way
1337              by subtracting nonascii-insert-offset from each non-ASCII char,
1338              so that only the multibyte chars which really correspond to
1339              the chosen single-byte character set can possibly match.  */
1340           raw_pattern_size = SCHARS (string);
1341           raw_pattern_size_byte = SCHARS (string);
1342           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1343           copy_text (SDATA (string), raw_pattern,
1344                      SBYTES (string), 1, 0);
1345         }
1346
1347       /* Copy and optionally translate the pattern.  */
1348       len = raw_pattern_size;
1349       len_byte = raw_pattern_size_byte;
           /* Room for LEN characters of MAX_MULTIBYTE_LENGTH bytes each.
              This is stack space proportional to the pattern length.  */
1350       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1351       pat = patbuf;
1352       base_pat = raw_pattern;
1353       if (multibyte)
1354         {
1355           /* Fill patbuf by translated characters in STRING while
1356              checking if we can use boyer-moore search.  If TRT is
1357              non-nil, we can use boyer-moore search only if TRT can be
1358              represented by the byte array of 256 elements.  For that,
1359              all non-ASCII case-equivalents of all case-sensitive
1360              characters in STRING must belong to the same charset and
1361              row.  */
1362
1363           while (--len >= 0)
1364             {
1365               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1366               int c, translated, inverse;
1367               int in_charlen, charlen;
1368
1369               /* If we got here and the RE flag is set, it's because we're
1370                  dealing with a regexp known to be trivial, so the backslash
1371                  just quotes the next character.  */
1372               if (RE && *base_pat == '\\')
1373                 {
                       /* Drop the backslash from every count: characters
                          left, final pattern length, and bytes left.  */
1374                   len--;
1375                   raw_pattern_size--;
1376                   len_byte--;
1377                   base_pat++;
1378                 }
1379
1380               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1381
1382               if (NILP (trt))
1383                 {
                       /* No translation: copy the character's bytes as is.  */
1384                   str = base_pat;
1385                   charlen = in_charlen;
1386                 }
1387               else
1388                 {
1389                   /* Translate the character.  */
1390                   TRANSLATE (translated, trt, c);
1391                   charlen = CHAR_STRING (translated, str_base);
1392                   str = str_base;
1393
1394                   /* Check if C has any other case-equivalents.  */
1395                   TRANSLATE (inverse, inverse_trt, c);
1396                   /* If so, check if we can use boyer-moore.  */
1397                   if (c != inverse && boyer_moore_ok)
1398                     {
1399                       /* Check if all equivalents belong to the same
1400                          group of characters.  Note that the check of C
1401                          itself is done by the last iteration.  */
1402                       int this_char_base = -1;
1403
                           /* Follow the INVERSE_TRT chain from C until it
                              comes back to C.  A "group" is identified by
                              the character code with its low 6 bits
                              cleared; 0 stands for ASCII.  */
1404                       while (boyer_moore_ok)
1405                         {
1406                           if (ASCII_BYTE_P (inverse))
1407                             {
                                   /* ASCII mixed with an earlier non-ASCII
                                      equivalent rules out boyer-moore.  */
1408                               if (this_char_base > 0)
1409                                 boyer_moore_ok = 0;
1410                               else
1411                                 this_char_base = 0;
1412                             }
1413                           else if (CHAR_BYTE8_P (inverse))
1414                             /* Boyer-moore search can't handle a
1415                                translation of an eight-bit
1416                                character.  */
1417                             boyer_moore_ok = 0;
1418                           else if (this_char_base < 0)
1419                             {
                                   /* First equivalent seen for C: it fixes
                                      this character's group, which must
                                      agree with the group already chosen
                                      for the whole pattern, if any.  */
1420                               this_char_base = inverse & ~0x3F;
1421                               if (char_base < 0)
1422                                 char_base = this_char_base;
1423                               else if (this_char_base != char_base)
1424                                 boyer_moore_ok = 0;
1425                             }
1426                           else if ((inverse & ~0x3F) != this_char_base)
1427                             boyer_moore_ok = 0;
1428                           if (c == inverse)
1429                             break;
1430                           TRANSLATE (inverse, inverse_trt, inverse);
1431                         }
1432                     }
1433                 }
1434
1435               /* Store this character into the translated pattern.  */
1436               bcopy (str, pat, charlen);
1437               pat += charlen;
1438               base_pat += in_charlen;
1439               len_byte -= in_charlen;
1440             }
1441
1442           /* If char_base is still negative we didn't find any translated
1443              non-ASCII characters.  */
1444           if (char_base < 0)
1445             char_base = 0;
1446         }
1447       else
1448         {
1449           /* Unibyte buffer.  */
1450           char_base = 0;
1451           while (--len >= 0)
1452             {
1453               int c, translated;
1454
1455               /* If we got here and the RE flag is set, it's because we're
1456                  dealing with a regexp known to be trivial, so the backslash
1457                  just quotes the next character.  */
1458               if (RE && *base_pat == '\\')
1459                 {
1460                   len--;
1461                   raw_pattern_size--;
1462                   base_pat++;
1463                 }
1464               c = *base_pat++;
1465               TRANSLATE (translated, trt, c);
1466               *pat++ = translated;
1467             }
1468         }
1469
           /* From here on LEN/LEN_BYTE describe the translated pattern in
              PATBUF: bytes actually stored, and characters with quoting
              backslashes already discounted.  */
1470       len_byte = pat - patbuf;
1471       len = raw_pattern_size;
1472       pat = base_pat = patbuf;
1473
1474       if (boyer_moore_ok)
1475         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1476                             pos, pos_byte, lim, lim_byte,
1477                             char_base);
1478       else
1479         return simple_search (n, pat, len, len_byte, trt,
1480                               pos, pos_byte, lim, lim_byte);
1481     }
1482 }
1483 \f
1484 /* Do a simple string search N times for the string PAT,
1485    whose length is LEN/LEN_BYTE,
1486    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1487    TRT is the translation table.
1488
1489    Return the character position where the match is found.
1490    Otherwise, if M matches remained to be found, return -M.
1491
1492    This kind of search works regardless of what is in PAT and
1493    regardless of what is in TRT.  It is used in cases where
1494    boyer_moore cannot work.
     
        The sign of N selects the direction.  Each buffer character is
        passed through TRT before being compared with PAT; PAT itself is
        compared as given (search_buffer translates it beforehand).  On
        success the position returned is the end of the Nth match when
        searching forward and its beginning when searching backward, and
        the match is recorded with set_search_regs.  */
1495
1496 static int
1497 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1498      int n;
1499      unsigned char *pat;
1500      int len, len_byte;
1501      Lisp_Object trt;
1502      int pos, pos_byte;
1503      int lim, lim_byte;
1504 {
1505   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1506   int forward = n > 0;
1507   /* Number of buffer bytes matched.  Note that this may be different
1508      from len_byte in a multibyte buffer.  */
1509   int match_byte;
1510
       /* Four parallel loops follow: forward/backward times
          multibyte/unibyte.  Each tries a match at POS, moves POS by one
          character on failure, and jumps to `stop' when the pattern no
          longer fits before LIM.  The unibyte loops use POS directly as a
          byte position and do not maintain POS_BYTE.  */
1511   if (lim > pos && multibyte)
1512     while (n > 0)
1513       {
1514         while (1)
1515           {
1516             /* Try matching at position POS.  */
1517             int this_pos = pos;
1518             int this_pos_byte = pos_byte;
1519             int this_len = len;
1520             int this_len_byte = len_byte;
1521             unsigned char *p = pat;
                 /* NOTE(review): the byte bound uses the pattern's own
                    byte length, while the matched buffer text may occupy
                    a different number of bytes (see MATCH_BYTE above) --
                    confirm this is the intended limit check.  */
1522             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1523               goto stop;
1524
1525             while (this_len > 0)
1526               {
1527                 int charlen, buf_charlen;
1528                 int pat_ch, buf_ch;
1529
1530                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1531                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1532                                                  ZV_BYTE - this_pos_byte,
1533                                                  buf_charlen);
1534                 TRANSLATE (buf_ch, trt, buf_ch);
1535
1536                 if (buf_ch != pat_ch)
1537                   break;
1538
                     /* Pattern and buffer advance by their own byte
                        lengths, which may differ.  */
1539                 this_len_byte -= charlen;
1540                 this_len--;
1541                 p += charlen;
1542
1543                 this_pos_byte += buf_charlen;
1544                 this_pos++;
1545               }
1546
1547             if (this_len == 0)
1548               {
                     /* Whole pattern matched: move past the match.  */
1549                 match_byte = this_pos_byte - pos_byte;
1550                 pos += len;
1551                 pos_byte += match_byte;
1552                 break;
1553               }
1554
1555             INC_BOTH (pos, pos_byte);
1556           }
1557
1558         n--;
1559       }
1560   else if (lim > pos)
         /* Forward search, unibyte buffer.  */
1561     while (n > 0)
1562       {
1563         while (1)
1564           {
1565             /* Try matching at position POS.  */
1566             int this_pos = pos;
1567             int this_len = len;
1568             unsigned char *p = pat;
1569
1570             if (pos + len > lim)
1571               goto stop;
1572
1573             while (this_len > 0)
1574               {
1575                 int pat_ch = *p++;
1576                 int buf_ch = FETCH_BYTE (this_pos);
1577                 TRANSLATE (buf_ch, trt, buf_ch);
1578
1579                 if (buf_ch != pat_ch)
1580                   break;
1581
1582                 this_len--;
1583                 this_pos++;
1584               }
1585
1586             if (this_len == 0)
1587               {
1588                 match_byte = len;
1589                 pos += len;
1590                 break;
1591               }
1592
1593             pos++;
1594           }
1595
1596         n--;
1597       }
1598   /* Backwards search.  */
1599   else if (lim < pos && multibyte)
1600     while (n < 0)
1601       {
1602         while (1)
1603           {
1604             /* Try matching at position POS.  */
                 /* The candidate match is the LEN characters that end at
                    POS, compared left to right from THIS_POS.  */
1605             int this_pos = pos - len;
1606             int this_pos_byte;
1607             int this_len = len;
1608             int this_len_byte = len_byte;
1609             unsigned char *p = pat;
1610
1611             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1612               goto stop;
1613             this_pos_byte = CHAR_TO_BYTE (this_pos);
                 /* Byte length of the candidate; it is the match length
                    if the comparison below succeeds.  */
1614             match_byte = pos_byte - this_pos_byte;
1615
1616             while (this_len > 0)
1617               {
1618                 int charlen, buf_charlen;
1619                 int pat_ch, buf_ch;
1620
1621                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1622                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1623                                                  ZV_BYTE - this_pos_byte,
1624                                                  buf_charlen);
1625                 TRANSLATE (buf_ch, trt, buf_ch);
1626
1627                 if (buf_ch != pat_ch)
1628                   break;
1629
1630                 this_len_byte -= charlen;
1631                 this_len--;
1632                 p += charlen;
1633                 this_pos_byte += buf_charlen;
1634                 this_pos++;
1635               }
1636
1637             if (this_len == 0)
1638               {
                     /* Matched: move to the beginning of the match.  */
1639                 pos -= len;
1640                 pos_byte -= match_byte;
1641                 break;
1642               }
1643
1644             DEC_BOTH (pos, pos_byte);
1645           }
1646
1647         n++;
1648       }
1649   else if (lim < pos)
         /* Backward search, unibyte buffer.  */
1650     while (n < 0)
1651       {
1652         while (1)
1653           {
1654             /* Try matching at position POS.  */
1655             int this_pos = pos - len;
1656             int this_len = len;
1657             unsigned char *p = pat;
1658
1659             if (this_pos < lim)
1660               goto stop;
1661
1662             while (this_len > 0)
1663               {
1664                 int pat_ch = *p++;
1665                 int buf_ch = FETCH_BYTE (this_pos);
1666                 TRANSLATE (buf_ch, trt, buf_ch);
1667
1668                 if (buf_ch != pat_ch)
1669                   break;
1670                 this_len--;
1671                 this_pos++;
1672               }
1673
1674             if (this_len == 0)
1675               {
1676                 match_byte = len;
1677                 pos -= len;
1678                 break;
1679               }
1680
1681             pos--;
1682           }
1683
1684         n++;
1685       }
1686
1687  stop:
       /* N is zero only when every requested match was found, in which
          case MATCH_BYTE holds the byte length of the last one.  (This
          relies on the caller never passing N == 0; search_buffer returns
          early in that case.)  */
1688   if (n == 0)
1689     {
           /* Record the match by its starting byte position and byte
              length.  After a forward search POS is the end of the match,
              so step back over it; after a backward search POS is already
              its start.  */
1690       if (forward)
1691         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1692       else
1693         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1694
1695       return pos;
1696     }
       /* Otherwise return minus the number of matches still unfound.  */
1697   else if (n > 0)
1698     return -n;
1699   else
1700     return n;
1701 }
1702 \f
1703 /* Do Boyer-Moore search N times for the string BASE_PAT,
1704    whose length is LEN/LEN_BYTE,
1705    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1706    The sign of N says which direction we search in (N > 0 is forward).
1707    TRT and INVERSE_TRT are translation tables.
1708    Characters in PAT are already translated by TRT.
1709
1710    This kind of search works if all the characters in BASE_PAT that
1711    have nontrivial translation are the same aside from the last byte.
1712    This makes it possible to translate just the last byte of a
1713    character, and do so after just a simple test of the context.
1714    CHAR_BASE is nonzero if there is such a non-ASCII character.
1715
1716    If that criterion is not satisfied, do not call this function.

        Only the byte-based arguments (LEN_BYTE, POS_BYTE, LIM_BYTE) are
        read by the body; LEN, POS and LIM are not referenced.

        Every match found, including intermediate ones when |N| > 1, is
        recorded with set_search_regs.  When the last wanted match is
        found, the value is the character position of its end (forward
        search) or of its beginning (backward search).  If the search
        region is exhausted first, the value is minus the number of
        matches still wanted, hence negative.  */
1717
1718 static int
1719 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1720              pos, pos_byte, lim, lim_byte, char_base)
1721      int n;
1722      unsigned char *base_pat;
1723      int len, len_byte;
1724      Lisp_Object trt;
1725      Lisp_Object inverse_trt;
1726      int pos, pos_byte;
1727      int lim, lim_byte;
1728      int char_base;
1729 {
1730   int direction = ((n > 0) ? 1 : -1);
       /* DIRLEN is the pattern length in bytes, negated for a backward
          search.  */
1731   register int dirlen;
1732   int limit, stride_for_teases = 0;
       /* Stride table indexed by byte value.  An entry of 0 marks a byte
          equal to the far-end byte of the pattern (its last byte when
          searching forward, its first byte when searching backward).  */
1733   int BM_tab[0400];
1734   register unsigned char *cursor, *p_limit;
1735   register int i, j;
1736   unsigned char *pat, *pat_end;
1737   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1738
1739   unsigned char simple_translate[0400];
1740   /* These are set to the preceding bytes of a byte to be translated
1741      if char_base is nonzero.  As the maximum byte length of a
1742      multibyte character is 5, we have to check at most four previous
1743      bytes.
          NOTE(review): translate_prev_byte4 is assigned below but never
          read anywhere in this function; the context tests in the two
          match loops stop at translate_prev_byte3.  */
1744   int translate_prev_byte1 = 0;
1745   int translate_prev_byte2 = 0;
1746   int translate_prev_byte3 = 0;
1747   int translate_prev_byte4 = 0;
1748
1749   /* The general approach is that we are going to maintain that we know
1750      the first (closest to the present position, in whatever direction
1751      we're searching) character that could possibly be the last
1752      (furthest from present position) character of a valid match.  We
1753      advance the state of our knowledge by looking at that character
1754      and seeing whether it indeed matches the last character of the
1755      pattern.  If it does, we take a closer look.  If it does not, we
1756      move our pointer (to putative last characters) as far as is
1757      logically possible.  This amount of movement, which I call a
1758      stride, will be the length of the pattern if the actual character
1759      appears nowhere in the pattern, otherwise it will be the distance
1760      from the last occurrence of that character to the end of the
1761      pattern.  If the amount is zero we have a possible match.  */
1762
1763   /* Here we make a "mickey mouse" BM table.  The stride of the search
1764      is determined only by the last character of the putative match.
1765      If that character does not match, we will stride the proper
1766      distance to propose a match that superimposes it on the last
1767      instance of a character that matches it (per trt), or misses
1768      it entirely if there is none. */
1769
1770   dirlen = len_byte * direction;
1771
1772   /* Record position after the end of the pattern.  */
1773   pat_end = base_pat + len_byte;
1774   /* BASE_PAT points to a character that we start scanning from.
1775      It is the first character in a forward search,
1776      the last character in a backward search.  */
1777   if (direction < 0)
1778     base_pat = pat_end - 1;
1779
1780   /* A character that does not appear in the pattern induces a
1781      stride equal to the pattern length.  */
1782   for (i = 0; i < 0400; i++)
1783     BM_tab[i] = dirlen;
1784
1785   /* We use this for translation, instead of TRT itself.
1786      We fill this in to handle the characters that actually
1787      occur in the pattern.  Others don't matter anyway!  */
1788   for (i = 0; i < 0400; i++)
1789     simple_translate[i] = i;
1790
1791   if (char_base)
1792     {
1793       /* Set up translate_prev_byte1/2/3/4 from CHAR_BASE.  Only the
1794          byte following them is the target of translation.  */
1795       unsigned char str[MAX_MULTIBYTE_LENGTH];
           /* This LEN shadows the parameter of the same name; it holds the
              value returned by CHAR_STRING and is used below as the number
              of bytes stored in STR.  */
1796       int len = CHAR_STRING (char_base, str);
1797
1798       translate_prev_byte1 = str[len - 2];
1799       if (len > 2)
1800         {
1801           translate_prev_byte2 = str[len - 3];
1802           if (len > 3)
1803             {
1804               translate_prev_byte3 = str[len - 4];
1805               if (len > 4)
1806                 translate_prev_byte4 = str[len - 5];
1807             }
1808         }
1809     }
1810
       /* Fill in BM_tab (and simple_translate) by walking the pattern one
          byte at a time from BASE_PAT in the search direction.  */
1811   i = 0;
1812   while (i != dirlen)
1813     {
1814       unsigned char *ptr = base_pat + i;
1815       i += direction;
1816       if (! NILP (trt))
1817         {
1818           /* If the byte we are currently looking at is the last byte
1819              of a character whose case-equivalents must be checked, set
1820              CH to that character.  ASCII characters and non-ASCII
1821              characters matching CHAR_BASE are the ones to be checked.  */
1822           int ch = -1;
1823
1824           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1825             ch = *ptr;
1826           else if (char_base
1827                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1828             {
1829               unsigned char *charstart = ptr - 1;
1830
1831               while (! (CHAR_HEAD_P (*charstart)))
1832                 charstart--;
1833               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1834               if (char_base != (ch & ~0x3F))
1835                 ch = -1;
1836             }
1837
               /* J is the table index for this byte.  NOTE(review): for
                  CH >= 0200 the expression below presumably rebuilds the
                  last byte of CH's multibyte encoding -- confirm against
                  the character encoding macros.  */
1838           if (ch >= 0200)
1839             j = (ch & 0x3F) | 0200;
1840           else
1841             j = *ptr;
1842
1843           if (i == dirlen)
1844             stride_for_teases = BM_tab[j];
1845
1846           BM_tab[j] = dirlen - i;
1847           /* A translation table is accompanied by its inverse -- see */
1848           /* comment following downcase_table for details */
1849           if (ch >= 0)
1850             {
1851               int starting_ch = ch;
1852               int starting_j = j;
1853
                   /* Follow INVERSE_TRT from CH until we come back to
                      STARTING_CH, giving every character met on the way
                      the same stride and the same simple translation.  */
1854               while (1)
1855                 {
1856                   TRANSLATE (ch, inverse_trt, ch);
1857                   if (ch >= 0200)
1858                     j = (ch & 0x3F) | 0200;
1859                   else
1860                     j = ch;
1861
1862                   /* For all the characters that map into CH,
1863                      set up simple_translate to map the last byte
1864                      into STARTING_J.  */
1865                   simple_translate[j] = starting_j;
1866                   if (ch == starting_ch)
1867                     break;
1868                   BM_tab[j] = dirlen - i;
1869                 }
1870             }
1871         }
1872       else
1873         {
1874           j = *ptr;
1875
1876           if (i == dirlen)
1877             stride_for_teases = BM_tab[j];
1878           BM_tab[j] = dirlen - i;
1879         }
1880       /* stride_for_teases tells how much to stride if we get a
1881          match on the far character but are subsequently
1882          disappointed, by recording what the stride would have been
1883          for that character if the last character had been
1884          different.  */
1885     }
1886   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1887   /* loop invariant - POS_BYTE points at where last char (first
1888      char if reverse) of pattern would align in a possible match.  */
1889   while (n != 0)
1890     {
1891       int tail_end;
1892       unsigned char *tail_end_ptr;
1893
1894       /* It's been reported that some (broken) compiler thinks that
1895          Boolean expressions in an arithmetic context are unsigned.
1896          Using an explicit ?1:0 prevents this.  */
           /* POS_BYTE has gone past what LIM_BYTE allows: give up and
              report the matches still wanted as a negative count.  */
1897       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1898           < 0)
1899         return (n * (0 - direction));
1900       /* First we do the part we can by pointers (maybe nothing) */
1901       QUIT;
1902       pat = base_pat;
1903       limit = pos_byte - dirlen + direction;
1904       if (direction > 0)
1905         {
1906           limit = BUFFER_CEILING_OF (limit);
1907           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1908              can take on without hitting edge of buffer or the gap.  */
               /* NOTE(review): the 20000-byte cap presumably bounds how
                  long the pointer loop below runs before control returns
                  to the QUIT check above -- confirm.  */
1909           limit = min (limit, pos_byte + 20000);
1910           limit = min (limit, lim_byte - 1);
1911         }
1912       else
1913         {
1914           limit = BUFFER_FLOOR_OF (limit);
1915           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1916              can take on without hitting edge of buffer or the gap.  */
1917           limit = max (limit, pos_byte - 20000);
1918           limit = max (limit, lim_byte);
1919         }
1920       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1921       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1922
           /* Take the raw-pointer path only when more than 20 bytes lie
              between POS_BYTE and LIMIT; otherwise use the FETCH_BYTE
              path in the else branch.  */
1923       if ((limit - pos_byte) * direction > 20)
1924         {
1925           unsigned char *p2;
1926
1927           p_limit = BYTE_POS_ADDR (limit);
1928           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1929           /* In this loop, pos_byte + cursor - p2 is the surrogate for pos_byte.  */
1930           while (1)             /* use one cursor setting as long as i can */
1931             {
1932               if (direction > 0) /* worth duplicating */
1933                 {
1934                   while (cursor <= p_limit)
1935                     {
1936                       if (BM_tab[*cursor] == 0)
1937                         goto hit;
1938                       cursor += BM_tab[*cursor];
1939                     }
1940                 }
1941               else
1942                 {
1943                   while (cursor >= p_limit)
1944                     {
1945                       if (BM_tab[*cursor] == 0)
1946                         goto hit;
1947                       cursor += BM_tab[*cursor];
1948                     }
1949                 }
1950               /* If you are here, cursor is beyond the end of the
1951                  searched region.  You fail to match within the
1952                  permitted region and would otherwise try a character
1953                  beyond that region.  */
1954               break;
1955
1956             hit:
                   /* The far-end byte matched (its stride is 0).  Compare
                      the remaining bytes of the pattern, stepping CURSOR
                      back toward the near end.  I reaches -DIRECTION only
                      if no comparison failed.  */
1957               i = dirlen - direction;
1958               if (! NILP (trt))
1959                 {
1960                   while ((i -= direction) + direction != 0)
1961                     {
1962                       int ch;
1963                       cursor -= direction;
1964                       /* Translate only the last byte of a character.  */
1965                       if (! multibyte
1966                           || ((cursor == tail_end_ptr
1967                                || CHAR_HEAD_P (cursor[1]))
1968                               && (CHAR_HEAD_P (cursor[0])
1969                                   /* Check if this is the last byte of
1970                                      a translatable character.  */
1971                                   || (translate_prev_byte1 == cursor[-1]
1972                                       && (CHAR_HEAD_P (translate_prev_byte1)
1973                                           || (translate_prev_byte2 == cursor[-2]
1974                                               && (CHAR_HEAD_P (translate_prev_byte2)
1975                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1976                         ch = simple_translate[*cursor];
1977                       else
1978                         ch = *cursor;
1979                       if (pat[i] != ch)
1980                         break;
1981                     }
1982                 }
1983               else
1984                 {
1985                   while ((i -= direction) + direction != 0)
1986                     {
1987                       cursor -= direction;
1988                       if (pat[i] != *cursor)
1989                         break;
1990                     }
1991                 }
1992               cursor += dirlen - i - direction; /* fix cursor */
1993               if (i + direction == 0)
1994                 {
1995                   int position, start, end;
1996
1997                   cursor -= direction;
1998
                       /* POSITION is the byte position where the match
                          begins in the buffer.  */
1999                   position = pos_byte + cursor - p2 + ((direction > 0)
2000                                                        ? 1 - len_byte : 0);
2001                   set_search_regs (position, len_byte);
2002
2003                   if (NILP (Vinhibit_changing_match_data))
2004                     {
2005                       start = search_regs.start[0];
2006                       end = search_regs.end[0];
2007                     }
2008                   else
2009                     /* If Vinhibit_changing_match_data is non-nil,
2010                        search_regs will not be changed.  So let's
2011                        compute start and end here.  */
2012                     {
2013                       start = BYTE_TO_CHAR (position);
2014                       end = BYTE_TO_CHAR (position + len_byte);
2015                     }
2016
2017                   if ((n -= direction) != 0)
2018                     cursor += dirlen; /* to resume search */
2019                   else
2020                     return direction > 0 ? end : start;
2021                 }
2022               else
2023                 cursor += stride_for_teases; /* <sigh> we lose -  */
2024             }
2025           pos_byte += cursor - p2;
2026         }
2027       else
2028         /* Now we'll pick up a clump that has to be done the hard
2029            way because it covers a discontinuity.  */
2030         {
2031           limit = ((direction > 0)
2032                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2033                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2034           limit = ((direction > 0)
2035                    ? min (limit + len_byte, lim_byte - 1)
2036                    : max (limit - len_byte, lim_byte));
2037           /* LIMIT is now the last value POS_BYTE can have
2038              and still be valid for a possible match.  */
2039           while (1)
2040             {
2041               /* This loop can be coded for space rather than
2042                  speed because it will usually run only once.
2043                  (the reach is at most len + 21, and typically
2044                  does not exceed len).  */
2045               while ((limit - pos_byte) * direction >= 0)
2046                 {
2047                   int ch = FETCH_BYTE (pos_byte);
2048                   if (BM_tab[ch] == 0)
2049                     goto hit2;
2050                   pos_byte += BM_tab[ch];
2051                 }
2052               break;    /* ran off the end */
2053
2054             hit2:
2055               /* Found what might be a match.  */
2056               i = dirlen - direction;
2057               while ((i -= direction) + direction != 0)
2058                 {
2059                   int ch;
2060                   unsigned char *ptr;
2061                   pos_byte -= direction;
2062                   ptr = BYTE_POS_ADDR (pos_byte);
2063                   /* Translate only the last byte of a character.  */
2064                   if (! multibyte
2065                       || ((ptr == tail_end_ptr
2066                            || CHAR_HEAD_P (ptr[1]))
2067                           && (CHAR_HEAD_P (ptr[0])
2068                               /* Check if this is the last byte of a
2069                                  translatable character.  */
2070                               || (translate_prev_byte1 == ptr[-1]
2071                                   && (CHAR_HEAD_P (translate_prev_byte1)
2072                                       || (translate_prev_byte2 == ptr[-2]
2073                                           && (CHAR_HEAD_P (translate_prev_byte2)
2074                                               || translate_prev_byte3 == ptr[-3])))))))
2075                     ch = simple_translate[*ptr];
2076                   else
2077                     ch = *ptr;
2078                   if (pat[i] != ch)
2079                     break;
2080                 }
2081               /* Above loop has moved POS_BYTE part or all the way
2082                  back to the first pos (last pos if reverse).
2083                  Set it once again at the last (first if reverse) char.  */
2084               pos_byte += dirlen - i - direction;
2085               if (i + direction == 0)
2086                 {
2087                   int position, start, end;
2088                   pos_byte -= direction;
2089
2090                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2091                   set_search_regs (position, len_byte);
2092
2093                   if (NILP (Vinhibit_changing_match_data))
2094                     {
2095                       start = search_regs.start[0];
2096                       end = search_regs.end[0];
2097                     }
2098                   else
2099                     /* If Vinhibit_changing_match_data is non-nil,
2100                        search_regs will not be changed.  So let's
2101                        compute start and end here.  */
2102                     {
2103                       start = BYTE_TO_CHAR (position);
2104                       end = BYTE_TO_CHAR (position + len_byte);
2105                     }
2106
2107                   if ((n -= direction) != 0)
2108                     pos_byte += dirlen; /* to resume search */
2109                   else
2110                     return direction > 0 ? end : start;
2111                 }
2112               else
2113                 pos_byte += stride_for_teases;
2114             }
2115           }
2116       /* We have done one clump.  Can we continue? */
2117       if ((lim_byte - pos_byte) * direction < 0)
2118         return ((0 - n) * direction);
2119     }
       /* Reached only if N was already 0 on entry to the loop above:
          inside the loop N changes only at the two `n -= direction'
          sites, and both return as soon as it becomes 0.  */
2120   return BYTE_TO_CHAR (pos_byte);
2121 }
2122
2123 /* Note in search_regs that the whole match just found in the current
2124    buffer begins at byte position BEG_BYTE and spans NBYTES bytes.
2125    All sub-match registers (1 and above) are marked as unmatched.  */
2126
2127 static void
2128 set_search_regs (beg_byte, nbytes)
2129      int beg_byte, nbytes;
2130 {
2131   int reg;
2132
2133   /* Do nothing when changing the match data is inhibited.  */
2134   if (! NILP (Vinhibit_changing_match_data))
2135     return;
2136
2137   /* No registers exist before the first search; allocate two.  */
2138   if (search_regs.num_regs == 0)
2139     {
2140       search_regs.start = (regoff_t *) xmalloc (2 * sizeof *search_regs.start);
2141       search_regs.end = (regoff_t *) xmalloc (2 * sizeof *search_regs.end);
2142       search_regs.num_regs = 2;
2143     }
2144
2145   /* Mark every sub-match register, highest first, as unmatched.  */
2146   reg = search_regs.num_regs;
2147   while (--reg >= 1)
2148     search_regs.start[reg] = search_regs.end[reg] = -1;
2149
2150   /* Register 0 describes the whole match, as character positions.  */
2151   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2152   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2153   /* Remember which buffer these positions refer to.  */
2154   XSETBUFFER (last_thing_searched, current_buffer);
2155 }
2156 \f
2157 /* Turn STRING, a sequence of words separated by word delimiters, into
2158    a regexp matching those same words in order, with any nonempty run
2159    of non-word characters allowed between them.  If LAX is nonzero and
2160    STRING ends in a word character, the end need not be a word boundary.  */
2161
2162 static Lisp_Object
2163 wordify (string, lax)
2164      Lisp_Object string;
2165      int lax;
2166 {
2167   register unsigned char *out;
2168   register int i, i_byte, nchars, nonword_count = 0, word_count = 0;
2169   Lisp_Object result;
2170   int prev_c = 0;
2171   int growth, ends_in_nonword;
2172
2173   CHECK_STRING (string);
2174   nchars = SCHARS (string);
2175
2176   /* Pass 1: count the non-word characters, and the words that are
2177      followed by at least one non-word character.  */
2178   i = i_byte = 0;
2179   while (i < nchars)
2180     {
2181       int c;
2182
2183       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2184
2185       if (SYNTAX (c) != Sword)
2186         {
2187           nonword_count++;
2188           if (i > 0 && SYNTAX (prev_c) == Sword)
2189             word_count++;
2190         }
2191
2192       prev_c = c;
2193     }
2194
2195   /* A word running up to the very end of STRING was not counted above.  */
2196   ends_in_nonword = (SYNTAX (prev_c) != Sword);
2197   if (! ends_in_nonword)
2198     word_count++;
2199
2200   if (word_count == 0)
2201     return empty_unibyte_string;
2202
2203   /* Size change relative to STRING: non-word characters are dropped,
2204      each gap between words costs 5 bytes, the leading `\b' costs 2
2205      and the trailing one, when present, 2 more.  */
2206   growth = 5 * (word_count - 1) - nonword_count;
2207   growth += (lax && ! ends_in_nonword) ? 2 : 4;
2208   result = (STRING_MULTIBYTE (string)
2209             ? make_uninit_multibyte_string (nchars + growth,
2210                                             SBYTES (string) + growth)
2211             : make_uninit_string (nchars + growth));
2212
2213   out = SDATA (result);
2214   bcopy ("\\b", out, 2);
2215   out += 2;
2216   prev_c = 0;
2217
2218   /* Pass 2: copy word characters verbatim; after each word but the
2219      last, emit `\W\W*' in place of the non-word run that follows.  */
2220   i = i_byte = 0;
2221   while (i < nchars)
2222     {
2223       int c;
2224       int start_byte = i_byte;
2225
2226       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2227
2228       if (SYNTAX (c) != Sword)
2229         {
2230           if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2231             {
2232               bcopy ("\\W\\W*", out, 5);
2233               out += 5;
2234             }
2235         }
2236       else
2237         {
2238           bcopy (SDATA (string) + start_byte, out, i_byte - start_byte);
2239           out += i_byte - start_byte;
2240         }
2241
2242       prev_c = c;
2243     }
2244
2245   /* Close with a word boundary unless LAX lets a final word run on.  */
2246   if (! lax || ends_in_nonword)
2247     bcopy ("\\b", out, 2);
2248
2249   return result;
2250 }
2251 \f
2252 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2253        "MSearch backward: ",
2254        doc: /* Search backward from point for STRING.
2255 Set point to the beginning of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend before that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259  If not nil and not t, position at limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences.
2261
2262 Search case-sensitivity is determined by the value of the variable
2263 `case-fold-search', which see.
2264
2265 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2266      (string, bound, noerror, count)
2267      Lisp_Object string, bound, noerror, count;
2268 {
       /* All the work is done by search_command; this primitive only
          hard-wires the trailing arguments -1, 0, 0.  NOTE(review):
          presumably these are the search direction (backward), the
          regexp flag and the POSIX flag, in that order -- confirm
          against search_command's definition.  */
2269   return search_command (string, bound, noerror, count, -1, 0, 0);
2270 }
2271
2272 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2273        doc: /* Search forward from point for STRING.
2274 Set point to the end of the occurrence found, and return point.
2275 An optional second argument bounds the search; it is a buffer position.
2276 The match found must not extend after that position.  A value of nil is
2277   equivalent to (point-max).
2278 Optional third argument, if t, means if fail just return nil (no error).
2279   If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences.
2281
2282 Search case-sensitivity is determined by the value of the variable
2283 `case-fold-search', which see.
2284
2285 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2286      (string, bound, noerror, count)
2287      Lisp_Object string, bound, noerror, count;
2288 {
       /* All the work is done by search_command; this primitive only
          hard-wires the trailing arguments 1, 0, 0.  NOTE(review):
          presumably these are the search direction (forward), the
          regexp flag and the POSIX flag, in that order -- confirm
          against search_command's definition.  */
2289   return search_command (string, bound, noerror, count, 1, 0, 0);
2290 }
2291
2292 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2293        "sWord search backward: ",
2294        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2295 Set point to the beginning of the occurrence found, and return point.
2296 An optional second argument bounds the search; it is a buffer position.
2297 The match found must not extend before that position.
2298 Optional third argument, if t, means if fail just return nil (no error).
2299   If not nil and not t, move to limit of search and return nil.
2300 Optional fourth argument is repeat count--search for successive occurrences.  */)
2301      (string, bound, noerror, count)
2302      Lisp_Object string, bound, noerror, count;
2303 {
       /* wordify (string, 0) rewrites STRING as a regexp with a `\b' at
          both ends (or as the empty string if STRING holds no word);
          the result goes to search_command with trailing arguments
          -1, 1, 0.  NOTE(review): presumably direction (backward),
          regexp flag and POSIX flag -- confirm against search_command.  */
2304   return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
2305 }
2306
2307 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2308        "sWord search: ",
2309        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2310 Set point to the end of the occurrence found, and return point.
2311 An optional second argument bounds the search; it is a buffer position.
2312 The match found must not extend after that position.
2313 Optional third argument, if t, means if fail just return nil (no error).
2314   If not nil and not t, move to limit of search and return nil.
2315 Optional fourth argument is repeat count--search for successive occurrences.  */)
2316      (string, bound, noerror, count)
2317      Lisp_Object string, bound, noerror, count;
2318 {
       /* wordify (string, 0) rewrites STRING as a regexp with a `\b' at
          both ends (or as the empty string if STRING holds no word);
          the result goes to search_command with trailing arguments
          1, 1, 0.  NOTE(review): presumably direction (forward),
          regexp flag and POSIX flag -- confirm against search_command.  */
2319   return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
2320 }
2321
2322 DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
2323        "sWord search backward: ",
2324        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2325 Set point to the beginning of the occurrence found, and return point.
2326
2327 Unlike `word-search-backward', the end of STRING need not match a word
2328 boundary unless it ends in whitespace.
2329
2330 An optional second argument bounds the search; it is a buffer position.
2331 The match found must not extend before that position.
2332 Optional third argument, if t, means if fail just return nil (no error).
2333   If not nil and not t, move to limit of search and return nil.
2334 Optional fourth argument is repeat count--search for successive occurrences.  */)
2335      (string, bound, noerror, count)
2336      Lisp_Object string, bound, noerror, count;
2337 {
       /* wordify (string, 1) builds the word regexp in lax mode: the
          trailing `\b' is left out when STRING ends in a word
          character.  The result goes to search_command with trailing
          arguments -1, 1, 0.  NOTE(review): presumably direction
          (backward), regexp flag and POSIX flag -- confirm against
          search_command.  */
2338   return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0);
2339 }
2340
2341 DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
2342        "sWord search: ",
2343        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2344 Set point to the end of the occurrence found, and return point.
2345
2346 Unlike `word-search-forward', the end of STRING need not match a word
2347 boundary unless it ends in whitespace.
2348
2349 An optional second argument bounds the search; it is a buffer position.
2350 The match found must not extend after that position.
2351 Optional third argument, if t, means if fail just return nil (no error).
2352   If not nil and not t, move to limit of search and return nil.
2353 Optional fourth argument is repeat count--search for successive occurrences.  */)
2354      (string, bound, noerror, count)
2355      Lisp_Object string, bound, noerror, count;
2356 {
       /* wordify (string, 1) builds the word regexp in lax mode: the
          trailing `\b' is left out when STRING ends in a word
          character.  The result goes to search_command with trailing
          arguments 1, 1, 0.  NOTE(review): presumably direction
          (forward), regexp flag and POSIX flag -- confirm against
          search_command.  */
2357   return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0);
2358 }
2359
2360 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2361        "sRE search backward: ",
2362        doc: /* Search backward from point for match for regular expression REGEXP.
2363 Set point to the beginning of the match, and return point.
2364 The match found is the one starting last in the buffer
2365 and yet ending before the origin of the search.
2366 An optional second argument bounds the search; it is a buffer position.
2367 The match found must start at or after that position.
2368 Optional third argument, if t, means if fail just return nil (no error).
2369   If not nil and not t, move to limit of search and return nil.
2370 Optional fourth argument is repeat count--search for successive occurrences.
2371 See also the functions `match-beginning', `match-end', `match-string',
2372 and `replace-match'.  */)
2373      (regexp, bound, noerror, count)
2374      Lisp_Object regexp, bound, noerror, count;
2375 {
       /* REGEXP is handed to search_command unchanged, with trailing
          arguments -1, 1, 0.  NOTE(review): presumably direction
          (backward), regexp flag (on) and POSIX flag (off) -- confirm
          against search_command's definition.  */
2376   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2377 }
2378
2379 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2380        "sRE search: ",
2381        doc: /* Search forward from point for regular expression REGEXP.
2382 Set point to the end of the occurrence found, and return point.
2383 An optional second argument bounds the search; it is a buffer position.
2384 The match found must not extend after that position.
2385 Optional third argument, if t, means if fail just return nil (no error).
2386   If not nil and not t, move to limit of search and return nil.
2387 Optional fourth argument is repeat count--search for successive occurrences.
2388 See also the functions `match-beginning', `match-end', `match-string',
2389 and `replace-match'.  */)
2390      (regexp, bound, noerror, count)
2391      Lisp_Object regexp, bound, noerror, count;
2392 {
       /* REGEXP is handed to search_command unchanged, with trailing
          arguments 1, 1, 0.  NOTE(review): presumably direction
          (forward), regexp flag (on) and POSIX flag (off) -- confirm
          against search_command's definition.  */
2393   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2394 }
2395
2396 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2397        "sPosix search backward: ",
2398        doc: /* Search backward from point for match for regular expression REGEXP.
2399 Find the longest match in accord with Posix regular expression rules.
2400 Set point to the beginning of the match, and return point.
2401 The match found is the one starting last in the buffer
2402 and yet ending before the origin of the search.
2403 An optional second argument bounds the search; it is a buffer position.
2404 The match found must start at or after that position.
2405 Optional third argument, if t, means if fail just return nil (no error).
2406   If not nil and not t, move to limit of search and return nil.
2407 Optional fourth argument is repeat count--search for successive occurrences.
2408 See also the functions `match-beginning', `match-end', `match-string',
2409 and `replace-match'.  */)
2410      (regexp, bound, noerror, count)
2411      Lisp_Object regexp, bound, noerror, count;
2412 {
       /* REGEXP is handed to search_command unchanged, with trailing
          arguments -1, 1, 1.  NOTE(review): presumably direction
          (backward), regexp flag (on) and POSIX flag (on, i.e. the
          longest-match rule the doc string describes) -- confirm
          against search_command's definition.  */
2413   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2414 }
2415
2416 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2417        "sPosix search: ",
2418        doc: /* Search forward from point for regular expression REGEXP.
2419 Find the longest match in accord with Posix regular expression rules.
2420 Set point to the end of the occurrence found, and return point.
2421 An optional second argument bounds the search; it is a buffer position.
2422 The match found must not extend after that position.
2423 Optional third argument, if t, means if fail just return nil (no error).
2424   If not nil and not t, move to limit of search and return nil.
2425 Optional fourth argument is repeat count--search for successive occurrences.
2426 See also the functions `match-beginning', `match-end', `match-string',
2427 and `replace-match'.  */)
2428      (regexp, bound, noerror, count)
2429      Lisp_Object regexp, bound, noerror, count;
2430 {
       /* REGEXP is handed to search_command unchanged, with trailing
          arguments 1, 1, 1.  NOTE(review): presumably direction
          (forward), regexp flag (on) and POSIX flag (on, i.e. the
          longest-match rule the doc string describes) -- confirm
          against search_command's definition.  */
2431   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2432 }
2433 \f
2434 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2435        doc: /* Replace text matched by last search with NEWTEXT.
2436 Leave point at the end of the replacement text.
2437
2438 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2439 Otherwise maybe capitalize the whole text, or maybe just word initials,
2440 based on the replaced text.
2441 If the replaced text has only capital letters
2442 and has at least one multiletter word, convert NEWTEXT to all caps.
2443 Otherwise if all words are capitalized in the replaced text,
2444 capitalize each word in NEWTEXT.
2445
2446 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2447 Otherwise treat `\\' as special:
2448   `\\&' in NEWTEXT means substitute original matched text.
2449   `\\N' means substitute what matched the Nth `\\(...\\)'.
2450        If Nth parens didn't match, substitute nothing.
2451   `\\\\' means insert one `\\'.
2452 Case conversion does not apply to these substitutions.
2453
2454 FIXEDCASE and LITERAL are optional arguments.
2455
2456 The optional fourth argument STRING can be a string to modify.
2457 This is meaningful when the previous match was done against STRING,
2458 using `string-match'.  When used this way, `replace-match'
2459 creates and returns a new string made by copying STRING and replacing
2460 the part of STRING that was matched.
2461
2462 The optional fifth argument SUBEXP specifies a subexpression;
2463 it says to replace just that subexpression with NEWTEXT,
2464 rather than replacing the entire matched text.
2465 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2466 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2467 NEWTEXT in place of subexp N.
2468 This is useful only after a regular expression search or match,
2469 since only regular expressions have distinguished subexpressions.  */)
2470      (newtext, fixedcase, literal, string, subexp)
2471      Lisp_Object newtext, fixedcase, literal, string, subexp;
2472 {
2473   enum { nochange, all_caps, cap_initial } case_action;
2474   register int pos, pos_byte;
2475   int some_multiletter_word;
2476   int some_lowercase;
2477   int some_uppercase;
2478   int some_nonuppercase_initial;
2479   register int c, prevc;
2480   int sub;                      /* Match register being replaced.  */
2481   int opoint, newpoint;
2482
2483   CHECK_STRING (newtext);
2484
2485   if (! NILP (string))
2486     CHECK_STRING (string);
2487
2488   case_action = nochange;       /* We tried an initialization */
2489                                 /* but some C compilers blew it */
2490
2491   if (search_regs.num_regs <= 0)
2492     error ("`replace-match' called before any match found");
2493
2494   if (NILP (subexp))
2495     sub = 0;
2496   else
2497     {
2498       CHECK_NUMBER (subexp);
2499       sub = XINT (subexp);
2500       if (sub < 0 || sub >= search_regs.num_regs)
2501         args_out_of_range (subexp, make_number (search_regs.num_regs));
2502     }
2503
2504   if (NILP (string))
2505     {
2506       if (search_regs.start[sub] < BEGV
2507           || search_regs.start[sub] > search_regs.end[sub]
2508           || search_regs.end[sub] > ZV)
2509         args_out_of_range (make_number (search_regs.start[sub]),
2510                            make_number (search_regs.end[sub]));
2511     }
2512   else
2513     {
2514       if (search_regs.start[sub] < 0
2515           || search_regs.start[sub] > search_regs.end[sub]
2516           || search_regs.end[sub] > SCHARS (string))
2517         args_out_of_range (make_number (search_regs.start[sub]),
2518                            make_number (search_regs.end[sub]));
2519     }
2520
2521   if (NILP (fixedcase))
2522     {
2523       /* Decide how to casify by examining the matched text. */
2524       int last;
2525
2526       pos = search_regs.start[sub];
2527       last = search_regs.end[sub];
2528
2529       if (NILP (string))
2530         pos_byte = CHAR_TO_BYTE (pos);
2531       else
2532         pos_byte = string_char_to_byte (string, pos);
2533
2534       prevc = '\n';
2535       case_action = all_caps;
2536
2537       /* some_multiletter_word is set nonzero if any original word
2538          is more than one letter long. */
2539       some_multiletter_word = 0;
2540       some_lowercase = 0;
2541       some_nonuppercase_initial = 0;
2542       some_uppercase = 0;
2543
2544       while (pos < last)
2545         {
2546           if (NILP (string))
2547             {
2548               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2549               INC_BOTH (pos, pos_byte);
2550             }
2551           else
2552             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2553
2554           if (LOWERCASEP (c))
2555             {
2556               /* Cannot be all caps if any original char is lower case */
2557
2558               some_lowercase = 1;
2559               if (SYNTAX (prevc) != Sword)
2560                 some_nonuppercase_initial = 1;
2561               else
2562                 some_multiletter_word = 1;
2563             }
2564           else if (UPPERCASEP (c))
2565             {
2566               some_uppercase = 1;
2567               if (SYNTAX (prevc) != Sword)
2568                 ;
2569               else
2570                 some_multiletter_word = 1;
2571             }
2572           else
2573             {
2574               /* If the initial is a caseless word constituent,
2575                  treat that like a lowercase initial.  */
2576               if (SYNTAX (prevc) != Sword)
2577                 some_nonuppercase_initial = 1;
2578             }
2579
2580           prevc = c;
2581         }
2582
2583       /* Convert to all caps if the old text is all caps
2584          and has at least one multiletter word.  */
2585       if (! some_lowercase && some_multiletter_word)
2586         case_action = all_caps;
2587       /* Capitalize each word, if the old text has all capitalized words.  */
2588       else if (!some_nonuppercase_initial && some_multiletter_word)
2589         case_action = cap_initial;
2590       else if (!some_nonuppercase_initial && some_uppercase)
2591         /* Should x -> yz, operating on X, give Yz or YZ?
2592            We'll assume the latter.  */
2593         case_action = all_caps;
2594       else
2595         case_action = nochange;
2596     }
2597
2598   /* Do replacement in a string.  */
2599   if (!NILP (string))
2600     {
2601       Lisp_Object before, after;
2602
2603       before = Fsubstring (string, make_number (0),
2604                            make_number (search_regs.start[sub]));
2605       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2606
2607       /* Substitute parts of the match into NEWTEXT
2608          if desired.  */
2609       if (NILP (literal))
2610         {
2611           int lastpos = 0;
2612           int lastpos_byte = 0;
2613           /* We build up the substituted string in ACCUM.  */
2614           Lisp_Object accum;
2615           Lisp_Object middle;
2616           int length = SBYTES (newtext);
2617
2618           accum = Qnil;
2619
2620           for (pos_byte = 0, pos = 0; pos_byte < length;)
2621             {
2622               int substart = -1;
2623               int subend = 0;
2624               int delbackslash = 0;
2625
2626               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2627
2628               if (c == '\\')
2629                 {
2630                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2631
2632                   if (c == '&')
2633                     {
2634                       substart = search_regs.start[sub];
2635                       subend = search_regs.end[sub];
2636                     }
2637                   else if (c >= '1' && c <= '9')
2638                     {
2639                       /* Check N first: only registers below num_regs exist.  */
2640                       if (c - '0' < search_regs.num_regs
2641                           && search_regs.start[c - '0'] >= 0)
2642                         {
2643                           substart = search_regs.start[c - '0'];
2644                           subend = search_regs.end[c - '0'];
2645                         }
2646                       else
2647                         {
2648                           /* If that subexp does not exist or did not
2649                              match, replace \\N with nothing.  */
2650                           substart = subend = 0;
2651                         }
2652                     }
2653                   else if (c == '\\')
2654                     delbackslash = 1;
2655                   else
2656                     error ("Invalid use of `\\' in replacement text");
2657                 }
2658               if (substart >= 0)
2659                 {
2660                   if (pos - 2 != lastpos)
2661                     middle = substring_both (newtext, lastpos,
2662                                              lastpos_byte,
2663                                              pos - 2, pos_byte - 2);
2664                   else
2665                     middle = Qnil;
2666                   accum = concat3 (accum, middle,
2667                                    Fsubstring (string,
2668                                                make_number (substart),
2669                                                make_number (subend)));
2670                   lastpos = pos;
2671                   lastpos_byte = pos_byte;
2672                 }
2673               else if (delbackslash)
2674                 {
2675                   middle = substring_both (newtext, lastpos,
2676                                            lastpos_byte,
2677                                            pos - 1, pos_byte - 1);
2678
2679                   accum = concat2 (accum, middle);
2680                   lastpos = pos;
2681                   lastpos_byte = pos_byte;
2682                 }
2683             }
2684
2685           if (pos != lastpos)
2686             middle = substring_both (newtext, lastpos,
2687                                      lastpos_byte,
2688                                      pos, pos_byte);
2689           else
2690             middle = Qnil;
2691
2692           newtext = concat2 (accum, middle);
2693         }
2694
2695       /* Do case substitution in NEWTEXT if desired.  */
2696       if (case_action == all_caps)
2697         newtext = Fupcase (newtext);
2698       else if (case_action == cap_initial)
2699         newtext = Fupcase_initials (newtext);
2700
2701       return concat3 (before, newtext, after);
2702     }
2703
2704   /* Record point; a value <= 0 is an offset from ZV, re-added below.  */
2705   if (PT >= search_regs.end[sub])
2706     opoint = PT - ZV;
2707   else if (PT > search_regs.start[sub])
2708     opoint = search_regs.end[sub] - ZV;
2709   else
2710     opoint = PT;
2711
2712   /* If we want non-literal replacement,
2713      perform substitution on the replacement string.  */
2714   if (NILP (literal))
2715     {
2716       int length = SBYTES (newtext);
2717       unsigned char *substed;
2718       int substed_alloc_size, substed_len;
2719       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2720       int str_multibyte = STRING_MULTIBYTE (newtext);
2721       Lisp_Object rev_tbl;
2722       int really_changed = 0;
2723
2724       rev_tbl = Qnil;
2725
2726       substed_alloc_size = length * 2 + 100;
2727       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2728       substed_len = 0;
2729
2730       /* Go thru NEWTEXT, producing the actual text to insert in
2731          SUBSTED while adjusting multibyteness to that of the current
2732          buffer.  */
2733
2734       for (pos_byte = 0, pos = 0; pos_byte < length;)
2735         {
2736           unsigned char str[MAX_MULTIBYTE_LENGTH];
2737           unsigned char *add_stuff = NULL;
2738           int add_len = 0;
2739           int idx = -1;
2740
2741           if (str_multibyte)
2742             {
2743               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2744               if (!buf_multibyte)
2745                 c = multibyte_char_to_unibyte (c, rev_tbl);
2746             }
2747           else
2748             {
2749               /* Note that we don't have to increment POS.  */
2750               c = SREF (newtext, pos_byte++);
2751               if (buf_multibyte)
2752                 c = unibyte_char_to_multibyte (c);
2753             }
2754
2755           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2756              or set IDX to a match index, which means put that part
2757              of the buffer text into SUBSTED.  */
2758
2759           if (c == '\\')
2760             {
2761               really_changed = 1;
2762
2763               if (str_multibyte)
2764                 {
2765                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2766                                                       pos, pos_byte);
2767                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2768                     c = multibyte_char_to_unibyte (c, rev_tbl);
2769                 }
2770               else
2771                 {
2772                   c = SREF (newtext, pos_byte++);
2773                   if (buf_multibyte)
2774                     c = unibyte_char_to_multibyte (c);
2775                 }
2776               /* `\\N' is valid only for an existing register (N < num_regs).  */
2777               if (c == '&')
2778                 idx = sub;
2779               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2780                 {
2781                   if (search_regs.start[c - '0'] >= 1)
2782                     idx = c - '0';
2783                 }
2784               else if (c == '\\')
2785                 add_len = 1, add_stuff = "\\";
2786               else
2787                 {
2788                   xfree (substed);
2789                   error ("Invalid use of `\\' in replacement text");
2790                 }
2791             }
2792           else
2793             {
2794               add_len = CHAR_STRING (c, str);
2795               add_stuff = str;
2796             }
2797
2798           /* If we want to copy part of a previous match,
2799              set up ADD_STUFF and ADD_LEN to point to it.  */
2800           if (idx >= 0)
2801             {
2802               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2803               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2804               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2805                 move_gap (search_regs.start[idx]);
2806               add_stuff = BYTE_POS_ADDR (begbyte);
2807             }
2808
2809           /* Now the stuff we want to add to SUBSTED
2810              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2811
2812           /* Make sure SUBSTED is big enough.  */
2813           if (substed_len + add_len >= substed_alloc_size)
2814             {
2815               substed_alloc_size = substed_len + add_len + 500;
2816               substed = (unsigned char *) xrealloc (substed,
2817                                                     substed_alloc_size + 1);
2818             }
2819
2820           /* Now add to the end of SUBSTED.  */
2821           if (add_stuff)
2822             {
2823               bcopy (add_stuff, substed + substed_len, add_len);
2824               substed_len += add_len;
2825             }
2826         }
2827
2828       if (really_changed)
2829         {
2830           if (buf_multibyte)
2831             {
2832               int nchars = multibyte_chars_in_text (substed, substed_len);
2833
2834               newtext = make_multibyte_string (substed, nchars, substed_len);
2835             }
2836           else
2837             newtext = make_unibyte_string (substed, substed_len);
2838         }
2839       xfree (substed);
2840     }
2841
2842   /* Replace the old text with the new in the cleanest possible way.  */
2843   replace_range (search_regs.start[sub], search_regs.end[sub],
2844                  newtext, 1, 0, 1);
2845   newpoint = search_regs.start[sub] + SCHARS (newtext);
2846
2847   if (case_action == all_caps)
2848     Fupcase_region (make_number (search_regs.start[sub]),
2849                     make_number (newpoint));
2850   else if (case_action == cap_initial)
2851     Fupcase_initials_region (make_number (search_regs.start[sub]),
2852                              make_number (newpoint));
2853
2854   /* Adjust search data for this change.  */
2855   {
2856     int oldend = search_regs.end[sub];
2857     int oldstart = search_regs.start[sub];
2858     int change = newpoint - search_regs.end[sub];
2859     int i;
2860
2861     for (i = 0; i < search_regs.num_regs; i++)
2862       {
2863         if (search_regs.start[i] >= oldend)
2864           search_regs.start[i] += change;
2865         else if (search_regs.start[i] > oldstart)
2866           search_regs.start[i] = oldstart;
2867         if (search_regs.end[i] >= oldend)
2868           search_regs.end[i] += change;
2869         else if (search_regs.end[i] > oldstart)
2870           search_regs.end[i] = oldstart;
2871       }
2872   }
2873
2874   /* Put point back where it was in the text.  */
2875   if (opoint <= 0)
2876     TEMP_SET_PT (opoint + ZV);
2877   else
2878     TEMP_SET_PT (opoint);
2879
2880   /* Now move point "officially" to the end of the inserted replacement.  */
2881   move_if_not_intangible (newpoint);
2882
2883   return Qnil;
2884 }
2885 \f
2886 /* Return the start (BEGINNINGP nonzero) or end of match register NUM
2887    as a Lisp integer; nil if that register is absent or unmatched.  */
2888 static Lisp_Object
2889 match_limit (num, beginningp)
2890      Lisp_Object num;
2891      int beginningp;
2892 {
2893   register int regno;
2894
2895   CHECK_NUMBER (num);
2896   if ((regno = XINT (num)) < 0)
2897     args_out_of_range (num, make_number (0));
2898   if (search_regs.num_regs <= 0)
2899     error ("No match data, because no search succeeded");
2900   if (regno < search_regs.num_regs && search_regs.start[regno] >= 0)
2901     return make_number (beginningp
2902                         ? search_regs.start[regno] : search_regs.end[regno]);
2903   return Qnil;
2904 }
2905
2906 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2907        doc: /* Return position of start of text matched by last search.
2908 SUBEXP, a number, specifies which parenthesized expression in the last
2909   regexp.
2910 Value is nil if SUBEXPth pair didn't match, or there were less than
2911   SUBEXP pairs.
2912 Zero means the entire text matched by the whole regexp or whole string.  */)
2913      (subexp)
2914      Lisp_Object subexp;
2915 {
2916   return match_limit (subexp, 1);       /* Nonzero BEGINNINGP: the start.  */
2917 }
2918
2919 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2920        doc: /* Return position of end of text matched by last search.
2921 SUBEXP, a number, specifies which parenthesized expression in the last
2922   regexp.
2923 Value is nil if SUBEXPth pair didn't match, or there were less than
2924   SUBEXP pairs.
2925 Zero means the entire text matched by the whole regexp or whole string.  */)
2926      (subexp)
2927      Lisp_Object subexp;
2928 {
2929   return match_limit (subexp, 0);       /* Zero BEGINNINGP: the end.  */
2930 }
2931
2932 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2933        doc: /* Return a list containing all info on what the last search matched.
2934 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2935 All the elements are markers or nil (nil if the Nth pair didn't match)
2936 if the last match was on a buffer; integers or nil if a string was matched.
2937 Use `set-match-data' to reinstate the data in this list.
2938
2939 If INTEGERS (the optional first argument) is non-nil, always use
2940 integers \(rather than markers) to represent buffer positions.  In
2941 this case, and if the last match was in a buffer, the buffer will get
2942 stored as one additional element at the end of the list.
2943
2944 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2945 enough to hold all the values, and if INTEGERS is non-nil, no consing
2946 is done.
2947
2948 If optional third arg RESEAT is non-nil, any previous markers on the
2949 REUSE list will be modified to point to nowhere.
2950
2951 Return value is undefined if the last search failed.  */)
2952   (integers, reuse, reseat)
2953      Lisp_Object integers, reuse, reseat;
2954 {
2955   Lisp_Object tail, prev;
2956   Lisp_Object *data;
2957   int i, len;
2958   /* With RESEAT, unchain every marker found in REUSE and clear its slot.  */
2959   if (!NILP (reseat))
2960     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2961       if (MARKERP (XCAR (tail)))
2962         {
2963           unchain_marker (XMARKER (XCAR (tail)));
2964           XSETCAR (tail, Qnil);
2965         }
2966   /* No search is on record (last_thing_searched is nil): return nil.  */
2967   if (NILP (last_thing_searched))
2968     return Qnil;
2969
2970   prev = Qnil;
2971   /* Room for a start/end pair per register, plus the buffer slot below.  */
2972   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2973                                  * sizeof (Lisp_Object));
2974   /* LEN ends up just past the last register that matched.  */
2975   len = 0;
2976   for (i = 0; i < search_regs.num_regs; i++)
2977     {
2978       int start = search_regs.start[i];
2979       if (start >= 0)
2980         {
2981           if (EQ (last_thing_searched, Qt)
2982               || ! NILP (integers))
2983             {
2984               XSETFASTINT (data[2 * i], start);
2985               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2986             }
2987           else if (BUFFERP (last_thing_searched))
2988             {
2989               data[2 * i] = Fmake_marker ();
2990               Fset_marker (data[2 * i],
2991                            make_number (start),
2992                            last_thing_searched);
2993               data[2 * i + 1] = Fmake_marker ();
2994               Fset_marker (data[2 * i + 1],
2995                            make_number (search_regs.end[i]),
2996                            last_thing_searched);
2997             }
2998           else
2999             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
3000             abort ();
3001
3002           len = 2 * i + 2;
3003         }
3004       else
3005         data[2 * i] = data[2 * i + 1] = Qnil;
3006     }
3007   /* With INTEGERS after a buffer search, the buffer itself goes last.  */
3008   if (BUFFERP (last_thing_searched) && !NILP (integers))
3009     {
3010       data[len] = last_thing_searched;
3011       len++;
3012     }
3013
3014   /* If REUSE is not usable, cons up the values and return them.  */
3015   if (! CONSP (reuse))
3016     return Flist (len, data);
3017
3018   /* If REUSE is a list, store as many value elements as will fit
3019      into the elements of REUSE.  */
3020   for (i = 0, tail = reuse; CONSP (tail);
3021        i++, tail = XCDR (tail))
3022     {
3023       if (i < len)
3024         XSETCAR (tail, data[i]);
3025       else
3026         XSETCAR (tail, Qnil);
3027       prev = tail;
3028     }
3029
3030   /* If we couldn't fit all value elements into REUSE,
3031      cons up the rest of them and add them to the end of REUSE.  */
3032   if (i < len)
3033     XSETCDR (prev, Flist (len - i, data + i));
3034
3035   return reuse;
3036 }
3037
3038 /* We used to have an internal use variant of `reseat' described as:
3039
3040       If RESEAT is `evaporate', put the markers back on the free list
3041       immediately.  No other references to the markers must exist in this
3042       case, so it is used only internally on the unwind stack and
3043       save-match-data from Lisp.
3044
3045    But it was ill-conceived: those supposedly-internal markers get exposed via
3046    the undo-list, so freeing them here is unsafe.  */
3047
3048 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
3049        doc: /* Set internal data on last search match from elements of LIST.
3050 LIST should have been created by calling `match-data' previously.
3051
3052 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
3053     (list, reseat)
3054      register Lisp_Object list, reseat;
3055 {
3056   register int i;
3057   register Lisp_Object marker;
3058   /* Inside asynchronous code, set the current registers aside first.  */
3059   if (running_asynch_code)
3060     save_search_regs ();
3061
3062   CHECK_LIST (list);
3063
3064   /* Unless we find a marker with a buffer or an explicit buffer
3065      in LIST, assume that this match data came from a string.  */
3066   last_thing_searched = Qt;
3067
3068   /* Allocate registers if they don't already exist.  */
3069   {
3070     int length = XFASTINT (Flength (list)) / 2;
3071     /* LENGTH counts start/end pairs; grow the register arrays to fit.  */
3072     if (length > search_regs.num_regs)
3073       {
3074         if (search_regs.num_regs == 0)
3075           {
3076             search_regs.start
3077               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3078             search_regs.end
3079               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3080           }
3081         else
3082           {
3083             search_regs.start
3084               = (regoff_t *) xrealloc (search_regs.start,
3085                                        length * sizeof (regoff_t));
3086             search_regs.end
3087               = (regoff_t *) xrealloc (search_regs.end,
3088                                        length * sizeof (regoff_t));
3089           }
3090         /* Mark the newly added registers as unmatched (start of -1).  */
3091         for (i = search_regs.num_regs; i < length; i++)
3092           search_regs.start[i] = -1;
3093
3094         search_regs.num_regs = length;
3095       }
3096     /* Fill register I from each start/end pair; a buffer element ends LIST.  */
3097     for (i = 0; CONSP (list); i++)
3098       {
3099         marker = XCAR (list);
3100         if (BUFFERP (marker))
3101           {
3102             last_thing_searched = marker;
3103             break;
3104           }
3105         if (i >= length)
3106           break;
3107         if (NILP (marker))
3108           {
3109             search_regs.start[i] = -1;
3110             list = XCDR (list);
3111           }
3112         else
3113           {
3114             int from;
3115             Lisp_Object m;
3116             /* M keeps the original element; MARKER may be replaced by 0.  */
3117             m = marker;
3118             if (MARKERP (marker))
3119               {
3120                 if (XMARKER (marker)->buffer == 0)
3121                   XSETFASTINT (marker, 0);
3122                 else
3123                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3124               }
3125
3126             CHECK_NUMBER_COERCE_MARKER (marker);
3127             from = XINT (marker);
3128
3129             if (!NILP (reseat) && MARKERP (m))
3130               {
3131                 unchain_marker (XMARKER (m));
3132                 XSETCAR (list, Qnil);
3133               }
3134
3135             if ((list = XCDR (list), !CONSP (list)))
3136               break;
3137
3138             m = marker = XCAR (list);
3139
3140             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3141               XSETFASTINT (marker, 0);
3142
3143             CHECK_NUMBER_COERCE_MARKER (marker);
3144             search_regs.start[i] = from;
3145             search_regs.end[i] = XINT (marker);
3146
3147             if (!NILP (reseat) && MARKERP (m))
3148               {
3149                 unchain_marker (XMARKER (m));
3150                 XSETCAR (list, Qnil);
3151               }
3152           }
3153         list = XCDR (list);
3154       }
3155     /* Any registers that LIST did not cover are unmatched.  */
3156     for (; i < search_regs.num_regs; i++)
3157       search_regs.start[i] = -1;
3158   }
3159
3160   return Qnil;
3161 }
3162
3163 /* Nonzero while saved_search_regs and saved_last_thing_searched hold the
3164    match data set aside during the execution of a sentinel or filter. */
3165 static int search_regs_saved;
3166 static struct re_registers saved_search_regs;
3167 static Lisp_Object saved_last_thing_searched;
3168
3169 /* Set the current match data aside and start over with none, unless
3170    that was already done.  Called from Flooking_at, Fstring_match,
3171    search_buffer, Fset_match_data if asynchronous code is running. */
3172 static void
3173 save_search_regs ()
3174 {
3175   if (search_regs_saved)
3176     return;
3177
3178   saved_search_regs.num_regs = search_regs.num_regs;
3179   saved_search_regs.start = search_regs.start;
3180   saved_search_regs.end = search_regs.end;
3181   saved_last_thing_searched = last_thing_searched;
3182
3183   search_regs.num_regs = 0;
3184   search_regs.start = search_regs.end = 0;
3185   last_thing_searched = Qnil;
3186   search_regs_saved = 1;
3187 }
3188
3189 /* Called upon exit from filters and sentinels.  Put back the match
3190    data that save_search_regs set aside, if there is any.  */
3191 void
3192 restore_search_regs ()
3193 {
3194   if (!search_regs_saved)
3195     return;
3196
3197   /* Free the registers allocated since the saved set was put aside.  */
3198   if (search_regs.num_regs > 0)
3199     xfree (search_regs.start), xfree (search_regs.end);
3200
3201   search_regs.num_regs = saved_search_regs.num_regs;
3202   search_regs.start = saved_search_regs.start;
3203   search_regs.end = saved_search_regs.end;
3204   last_thing_searched = saved_last_thing_searched;
3205   saved_last_thing_searched = Qnil;
3206   search_regs_saved = 0;
3207 }
3208
3209 static Lisp_Object
3210 unwind_set_match_data (list)
3211      Lisp_Object list;
3212 {
3213   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3214   return Fset_match_data (list, Qt);    /* Qt as RESEAT: unchain markers.  */
3215 }
3216
3217 /* Arrange for the current match data to be reinstated on unwind.  */
3218 void
3219 record_unwind_save_match_data ()
3220 {
3221   record_unwind_protect (unwind_set_match_data,
3222                          Fmatch_data (Qnil, Qnil, Qnil));
3223 }
3224
3225 /* Quote a string so that its regexp special characters match literally.  */
3226
3227 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3228        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3229      (string)
3230      Lisp_Object string;
3231 {
3232   register unsigned char *src, *dst, *limit;
3233   register unsigned char *quoted;
3234   int nquotes = 0;
3235
3236   CHECK_STRING (string);
3237
3238   /* Worst case every byte needs a backslash, doubling the size.  */
3239   quoted = (unsigned char *) alloca (SBYTES (string) * 2);
3240   limit = SDATA (string) + SBYTES (string);
3241
3242   for (src = SDATA (string), dst = quoted; src != limit; src++)
3243     {
3244       switch (*src)
3245         {
3246         case '[': case '*': case '.': case '\\':
3247         case '?': case '+': case '^': case '$':
3248           *dst++ = '\\';
3249           nquotes++;
3250           break;
3251         default:
3252           break;
3253         }
3254       *dst++ = *src;
3255     }
3256
3257   /* Each added backslash is one more character as well as one more byte.  */
3258   return make_specified_string (quoted, SCHARS (string) + nquotes,
3259                                 dst - quoted, STRING_MULTIBYTE (string));
3260 }
3261 \f
3262 void
3263 syms_of_search ()
3264 {
3265   register int i;
3266   /* Give every cache entry a 100-byte pattern buffer and chain them up.  */
3267   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3268     {
3269       searchbufs[i].buf.allocated = 100;
3270       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3271       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3272       searchbufs[i].regexp = Qnil;
3273       searchbufs[i].whitespace_regexp = Qnil;
3274       searchbufs[i].syntax_table = Qnil;
3275       staticpro (&searchbufs[i].regexp);
3276       staticpro (&searchbufs[i].whitespace_regexp);
3277       staticpro (&searchbufs[i].syntax_table);
3278       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3279     }
3280   searchbuf_head = &searchbufs[0];
3281
3282   Qsearch_failed = intern ("search-failed");
3283   staticpro (&Qsearch_failed);
3284   Qinvalid_regexp = intern ("invalid-regexp");
3285   staticpro (&Qinvalid_regexp);
3286   /* Set the error conditions and error message of both symbols.  */
3287   Fput (Qsearch_failed, Qerror_conditions,
3288         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3289   Fput (Qsearch_failed, Qerror_message,
3290         build_string ("Search failed"));
3291
3292   Fput (Qinvalid_regexp, Qerror_conditions,
3293         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3294   Fput (Qinvalid_regexp, Qerror_message,
3295         build_string ("Invalid regexp"));
3296   /* Start out with no search on record, current or saved.  */
3297   last_thing_searched = Qnil;
3298   staticpro (&last_thing_searched);
3299
3300   saved_last_thing_searched = Qnil;
3301   staticpro (&saved_last_thing_searched);
3302
3303   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3304       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3305 Some commands use this for user-specified regexps.
3306 Spaces that occur inside character classes or repetition operators
3307 or other such regexp constructs are not replaced with this.
3308 A value of nil (which is the normal value) means treat spaces literally.  */);
3309   Vsearch_spaces_regexp = Qnil;
3310
3311   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3312       doc: /* Internal use only.
3313 If non-nil, the primitive searching and matching functions
3314 such as `looking-at', `string-match', `re-search-forward', etc.,
3315 do not set the match data.  The proper way to use this variable
3316 is to bind it with `let' around a small expression.  */);
3317   Vinhibit_changing_match_data = Qnil;
3318   /* Register the subr objects for this file's DEFUNs.  */
3319   defsubr (&Slooking_at);
3320   defsubr (&Sposix_looking_at);
3321   defsubr (&Sstring_match);
3322   defsubr (&Sposix_string_match);
3323   defsubr (&Ssearch_forward);
3324   defsubr (&Ssearch_backward);
3325   defsubr (&Sword_search_forward);
3326   defsubr (&Sword_search_backward);
3327   defsubr (&Sword_search_forward_lax);
3328   defsubr (&Sword_search_backward_lax);
3329   defsubr (&Sre_search_forward);
3330   defsubr (&Sre_search_backward);
3331   defsubr (&Sposix_search_forward);
3332   defsubr (&Sposix_search_backward);
3333   defsubr (&Sreplace_match);
3334   defsubr (&Smatch_beginning);
3335   defsubr (&Smatch_end);
3336   defsubr (&Smatch_data);
3337   defsubr (&Sset_match_data);
3338   defsubr (&Sregexp_quote);
3339 }
3340
3341 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3342    (do not change this comment) */