src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
  36 #define REGEXP_CACHE_SIZE 20
  37
  38 /* If the regexp is non-nil, then the buffer contains the compiled form
  39    of that regexp, suitable for searching.  */
  40 struct regexp_cache
  41 {
       /* Next entry in the cache's linked list.  compile_pattern walks
          this chain and treats a null link as the end of the cache.  */
  42   struct regexp_cache *next;
       /* Copy of the pattern string that BUF was compiled from, or nil
          while the entry holds no successfully compiled pattern.  */
  43   Lisp_Object regexp;
       /* The compiled pattern.  Its translate and multibyte fields are
          also compared when looking an entry up in the cache.  */
  44   struct re_pattern_buffer buf;
       /* 0400 (octal) = 256 bytes of storage.  Presumably serves as BUF's
          fastmap; the hookup is not in this part of the file -- confirm
          at the initialization site.  */
  45   char fastmap[0400];
  46   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  47   char posix;
  48 };
  49
  50 /* The instances of that struct.  */
  51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  52
  53 /* The head of the linked list; points to the most recently used buffer.  */
  54 struct regexp_cache *searchbuf_head;
  55
  56
  57 /* Every call to re_match, etc., must pass &search_regs as the regs
  58    argument unless you can show it is unnecessary (i.e., if re_match
  59    is certainly going to be called again before region-around-match
  60    can be called).
  61
  62    Since the registers are now dynamically allocated, we need to make
  63    sure not to refer to the Nth register before checking that it has
  64    been allocated by checking search_regs.num_regs.
  65
  66    The regex code keeps track of whether it has allocated the search
  67    buffer using bits in the re_pattern_buffer.  This means that whenever
  68    you compile a new pattern, it completely forgets whether it has
  69    allocated any registers, and will allocate new registers the next
  70    time you call a searching or matching function.  Therefore, we need
  71    to call re_set_registers after compiling a new pattern or after
  72    setting the match registers, so that the regex functions will be
  73    able to free or re-allocate it properly.  */
  74 static struct re_registers search_regs;
  75
  76 /* The buffer in which the last search was performed, or
  77    Qt if the last search was done in a string;
  78    Qnil if no searching has been done yet.  */
  79 static Lisp_Object last_thing_searched;
  80
  81 /* Error condition signaled when compiling a regexp fails.  */
  82
  83 Lisp_Object Qinvalid_regexp;
  84
  85 static void set_search_regs ();
  86 static void save_search_regs ();
  87
  88 static int search_buffer ();
  89
  90 static void
  91 matcher_overflow ()
  92 {
       /* Callers invoke this when a regex match or search routine has
          returned -2; report that condition through error ().  */
  93   error ("Stack overflow in regexp matcher");
  94 }
  95
  96 #ifdef __STDC__
  97 #define CONST const
  98 #else
  99 #define CONST
 100 #endif
 101
 102 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 103    PATTERN is the pattern to compile.
 104    CP is the place to put the result.
 105    TRANSLATE is a translation table for ignoring case, or NULL for none.
 106    REGP is the structure that says where to store the "register"
 107    values that will result from matching this pattern.
 108    If it is 0, we should compile the pattern not to record any
 109    subexpression bounds.
 110    POSIX is nonzero if we want full backtracking (POSIX style)
 111    for this pattern.  0 means backtrack only enough to get a valid match.
 112    MULTIBYTE is nonzero if we want to handle multibyte characters in
 113    PATTERN.  0 means all multibyte characters are recognized just as
 114    sequences of binary data.  */
 115
 116 static void
 117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 118      struct regexp_cache *cp;
 119      Lisp_Object pattern;
 120      Lisp_Object *translate;
 121      struct re_registers *regp;
 122      int posix;
 123      int multibyte;
 124 {
 125   char *raw_pattern;
 126   int raw_pattern_size;
 127   char *val;
 128   reg_syntax_t old;
 129
 130   /* MULTIBYTE says whether the text to be searched is multibyte.
 131      We must convert PATTERN to match that, or we will not really
 132      find things right.  */
 133
 134   if (multibyte == STRING_MULTIBYTE (pattern))
 135     {
 136       raw_pattern = (char *) XSTRING (pattern)->data;
 137       raw_pattern_size = XSTRING (pattern)->size_byte;
 138     }
 139   else if (multibyte)
 140     {
 141       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 142                                                   XSTRING (pattern)->size);
 143       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 144       copy_text (XSTRING (pattern)->data, raw_pattern,
 145                  XSTRING (pattern)->size, 0, 1);
 146     }
 147   else
 148     {
 149       /* Converting multibyte to single-byte.
 150
 151          ??? Perhaps this conversion should be done in a special way
 152          by subtracting nonascii-insert-offset from each non-ASCII char,
 153          so that only the multibyte chars which really correspond to
 154          the chosen single-byte character set can possibly match.  */
 155       raw_pattern_size = XSTRING (pattern)->size;
 156       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 157       copy_text (XSTRING (pattern)->data, raw_pattern,
 158                  XSTRING (pattern)->size, 1, 0);
 159     }
 160
 161   cp->regexp = Qnil;
 162   cp->buf.translate = translate;
 163   cp->posix = posix;
 164   cp->buf.multibyte = multibyte;
 165   BLOCK_INPUT;
 166   old = re_set_syntax (RE_SYNTAX_EMACS
 167                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 168   val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
 169   re_set_syntax (old);
 170   UNBLOCK_INPUT;
 171   if (val)
 172     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 173
 174   cp->regexp = Fcopy_sequence (pattern);
 175 }
 176
 177 /* Compile a regexp if necessary, but first check to see if there's one in
 178    the cache.
 179    PATTERN is the pattern to compile.
 180    TRANSLATE is a translation table for ignoring case, or NULL for none.
 181    REGP is the structure that says where to store the "register"
 182    values that will result from matching this pattern.
 183    If it is 0, we should compile the pattern not to record any
 184    subexpression bounds.
 185    POSIX is nonzero if we want full backtracking (POSIX style)
 186    for this pattern.  0 means backtrack only enough to get a valid match.  */
 187
 188 struct re_pattern_buffer *
 189 compile_pattern (pattern, regp, translate, posix, multibyte)
 190      Lisp_Object pattern;
 191      struct re_registers *regp;
 192      Lisp_Object *translate;
 193      int posix, multibyte;
 194 {
 195   struct regexp_cache *cp, **cpp;
 196
 197   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 198     {
 199       cp = *cpp;
 200       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 201           && !NILP (Fstring_equal (cp->regexp, pattern))
 202           && cp->buf.translate == translate
 203           && cp->posix == posix
 204           && cp->buf.multibyte == multibyte)
 205         break;
 206
 207       /* If we're at the end of the cache, compile into the last cell.  */
 208       if (cp->next == 0)
 209         {
 210           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 211           break;
 212         }
 213     }
 214
 215   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 216      either because we found it in the cache or because we just compiled it.
 217      Move it to the front of the queue to mark it as most recently used.  */
 218   *cpp = cp->next;
 219   cp->next = searchbuf_head;
 220   searchbuf_head = cp;
 221
 222   /* Advise the searching functions about the space we have allocated
 223      for register data.  */
 224   if (regp)
 225     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 226
 227   return &cp->buf;
 228 }
 229
  230 /* Error condition signaled when a search fails.  */
 231 Lisp_Object Qsearch_failed;
 232
  233 Lisp_Object
  234 signal_failure (arg)
  235      Lisp_Object arg;
  236 {
        /* Signal Qsearch_failed with the one-element list (ARG) as data.  */
  237   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
        /* Reached only if Fsignal returns; the value is then nil.  */
  238   return Qnil;
  239 }
 240 \f
 241 static Lisp_Object
 242 looking_at_1 (string, posix)
 243      Lisp_Object string;
 244      int posix;
 245 {
 246   Lisp_Object val;
 247   unsigned char *p1, *p2;
 248   int s1, s2;
 249   register int i;
 250   struct re_pattern_buffer *bufp;
 251
 252   if (running_asynch_code)
 253     save_search_regs ();
 254
 255   CHECK_STRING (string, 0);
 256   bufp = compile_pattern (string, &search_regs,
 257                           (!NILP (current_buffer->case_fold_search)
 258                            ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
 259                           posix,
 260                           !NILP (current_buffer->enable_multibyte_characters));
 261
 262   immediate_quit = 1;
 263   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 264
 265   /* Get pointers and sizes of the two strings
 266      that make up the visible portion of the buffer. */
 267
 268   p1 = BEGV_ADDR;
 269   s1 = GPT_BYTE - BEGV_BYTE;
 270   p2 = GAP_END_ADDR;
 271   s2 = ZV_BYTE - GPT_BYTE;
 272   if (s1 < 0)
 273     {
 274       p2 = p1;
 275       s2 = ZV_BYTE - BEGV_BYTE;
 276       s1 = 0;
 277     }
 278   if (s2 < 0)
 279     {
 280       s1 = ZV_BYTE - BEGV_BYTE;
 281       s2 = 0;
 282     }
 283
 284   re_match_object = Qnil;
 285
 286   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 287                   PT_BYTE - BEGV_BYTE, &search_regs,
 288                   ZV_BYTE - BEGV_BYTE);
 289   if (i == -2)
 290     matcher_overflow ();
 291
 292   val = (0 <= i ? Qt : Qnil);
 293   if (i >= 0)
 294     for (i = 0; i < search_regs.num_regs; i++)
 295       if (search_regs.start[i] >= 0)
 296         {
 297           search_regs.start[i]
 298             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 299           search_regs.end[i]
 300             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 301         }
 302   XSETBUFFER (last_thing_searched, current_buffer);
 303   immediate_quit = 0;
 304   return val;
 305 }
 306
  307 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
  308   "Return t if text after point matches regular expression REGEXP.\n\
  309 This function modifies the match data that `match-beginning',\n\
  310 `match-end' and `match-data' access; save and restore the match\n\
  311 data if you want to preserve them.")
  312   (regexp)
  313      Lisp_Object regexp;
  314 {
        /* The 0 is looking_at_1's POSIX flag: do not ask for full POSIX
           backtracking.  */
  315   return looking_at_1 (regexp, 0);
  316 }
 317
  318 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
  319   "Return t if text after point matches regular expression REGEXP.\n\
  320 Find the longest match, in accord with Posix regular expression rules.\n\
  321 This function modifies the match data that `match-beginning',\n\
  322 `match-end' and `match-data' access; save and restore the match\n\
  323 data if you want to preserve them.")
  324   (regexp)
  325      Lisp_Object regexp;
  326 {
        /* The 1 is looking_at_1's POSIX flag: ask for full POSIX
           backtracking.  */
  327   return looking_at_1 (regexp, 1);
  328 }
 329 \f
 330 static Lisp_Object
 331 string_match_1 (regexp, string, start, posix)
 332      Lisp_Object regexp, string, start;
 333      int posix;
 334 {
 335   int val;
 336   struct re_pattern_buffer *bufp;
 337   int pos, pos_byte;
 338   int i;
 339
 340   if (running_asynch_code)
 341     save_search_regs ();
 342
 343   CHECK_STRING (regexp, 0);
 344   CHECK_STRING (string, 1);
 345
 346   if (NILP (start))
 347     pos = 0, pos_byte = 0;
 348   else
 349     {
 350       int len = XSTRING (string)->size;
 351
 352       CHECK_NUMBER (start, 2);
 353       pos = XINT (start);
 354       if (pos < 0 && -pos <= len)
 355         pos = len + pos;
 356       else if (0 > pos || pos > len)
 357         args_out_of_range (string, start);
 358       pos_byte = string_char_to_byte (string, pos);
 359     }
 360
 361   bufp = compile_pattern (regexp, &search_regs,
 362                           (!NILP (current_buffer->case_fold_search)
 363                            ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
 364                           posix,
 365                           STRING_MULTIBYTE (string));
 366   immediate_quit = 1;
 367   re_match_object = string;
 368
 369   val = re_search (bufp, (char *) XSTRING (string)->data,
 370                    XSTRING (string)->size_byte, pos_byte,
 371                    XSTRING (string)->size_byte - pos_byte,
 372                    &search_regs);
 373   immediate_quit = 0;
 374   last_thing_searched = Qt;
 375   if (val == -2)
 376     matcher_overflow ();
 377   if (val < 0) return Qnil;
 378
 379   for (i = 0; i < search_regs.num_regs; i++)
 380     if (search_regs.start[i] >= 0)
 381       {
 382         search_regs.start[i]
 383           = string_byte_to_char (string, search_regs.start[i]);
 384         search_regs.end[i]
 385           = string_byte_to_char (string, search_regs.end[i]);
 386       }
 387
 388   return make_number (string_byte_to_char (string, val));
 389 }
 390
  391 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
  392   "Return index of start of first match for REGEXP in STRING, or nil.\n\
  393 If third arg START is non-nil, start search at that index in STRING.\n\
  394 For index of first char beyond the match, do (match-end 0).\n\
  395 `match-end' and `match-beginning' also give indices of substrings\n\
  396 matched by parenthesis constructs in the pattern.")
  397   (regexp, string, start)
  398      Lisp_Object regexp, string, start;
  399 {
        /* The 0 is string_match_1's POSIX flag: do not ask for full POSIX
           backtracking.  */
  400   return string_match_1 (regexp, string, start, 0);
  401 }
 402
  403 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
  404   "Return index of start of first match for REGEXP in STRING, or nil.\n\
  405 Find the longest match, in accord with Posix regular expression rules.\n\
  406 If third arg START is non-nil, start search at that index in STRING.\n\
  407 For index of first char beyond the match, do (match-end 0).\n\
  408 `match-end' and `match-beginning' also give indices of substrings\n\
  409 matched by parenthesis constructs in the pattern.")
  410   (regexp, string, start)
  411      Lisp_Object regexp, string, start;
  412 {
        /* The 1 is string_match_1's POSIX flag: ask for full POSIX
           backtracking.  */
  413   return string_match_1 (regexp, string, start, 1);
  414 }
 415
 416 /* Match REGEXP against STRING, searching all of STRING,
 417    and return the index of the match, or negative on failure.
 418    This does not clobber the match data.  */
 419
 420 int
 421 fast_string_match (regexp, string)
 422      Lisp_Object regexp, string;
 423 {
 424   int val;
 425   struct re_pattern_buffer *bufp;
 426
 427   bufp = compile_pattern (regexp, 0, 0, 0, STRING_MULTIBYTE (string));
 428   immediate_quit = 1;
 429   re_match_object = string;
 430
 431   val = re_search (bufp, (char *) XSTRING (string)->data,
 432                    XSTRING (string)->size, 0, XSTRING (string)->size,
 433                    0);
 434   immediate_quit = 0;
 435   return val;
 436 }
 437
 438 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 439    and return the index of the match, or negative on failure.
 440    This does not clobber the match data.
 441    We assume that STRING contains single-byte characters.  */
 442
 443 extern Lisp_Object Vascii_downcase_table;
 444
 445 int
 446 fast_c_string_match_ignore_case (regexp, string)
 447      Lisp_Object regexp;
 448      char *string;
 449 {
 450   int val;
 451   struct re_pattern_buffer *bufp;
 452   int len = strlen (string);
 453
 454   regexp = string_make_unibyte (regexp);
 455   re_match_object = Qt;
 456   bufp = compile_pattern (regexp, 0,
 457                           XCHAR_TABLE (Vascii_downcase_table)->contents, 0,
 458                           0);
 459   immediate_quit = 1;
 460   val = re_search (bufp, string, len, 0, len, 0);
 461   immediate_quit = 0;
 462   return val;
 463 }
 464 \f
 465 /* max and min.  */
 466
 467 static int
 468 max (a, b)
 469      int a, b;
 470 {
 471   return ((a > b) ? a : b);
 472 }
 473
 474 static int
 475 min (a, b)
 476      int a, b;
 477 {
 478   return ((a < b) ? a : b);
 479 }
 480
 481 \f
 482 /* The newline cache: remembering which sections of text have no newlines.  */
 483
 484 /* If the user has requested newline caching, make sure it's on.
 485    Otherwise, make sure it's off.
 486    This is our cheezy way of associating an action with the change of
 487    state of a buffer-local variable.  */
 488 static void
 489 newline_cache_on_off (buf)
 490      struct buffer *buf;
 491 {
 492   if (NILP (buf->cache_long_line_scans))
 493     {
 494       /* It should be off.  */
 495       if (buf->newline_cache)
 496         {
 497           free_region_cache (buf->newline_cache);
 498           buf->newline_cache = 0;
 499         }
 500     }
 501   else
 502     {
 503       /* It should be on.  */
 504       if (buf->newline_cache == 0)
 505         buf->newline_cache = new_region_cache ();
 506     }
 507 }
 508
 509 \f
 510 /* Search for COUNT instances of the character TARGET between START and END.
 511
 512    If COUNT is positive, search forwards; END must be >= START.
 513    If COUNT is negative, search backwards for the -COUNTth instance;
 514       END must be <= START.
 515    If COUNT is zero, do anything you please; run rogue, for all I care.
 516
 517    If END is zero, use BEGV or ZV instead, as appropriate for the
 518    direction indicated by COUNT.
 519
 520    If we find COUNT instances, set *SHORTAGE to zero, and return the
 521    position after the COUNTth match.  Note that for reverse motion
 522    this is not the same as the usual convention for Emacs motion commands.
 523
 524    If we don't find COUNT instances before reaching END, set *SHORTAGE
 525    to the number of TARGETs left unfound, and return END.
 526
 527    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 528    except when inside redisplay.  */
 529
 530 scan_buffer (target, start, end, count, shortage, allow_quit)
 531      register int target;
 532      int start, end;
 533      int count;
 534      int *shortage;
 535      int allow_quit;
 536 {
 537   struct region_cache *newline_cache;
 538   int direction;
 539
 540   if (count > 0)
 541     {
 542       direction = 1;
 543       if (! end) end = ZV;
 544     }
 545   else
 546     {
 547       direction = -1;
 548       if (! end) end = BEGV;
 549     }
 550
 551   newline_cache_on_off (current_buffer);
 552   newline_cache = current_buffer->newline_cache;
 553
 554   if (shortage != 0)
 555     *shortage = 0;
 556
 557   immediate_quit = allow_quit;
 558
 559   if (count > 0)
 560     while (start != end)
 561       {
 562         /* Our innermost scanning loop is very simple; it doesn't know
 563            about gaps, buffer ends, or the newline cache.  ceiling is
 564            the position of the last character before the next such
 565            obstacle --- the last character the dumb search loop should
 566            examine.  */
 567         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 568         int start_byte = CHAR_TO_BYTE (start);
 569
 570         /* If we're looking for a newline, consult the newline cache
 571            to see where we can avoid some scanning.  */
 572         if (target == '\n' && newline_cache)
 573           {
 574             int next_change;
 575             immediate_quit = 0;
 576             while (region_cache_forward
 577                    (current_buffer, newline_cache, start_byte, &next_change))
 578               start_byte = next_change;
 579             immediate_quit = allow_quit;
 580
 581             /* START should never be after END.  */
 582             if (start_byte > ceiling_byte)
 583               start_byte = ceiling_byte;
 584
 585             /* Now the text after start is an unknown region, and
 586                next_change is the position of the next known region. */
 587             ceiling_byte = min (next_change - 1, ceiling_byte);
 588           }
 589
 590         /* The dumb loop can only scan text stored in contiguous
 591            bytes. BUFFER_CEILING_OF returns the last character
 592            position that is contiguous, so the ceiling is the
 593            position after that.  */
 594         ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
 595
 596         {
 597           /* The termination address of the dumb loop.  */
 598           register unsigned char *ceiling_addr
 599             = BYTE_POS_ADDR (ceiling_byte) + 1;
 600           register unsigned char *cursor
 601             = BYTE_POS_ADDR (start_byte);
 602           unsigned char *base = cursor;
 603
 604           while (cursor < ceiling_addr)
 605             {
 606               unsigned char *scan_start = cursor;
 607
 608               /* The dumb loop.  */
 609               while (*cursor != target && ++cursor < ceiling_addr)
 610                 ;
 611
 612               /* If we're looking for newlines, cache the fact that
 613                  the region from start to cursor is free of them. */
 614               if (target == '\n' && newline_cache)
 615                 know_region_cache (current_buffer, newline_cache,
 616                                    start_byte + scan_start - base,
 617                                    start_byte + cursor - base);
 618
 619               /* Did we find the target character?  */
 620               if (cursor < ceiling_addr)
 621                 {
 622                   if (--count == 0)
 623                     {
 624                       immediate_quit = 0;
 625                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 626                     }
 627                   cursor++;
 628                 }
 629             }
 630
 631           start = BYTE_TO_CHAR (start_byte + cursor - base);
 632         }
 633       }
 634   else
 635     while (start > end)
 636       {
 637         /* The last character to check before the next obstacle.  */
 638         int ceiling_byte = CHAR_TO_BYTE (end);
 639         int start_byte = CHAR_TO_BYTE (start);
 640
 641         /* Consult the newline cache, if appropriate.  */
 642         if (target == '\n' && newline_cache)
 643           {
 644             int next_change;
 645             immediate_quit = 0;
 646             while (region_cache_backward
 647                    (current_buffer, newline_cache, start_byte, &next_change))
 648               start_byte = next_change;
 649             immediate_quit = allow_quit;
 650
 651             /* Start should never be at or before end.  */
 652             if (start_byte <= ceiling_byte)
 653               start_byte = ceiling_byte + 1;
 654
 655             /* Now the text before start is an unknown region, and
 656                next_change is the position of the next known region. */
 657             ceiling_byte = max (next_change, ceiling_byte);
 658           }
 659
 660         /* Stop scanning before the gap.  */
 661         ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
 662
 663         {
 664           /* The termination address of the dumb loop.  */
 665           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 666           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 667           unsigned char *base = cursor;
 668
 669           while (cursor >= ceiling_addr)
 670             {
 671               unsigned char *scan_start = cursor;
 672
 673               while (*cursor != target && --cursor >= ceiling_addr)
 674                 ;
 675
 676               /* If we're looking for newlines, cache the fact that
 677                  the region from after the cursor to start is free of them.  */
 678               if (target == '\n' && newline_cache)
 679                 know_region_cache (current_buffer, newline_cache,
 680                                    start_byte + cursor - base,
 681                                    start_byte + scan_start - base);
 682
 683               /* Did we find the target character?  */
 684               if (cursor >= ceiling_addr)
 685                 {
 686                   if (++count >= 0)
 687                     {
 688                       immediate_quit = 0;
 689                       return BYTE_TO_CHAR (start_byte + cursor - base);
 690                     }
 691                   cursor--;
 692                 }
 693             }
 694
 695           start = BYTE_TO_CHAR (start_byte + cursor - base);
 696         }
 697       }
 698
 699   immediate_quit = 0;
 700   if (shortage != 0)
 701     *shortage = count * direction;
 702   return start;
 703 }
 704 \f
 705 /* Search for COUNT instances of a line boundary, which means either a
 706    newline or (if selective display enabled) a carriage return.
 707    Start at START.  If COUNT is negative, search backwards.
 708
 709    We report the resulting position by calling TEMP_SET_PT_BOTH.
 710
 711    If we find COUNT instances. we position after (always after,
 712    even if scanning backwards) the COUNTth match, and return 0.
 713
 714    If we don't find COUNT instances before reaching the end of the
 715    buffer (or the beginning, if scanning backwards), we return
 716    the number of line boundaries left unfound, and position at
 717    the limit we bumped up against.
 718
 719    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 720    except in special cases.  */
 721
 722 int
 723 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 724      int start, start_byte;
 725      int limit, limit_byte;
 726      register int count;
 727      int allow_quit;
 728 {
 729   int direction = ((count > 0) ? 1 : -1);
 730
 731   register unsigned char *cursor;
 732   unsigned char *base;
 733
 734   register int ceiling;
 735   register unsigned char *ceiling_addr;
 736
 737   int old_immediate_quit = immediate_quit;
 738
 739   /* If we are not in selective display mode,
 740      check only for newlines.  */
 741   int selective_display = (!NILP (current_buffer->selective_display)
 742                            && !INTEGERP (current_buffer->selective_display));
 743
 744   /* The code that follows is like scan_buffer
 745      but checks for either newline or carriage return.  */
 746
 747   if (allow_quit)
 748     immediate_quit++;
 749
 750   start_byte = CHAR_TO_BYTE (start);
 751
 752   if (count > 0)
 753     {
 754       while (start_byte < limit_byte)
 755         {
 756           ceiling =  BUFFER_CEILING_OF (start_byte);
 757           ceiling = min (limit_byte - 1, ceiling);
 758           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 759           base = (cursor = BYTE_POS_ADDR (start_byte));
 760           while (1)
 761             {
 762               while (*cursor != '\n' && ++cursor != ceiling_addr)
 763                 ;
 764
 765               if (cursor != ceiling_addr)
 766                 {
 767                   if (--count == 0)
 768                     {
 769                       immediate_quit = old_immediate_quit;
 770                       start_byte = start_byte + cursor - base + 1;
 771                       start = BYTE_TO_CHAR (start_byte);
 772                       TEMP_SET_PT_BOTH (start, start_byte);
 773                       return 0;
 774                     }
 775                   else
 776                     if (++cursor == ceiling_addr)
 777                       break;
 778                 }
 779               else
 780                 break;
 781             }
 782           start_byte += cursor - base;
 783         }
 784     }
 785   else
 786     {
 787       int start_byte = CHAR_TO_BYTE (start);
 788       while (start_byte > limit_byte)
 789         {
 790           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 791           ceiling = max (limit_byte, ceiling);
 792           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 793           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 794           while (1)
 795             {
 796               while (--cursor != ceiling_addr && *cursor != '\n')
 797                 ;
 798
 799               if (cursor != ceiling_addr)
 800                 {
 801                   if (++count == 0)
 802                     {
 803                       immediate_quit = old_immediate_quit;
 804                       /* Return the position AFTER the match we found.  */
 805                       start_byte = start_byte + cursor - base + 1;
 806                       start = BYTE_TO_CHAR (start_byte);
 807                       TEMP_SET_PT_BOTH (start, start_byte);
 808                       return 0;
 809                     }
 810                 }
 811               else
 812                 break;
 813             }
 814           /* Here we add 1 to compensate for the last decrement
 815              of CURSOR, which took it past the valid range.  */
 816           start_byte += cursor - base + 1;
 817         }
 818     }
 819
 820   TEMP_SET_PT_BOTH (limit, limit_byte);
 821   immediate_quit = old_immediate_quit;
 822
 823   return count * direction;
 824 }
 825
  826 int
  827 find_next_newline_no_quit (from, cnt)
  828      register int from, cnt;
  829 {
        /* Scan for CNT newlines starting at FROM.  An END of 0 makes
           scan_buffer use BEGV or ZV according to the direction, the
           null SHORTAGE pointer means the count of unfound newlines is
           not reported, and the final 0 keeps immediate_quit unset.  */
  830   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
  831 }
 832
 833 /* Like find_next_newline, but returns position before the newline,
 834    not after, and only search up to TO.  This isn't just
 835    find_next_newline (...)-1, because you might hit TO.  */
 836
 837 int
 838 find_before_next_newline (from, to, cnt)
 839      int from, to, cnt;
 840 {
 841   int shortage;
 842   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 843
 844   if (shortage == 0)
 845     pos--;
 846
 847   return pos;
 848 }
 849 \f
 850 /* Subroutines of Lisp buffer search functions. */
 851
 852 static Lisp_Object
 853 search_command (string, bound, noerror, count, direction, RE, posix)
 854      Lisp_Object string, bound, noerror, count;
 855      int direction;
 856      int RE;
 857      int posix;
 858 {
 859   register int np;
 860   int lim;
 861   int n = direction;
 862
 863   if (!NILP (count))
 864     {
 865       CHECK_NUMBER (count, 3);
 866       n *= XINT (count);
 867     }
 868
 869   CHECK_STRING (string, 0);
 870   if (NILP (bound))
 871     lim = n > 0 ? ZV : BEGV;
 872   else
 873     {
 874       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 875       lim = XINT (bound);
 876       if (n > 0 ? lim < PT : lim > PT)
 877         error ("Invalid search bound (wrong side of point)");
 878       if (lim > ZV)
 879         lim = ZV;
 880       if (lim < BEGV)
 881         lim = BEGV;
 882     }
 883
 884   np = search_buffer (string, PT, lim, n, RE,
 885                       (!NILP (current_buffer->case_fold_search)
 886                        ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
 887                        : 0),
 888                       (!NILP (current_buffer->case_fold_search)
 889                        ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
 890                        : 0),
 891                       posix);
 892   if (np <= 0)
 893     {
 894       if (NILP (noerror))
 895         return signal_failure (string);
 896       if (!EQ (noerror, Qt))
 897         {
 898           if (lim < BEGV || lim > ZV)
 899             abort ();
 900           SET_PT (lim);
 901           return Qnil;
 902 #if 0 /* This would be clean, but maybe programs depend on
 903          a value of nil here.  */
 904           np = lim;
 905 #endif
 906         }
 907       else
 908         return Qnil;
 909     }
 910
 911   if (np < BEGV || np > ZV)
 912     abort ();
 913
 914   SET_PT (np);
 915
 916   return make_number (np);
 917 }
 918 \f
 919 /* Return 1 if REGEXP it matches just one constant string.  */
 920
 921 static int
 922 trivial_regexp_p (regexp)
 923      Lisp_Object regexp;
 924 {
 925   int len = XSTRING (regexp)->size;
 926   unsigned char *s = XSTRING (regexp)->data;
 927   unsigned char c;
 928   while (--len >= 0)
 929     {
 930       switch (*s++)
 931         {
 932         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 933           return 0;
 934         case '\\':
 935           if (--len < 0)
 936             return 0;
 937           switch (*s++)
 938             {
 939             case '|': case '(': case ')': case '`': case '\'': case 'b':
 940             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 941             case 'S': case '=':
 942             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 943             case '1': case '2': case '3': case '4': case '5':
 944             case '6': case '7': case '8': case '9':
 945               return 0;
 946             }
 947         }
 948     }
 949   return 1;
 950 }
 951
 952 /* Search for the n'th occurrence of STRING in the current buffer,
 953    starting at position POS and stopping at position LIM,
 954    treating STRING as a literal string if RE is false or as
 955    a regular expression if RE is true.
 956
 957    If N is positive, searching is forward and LIM must be greater than POS.
 958    If N is negative, searching is backward and LIM must be less than POS.
 959
 960    Returns -x if only N-x occurrences found (x > 0),
 961    or else the position at the beginning of the Nth occurrence
 962    (if searching backward) or the end (if searching forward).
 963
 964    POSIX is nonzero if we want full backtracking (POSIX style)
 965    for this pattern.  0 means backtrack only enough to get a valid match.  */
 966
 967 static int
 968 search_buffer (string, pos, lim, n, RE, trt, inverse_trt, posix)
 969      Lisp_Object string;
 970      int pos;
 971      int lim;
 972      int n;
 973      int RE;
 974      Lisp_Object *trt;
 975      Lisp_Object *inverse_trt;
 976      int posix;
 977 {
 978   int len = XSTRING (string)->size;
 979   int len_byte = XSTRING (string)->size_byte;
 980   unsigned char *base_pat = XSTRING (string)->data;
 981   register int *BM_tab;
 982   int *BM_tab_base;
 983   register int direction = ((n > 0) ? 1 : -1);
 984   register int dirlen;
 985   int infinity, limit, k, stride_for_teases;
 986   register unsigned char *pat, *cursor, *p_limit;
 987   register int i, j;
 988   unsigned char *p1, *p2;
 989   int s1, s2;
 990
 991   if (running_asynch_code)
 992     save_search_regs ();
 993
 994   /* Null string is found at starting position.  */
 995   if (len == 0)
 996     {
 997       set_search_regs (pos, 0);
 998       return pos;
 999     }
1000
1001   /* Searching 0 times means don't move.  */
1002   if (n == 0)
1003     return pos;
1004
1005   if (RE && !trivial_regexp_p (string))
1006     {
1007       struct re_pattern_buffer *bufp;
1008
1009       bufp = compile_pattern (string, &search_regs, trt, posix,
1010                               !NILP (current_buffer->enable_multibyte_characters));
1011
1012       immediate_quit = 1;       /* Quit immediately if user types ^G,
1013                                    because letting this function finish
1014                                    can take too long. */
1015       QUIT;                     /* Do a pending quit right away,
1016                                    to avoid paradoxical behavior */
1017       /* Get pointers and sizes of the two strings
1018          that make up the visible portion of the buffer. */
1019
1020       p1 = BEGV_ADDR;
1021       s1 = GPT_BYTE - BEGV_BYTE;
1022       p2 = GAP_END_ADDR;
1023       s2 = ZV_BYTE - GPT_BYTE;
1024       if (s1 < 0)
1025         {
1026           p2 = p1;
1027           s2 = ZV_BYTE - BEGV_BYTE;
1028           s1 = 0;
1029         }
1030       if (s2 < 0)
1031         {
1032           s1 = ZV_BYTE - BEGV_BYTE;
1033           s2 = 0;
1034         }
1035       re_match_object = Qnil;
1036
1037       while (n < 0)
1038         {
1039           int val;
1040           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1041                              pos - BEGV, lim - pos, &search_regs,
1042                              /* Don't allow match past current point */
1043                              pos - BEGV);
1044           if (val == -2)
1045             {
1046               matcher_overflow ();
1047             }
1048           if (val >= 0)
1049             {
1050               for (i = 0; i < search_regs.num_regs; i++)
1051                 if (search_regs.start[i] >= 0)
1052                   {
1053                     search_regs.start[i]
1054                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1055                     search_regs.end[i]
1056                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1057                   }
1058               XSETBUFFER (last_thing_searched, current_buffer);
1059               /* Set pos to the new position. */
1060               pos = search_regs.start[0];
1061             }
1062           else
1063             {
1064               immediate_quit = 0;
1065               return (n);
1066             }
1067           n++;
1068         }
1069       while (n > 0)
1070         {
1071           int val;
1072           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1073                              pos - BEGV, lim - pos, &search_regs,
1074                              lim - BEGV);
1075           if (val == -2)
1076             {
1077               matcher_overflow ();
1078             }
1079           if (val >= 0)
1080             {
1081               for (i = 0; i < search_regs.num_regs; i++)
1082                 if (search_regs.start[i] >= 0)
1083                   {
1084                     search_regs.start[i]
1085                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1086                     search_regs.end[i]
1087                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1088                   }
1089               XSETBUFFER (last_thing_searched, current_buffer);
1090               pos = search_regs.end[0];
1091             }
1092           else
1093             {
1094               immediate_quit = 0;
1095               return (0 - n);
1096             }
1097           n--;
1098         }
1099       immediate_quit = 0;
1100       return (pos);
1101     }
1102   else                          /* non-RE case */
1103     {
1104       int pos_byte = CHAR_TO_BYTE (pos);
1105       int lim_byte = CHAR_TO_BYTE (lim);
1106 #ifdef C_ALLOCA
1107       int BM_tab_space[0400];
1108       BM_tab = &BM_tab_space[0];
1109 #else
1110       BM_tab = (int *) alloca (0400 * sizeof (int));
1111 #endif
1112       {
1113         unsigned char *raw_pattern;
1114         int raw_pattern_size;
1115         unsigned char *patbuf;
1116         int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1117
1118         /* MULTIBYTE says whether the text to be searched is multibyte.
1119            We must convert PATTERN to match that, or we will not really
1120            find things right.  */
1121
1122         if (multibyte == STRING_MULTIBYTE (string))
1123           {
1124             raw_pattern = (char *) XSTRING (string)->data;
1125             raw_pattern_size = XSTRING (string)->size_byte;
1126           }
1127         else if (multibyte)
1128           {
1129             raw_pattern_size = count_size_as_multibyte (XSTRING (string)->data,
1130                                                         XSTRING (string)->size);
1131             raw_pattern = (char *) alloca (raw_pattern_size + 1);
1132             copy_text (XSTRING (string)->data, raw_pattern,
1133                        XSTRING (string)->size, 0, 1);
1134           }
1135         else
1136           {
1137             /* Converting multibyte to single-byte.
1138
1139                ??? Perhaps this conversion should be done in a special way
1140                by subtracting nonascii-insert-offset from each non-ASCII char,
1141                so that only the multibyte chars which really correspond to
1142                the chosen single-byte character set can possibly match.  */
1143             raw_pattern_size = XSTRING (string)->size;
1144             raw_pattern = (char *) alloca (raw_pattern_size + 1);
1145             copy_text (XSTRING (string)->data, raw_pattern,
1146                        XSTRING (string)->size, 1, 0);
1147           }
1148
1149         len_byte = raw_pattern_size;
1150         patbuf = (unsigned char *) alloca (len_byte);
1151         pat = patbuf;
1152         base_pat = raw_pattern;
1153         while (--len_byte >= 0)
1154           {
1155             /* If we got here and the RE flag is set, it's because we're
1156                dealing with a regexp known to be trivial, so the backslash
1157                just quotes the next character.  */
1158             if (RE && *base_pat == '\\')
1159               {
1160                 len_byte--;
1161                 base_pat++;
1162               }
1163             *pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++);
1164           }
1165         len_byte = pat - patbuf;
1166         pat = base_pat = patbuf;
1167       }
1168       /* The general approach is that we are going to maintain that we know */
1169       /* the first (closest to the present position, in whatever direction */
1170       /* we're searching) character that could possibly be the last */
1171       /* (furthest from present position) character of a valid match.  We */
1172       /* advance the state of our knowledge by looking at that character */
1173       /* and seeing whether it indeed matches the last character of the */
1174       /* pattern.  If it does, we take a closer look.  If it does not, we */
1175       /* move our pointer (to putative last characters) as far as is */
1176       /* logically possible.  This amount of movement, which I call a */
1177       /* stride, will be the length of the pattern if the actual character */
1178       /* appears nowhere in the pattern, otherwise it will be the distance */
1179       /* from the last occurrence of that character to the end of the */
1180       /* pattern. */
1181       /* As a coding trick, an enormous stride is coded into the table for */
1182       /* characters that match the last character.  This allows use of only */
1183       /* a single test, a test for having gone past the end of the */
1184       /* permissible match region, to test for both possible matches (when */
1185       /* the stride goes past the end immediately) and failure to */
1186       /* match (where you get nudged past the end one stride at a time). */
1187
1188       /* Here we make a "mickey mouse" BM table.  The stride of the search */
1189       /* is determined only by the last character of the putative match. */
1190       /* If that character does not match, we will stride the proper */
1191       /* distance to propose a match that superimposes it on the last */
1192       /* instance of a character that matches it (per trt), or misses */
1193       /* it entirely if there is none. */
1194
1195       dirlen = len_byte * direction;
1196       infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1197       if (direction < 0)
1198         pat = (base_pat += len_byte - 1);
1199       BM_tab_base = BM_tab;
1200       BM_tab += 0400;
1201       j = dirlen;               /* to get it in a register */
1202       /* A character that does not appear in the pattern induces a */
1203       /* stride equal to the pattern length. */
1204       while (BM_tab_base != BM_tab)
1205         {
1206           *--BM_tab = j;
1207           *--BM_tab = j;
1208           *--BM_tab = j;
1209           *--BM_tab = j;
1210         }
1211       i = 0;
1212       while (i != infinity)
1213         {
1214           j = pat[i]; i += direction;
1215           if (i == dirlen) i = infinity;
1216           if (trt != 0)
1217             {
1218               k = (j = XINT (trt[j]));
1219               if (i == infinity)
1220                 stride_for_teases = BM_tab[j];
1221               BM_tab[j] = dirlen - i;
1222               /* A translation table is accompanied by its inverse -- see */
1223               /* comment following downcase_table for details */
1224               while ((j = (unsigned char) XINT (inverse_trt[j])) != k)
1225                 BM_tab[j] = dirlen - i;
1226             }
1227           else
1228             {
1229               if (i == infinity)
1230                 stride_for_teases = BM_tab[j];
1231               BM_tab[j] = dirlen - i;
1232             }
1233           /* stride_for_teases tells how much to stride if we get a */
1234           /* match on the far character but are subsequently */
1235           /* disappointed, by recording what the stride would have been */
1236           /* for that character if the last character had been */
1237           /* different. */
1238         }
1239       infinity = dirlen - infinity;
1240       pos_byte += dirlen - ((direction > 0) ? direction : 0);
1241       /* loop invariant - POS_BYTE points at where last char (first
1242          char if reverse) of pattern would align in a possible match.  */
1243       while (n != 0)
1244         {
1245           /* It's been reported that some (broken) compiler thinks that
1246              Boolean expressions in an arithmetic context are unsigned.
1247              Using an explicit ?1:0 prevents this.  */
1248           if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1249               < 0)
1250             return (n * (0 - direction));
1251           /* First we do the part we can by pointers (maybe nothing) */
1252           QUIT;
1253           pat = base_pat;
1254           limit = pos_byte - dirlen + direction;
1255           limit = ((direction > 0)
1256                    ? BUFFER_CEILING_OF (limit)
1257                    : BUFFER_FLOOR_OF (limit));
1258           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1259              can take on without hitting edge of buffer or the gap.  */
1260           limit = ((direction > 0)
1261                    ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1262                    : max (lim_byte, max (limit, pos_byte - 20000)));
1263           if ((limit - pos_byte) * direction > 20)
1264             {
1265               p_limit = BYTE_POS_ADDR (limit);
1266               p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1267               /* In this loop, pos + cursor - p2 is the surrogate for pos */
1268               while (1)         /* use one cursor setting as long as i can */
1269                 {
1270                   if (direction > 0) /* worth duplicating */
1271                     {
1272                       /* Use signed comparison if appropriate
1273                          to make cursor+infinity sure to be > p_limit.
1274                          Assuming that the buffer lies in a range of addresses
1275                          that are all "positive" (as ints) or all "negative",
1276                          either kind of comparison will work as long
1277                          as we don't step by infinity.  So pick the kind
1278                          that works when we do step by infinity.  */
1279                       if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1280                         while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1281                           cursor += BM_tab[*cursor];
1282                       else
1283                         while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1284                           cursor += BM_tab[*cursor];
1285                     }
1286                   else
1287                     {
1288                       if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1289                         while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1290                           cursor += BM_tab[*cursor];
1291                       else
1292                         while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1293                           cursor += BM_tab[*cursor];
1294                     }
1295 /* If you are here, cursor is beyond the end of the searched region. */
1296  /* This can happen if you match on the far character of the pattern, */
1297  /* because the "stride" of that character is infinity, a number able */
1298  /* to throw you well beyond the end of the search.  It can also */
1299  /* happen if you fail to match within the permitted region and would */
1300  /* otherwise try a character beyond that region */
1301                   if ((cursor - p_limit) * direction <= len_byte)
1302                     break;      /* a small overrun is genuine */
1303                   cursor -= infinity; /* large overrun = hit */
1304                   i = dirlen - direction;
1305                   if (trt != 0)
1306                     {
1307                       while ((i -= direction) + direction != 0)
1308                         if (pat[i] != XINT (trt[*(cursor -= direction)]))
1309                           break;
1310                     }
1311                   else
1312                     {
1313                       while ((i -= direction) + direction != 0)
1314                         if (pat[i] != *(cursor -= direction))
1315                           break;
1316                     }
1317                   cursor += dirlen - i - direction;     /* fix cursor */
1318                   if (i + direction == 0)
1319                     {
1320                       int position;
1321
1322                       cursor -= direction;
1323
1324                       position = pos_byte + cursor - p2 + ((direction > 0)
1325                                                            ? 1 - len_byte : 0);
1326                       set_search_regs (position, len_byte);
1327
1328                       if ((n -= direction) != 0)
1329                         cursor += dirlen; /* to resume search */
1330                       else
1331                         return ((direction > 0)
1332                                 ? search_regs.end[0] : search_regs.start[0]);
1333                     }
1334                   else
1335                     cursor += stride_for_teases; /* <sigh> we lose -  */
1336                 }
1337               pos_byte += cursor - p2;
1338             }
1339           else
1340             /* Now we'll pick up a clump that has to be done the hard */
1341             /* way because it covers a discontinuity */
1342             {
1343               limit = ((direction > 0)
1344                        ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1345                        : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1346               limit = ((direction > 0)
1347                        ? min (limit + len_byte, lim_byte - 1)
1348                        : max (limit - len_byte, lim_byte));
1349               /* LIMIT is now the last value POS_BYTE can have
1350                  and still be valid for a possible match.  */
1351               while (1)
1352                 {
1353                   /* This loop can be coded for space rather than */
1354                   /* speed because it will usually run only once. */
1355                   /* (the reach is at most len + 21, and typically */
1356                   /* does not exceed len) */
1357                   while ((limit - pos_byte) * direction >= 0)
1358                     pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1359                   /* now run the same tests to distinguish going off the */
1360                   /* end, a match or a phony match. */
1361                   if ((pos_byte - limit) * direction <= len_byte)
1362                     break;      /* ran off the end */
1363                   /* Found what might be a match.
1364                      Set POS_BYTE back to last (first if reverse) pos.  */
1365                   pos_byte -= infinity;
1366                   i = dirlen - direction;
1367                   while ((i -= direction) + direction != 0)
1368                     {
1369                       pos_byte -= direction;
1370                       if (pat[i] != (trt != 0
1371                                      ? XINT (trt[FETCH_BYTE (pos_byte)])
1372                                      : FETCH_BYTE (pos_byte)))
1373                         break;
1374                     }
1375                   /* Above loop has moved POS_BYTE part or all the way
1376                      back to the first pos (last pos if reverse).
1377                      Set it once again at the last (first if reverse) char.  */
1378                   pos_byte += dirlen - i- direction;
1379                   if (i + direction == 0)
1380                     {
1381                       int position;
1382                       pos_byte -= direction;
1383
1384                       position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1385
1386                       set_search_regs (position, len_byte);
1387
1388                       if ((n -= direction) != 0)
1389                         pos_byte += dirlen; /* to resume search */
1390                       else
1391                         return ((direction > 0)
1392                                 ? search_regs.end[0] : search_regs.start[0]);
1393                     }
1394                   else
1395                     pos_byte += stride_for_teases;
1396                 }
1397               }
1398           /* We have done one clump.  Can we continue? */
1399           if ((lim_byte - pos_byte) * direction < 0)
1400             return ((0 - n) * direction);
1401         }
1402       return BYTE_TO_CHAR (pos_byte);
1403     }
1404 }
1405
1406 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1407    for a match just found in the current buffer.  */
1408
1409 static void
1410 set_search_regs (beg_byte, nbytes)
1411      int beg_byte, nbytes;
1412 {
1413   /* Make sure we have registers in which to store
1414      the match position.  */
1415   if (search_regs.num_regs == 0)
1416     {
1417       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1418       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1419       search_regs.num_regs = 2;
1420     }
1421
1422   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1423   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1424   XSETBUFFER (last_thing_searched, current_buffer);
1425 }
1426 \f
1427 /* Given a string of words separated by word delimiters,
1428   compute a regexp that matches those exact words
1429   separated by arbitrary punctuation.  */
1430
1431 static Lisp_Object
1432 wordify (string)
1433      Lisp_Object string;
1434 {
1435   register unsigned char *p, *o;
1436   register int i, i_byte, len, punct_count = 0, word_count = 0;
1437   Lisp_Object val;
1438   int prev_c = 0;
1439   int adjust;
1440
1441   CHECK_STRING (string, 0);
1442   p = XSTRING (string)->data;
1443   len = XSTRING (string)->size;
1444
1445   for (i = 0, i_byte = 0; i < len; )
1446     {
1447       int c;
1448
1449       if (STRING_MULTIBYTE (string))
1450         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1451       else
1452         c = XSTRING (string)->data[i++];
1453
1454       if (SYNTAX (c) != Sword)
1455         {
1456           punct_count++;
1457           if (i > 0 && SYNTAX (prev_c) == Sword)
1458             word_count++;
1459         }
1460
1461       prev_c = c;
1462     }
1463
1464   if (SYNTAX (prev_c) == Sword)
1465     word_count++;
1466   if (!word_count)
1467     return build_string ("");
1468
1469   adjust = - punct_count + 5 * (word_count - 1) + 4;
1470   val = make_uninit_multibyte_string (len + adjust,
1471                                       XSTRING (string)->size_byte + adjust);
1472
1473   o = XSTRING (val)->data;
1474   *o++ = '\\';
1475   *o++ = 'b';
1476
1477   for (i = 0; i < XSTRING (val)->size_byte; i++)
1478     if (SYNTAX (p[i]) == Sword)
1479       *o++ = p[i];
1480     else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1481       {
1482         *o++ = '\\';
1483         *o++ = 'W';
1484         *o++ = '\\';
1485         *o++ = 'W';
1486         *o++ = '*';
1487       }
1488
1489   *o++ = '\\';
1490   *o++ = 'b';
1491
1492   return val;
1493 }
1494 \f
1495 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1496   "MSearch backward: ",
1497   "Search backward from point for STRING.\n\
1498 Set point to the beginning of the occurrence found, and return point.\n\
1499 An optional second argument bounds the search; it is a buffer position.\n\
1500 The match found must not extend before that position.\n\
1501 Optional third argument, if t, means if fail just return nil (no error).\n\
1502  If not nil and not t, position at limit of search and return nil.\n\
1503 Optional fourth argument is repeat count--search for successive occurrences.\n\
1504 See also the functions `match-beginning', `match-end' and `replace-match'.")
1505   (string, bound, noerror, count)
1506      Lisp_Object string, bound, noerror, count;
1507 {
1508   return search_command (string, bound, noerror, count, -1, 0, 0);
1509 }
1510
1511 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1512   "Search forward from point for STRING.\n\
1513 Set point to the end of the occurrence found, and return point.\n\
1514 An optional second argument bounds the search; it is a buffer position.\n\
1515 The match found must not extend after that position.  nil is equivalent\n\
1516   to (point-max).\n\
1517 Optional third argument, if t, means if fail just return nil (no error).\n\
1518   If not nil and not t, move to limit of search and return nil.\n\
1519 Optional fourth argument is repeat count--search for successive occurrences.\n\
1520 See also the functions `match-beginning', `match-end' and `replace-match'.")
1521   (string, bound, noerror, count)
1522      Lisp_Object string, bound, noerror, count;
1523 {
1524   return search_command (string, bound, noerror, count, 1, 0, 0);
1525 }
1526
1527 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1528   "sWord search backward: ",
1529   "Search backward from point for STRING, ignoring differences in punctuation.\n\
1530 Set point to the beginning of the occurrence found, and return point.\n\
1531 An optional second argument bounds the search; it is a buffer position.\n\
1532 The match found must not extend before that position.\n\
1533 Optional third argument, if t, means if fail just return nil (no error).\n\
1534   If not nil and not t, move to limit of search and return nil.\n\
1535 Optional fourth argument is repeat count--search for successive occurrences.")
1536   (string, bound, noerror, count)
1537      Lisp_Object string, bound, noerror, count;
1538 {
1539   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
1540 }
1541
1542 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1543   "sWord search: ",
1544   "Search forward from point for STRING, ignoring differences in punctuation.\n\
1545 Set point to the end of the occurrence found, and return point.\n\
1546 An optional second argument bounds the search; it is a buffer position.\n\
1547 The match found must not extend after that position.\n\
1548 Optional third argument, if t, means if fail just return nil (no error).\n\
1549   If not nil and not t, move to limit of search and return nil.\n\
1550 Optional fourth argument is repeat count--search for successive occurrences.")
1551   (string, bound, noerror, count)
1552      Lisp_Object string, bound, noerror, count;
1553 {
1554   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
1555 }
1556
1557 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1558   "sRE search backward: ",
1559   "Search backward from point for match for regular expression REGEXP.\n\
1560 Set point to the beginning of the match, and return point.\n\
1561 The match found is the one starting last in the buffer\n\
1562 and yet ending before the origin of the search.\n\
1563 An optional second argument bounds the search; it is a buffer position.\n\
1564 The match found must start at or after that position.\n\
1565 Optional third argument, if t, means if fail just return nil (no error).\n\
1566   If not nil and not t, move to limit of search and return nil.\n\
1567 Optional fourth argument is repeat count--search for successive occurrences.\n\
1568 See also the functions `match-beginning', `match-end' and `replace-match'.")
1569   (regexp, bound, noerror, count)
1570      Lisp_Object regexp, bound, noerror, count;
1571 {
1572   return search_command (regexp, bound, noerror, count, -1, 1, 0);
1573 }
1574
1575 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1576   "sRE search: ",
1577   "Search forward from point for regular expression REGEXP.\n\
1578 Set point to the end of the occurrence found, and return point.\n\
1579 An optional second argument bounds the search; it is a buffer position.\n\
1580 The match found must not extend after that position.\n\
1581 Optional third argument, if t, means if fail just return nil (no error).\n\
1582   If not nil and not t, move to limit of search and return nil.\n\
1583 Optional fourth argument is repeat count--search for successive occurrences.\n\
1584 See also the functions `match-beginning', `match-end' and `replace-match'.")
1585   (regexp, bound, noerror, count)
1586      Lisp_Object regexp, bound, noerror, count;
1587 {
1588   return search_command (regexp, bound, noerror, count, 1, 1, 0);
1589 }
1590
1591 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
1592   "sPosix search backward: ",
1593   "Search backward from point for match for regular expression REGEXP.\n\
1594 Find the longest match in accord with Posix regular expression rules.\n\
1595 Set point to the beginning of the match, and return point.\n\
1596 The match found is the one starting last in the buffer\n\
1597 and yet ending before the origin of the search.\n\
1598 An optional second argument bounds the search; it is a buffer position.\n\
1599 The match found must start at or after that position.\n\
1600 Optional third argument, if t, means if fail just return nil (no error).\n\
1601   If not nil and not t, move to limit of search and return nil.\n\
1602 Optional fourth argument is repeat count--search for successive occurrences.\n\
1603 See also the functions `match-beginning', `match-end' and `replace-match'.")
1604   (regexp, bound, noerror, count)
1605      Lisp_Object regexp, bound, noerror, count;
1606 {
1607   return search_command (regexp, bound, noerror, count, -1, 1, 1);
1608 }
1609
1610 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
1611   "sPosix search: ",
1612   "Search forward from point for regular expression REGEXP.\n\
1613 Find the longest match in accord with Posix regular expression rules.\n\
1614 Set point to the end of the occurrence found, and return point.\n\
1615 An optional second argument bounds the search; it is a buffer position.\n\
1616 The match found must not extend after that position.\n\
1617 Optional third argument, if t, means if fail just return nil (no error).\n\
1618   If not nil and not t, move to limit of search and return nil.\n\
1619 Optional fourth argument is repeat count--search for successive occurrences.\n\
1620 See also the functions `match-beginning', `match-end' and `replace-match'.")
1621   (regexp, bound, noerror, count)
1622      Lisp_Object regexp, bound, noerror, count;
1623 {
1624   return search_command (regexp, bound, noerror, count, 1, 1, 1);
1625 }
1626 \f
1627 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
1628   "Replace text matched by last search with NEWTEXT.\n\
1629 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1630 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1631 based on the replaced text.\n\
1632 If the replaced text has only capital letters\n\
1633 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1634 If the replaced text has at least one word starting with a capital letter,\n\
1635 then capitalize each word in NEWTEXT.\n\n\
1636 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1637 Otherwise treat `\\' as special:\n\
1638   `\\&' in NEWTEXT means substitute original matched text.\n\
1639   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1640        If Nth parens didn't match, substitute nothing.\n\
1641   `\\\\' means insert one `\\'.\n\
1642 FIXEDCASE and LITERAL are optional arguments.\n\
1643 Leaves point at end of replacement text.\n\
1644 \n\
1645 The optional fourth argument STRING can be a string to modify.\n\
1646 In that case, this function creates and returns a new string\n\
1647 which is made by replacing the part of STRING that was matched.\n\
1648 \n\
1649 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1650 It says to replace just that subexpression instead of the whole match.\n\
1651 This is useful only after a regular expression search or match\n\
1652 since only regular expressions have distinguished subexpressions.")
1653   (newtext, fixedcase, literal, string, subexp)
1654      Lisp_Object newtext, fixedcase, literal, string, subexp;
1655 {
1656   enum { nochange, all_caps, cap_initial } case_action;
1657   register int pos, last;
1658   int some_multiletter_word;
1659   int some_lowercase;
1660   int some_uppercase;
1661   int some_nonuppercase_initial;
1662   register int c, prevc;
1663   int inslen;
1664   int sub;
1665   int opoint, newpoint;
1666
1667   CHECK_STRING (newtext, 0);
1668
1669   if (! NILP (string))
1670     CHECK_STRING (string, 4);
1671
1672   case_action = nochange;       /* We tried an initialization */
1673                                 /* but some C compilers blew it */
1674
1675   if (search_regs.num_regs <= 0)
1676     error ("replace-match called before any match found");
1677
1678   if (NILP (subexp))
1679     sub = 0;
1680   else
1681     {
1682       CHECK_NUMBER (subexp, 3);
1683       sub = XINT (subexp);
1684       if (sub < 0 || sub >= search_regs.num_regs)
1685         args_out_of_range (subexp, make_number (search_regs.num_regs));
1686     }
1687
1688   if (NILP (string))
1689     {
1690       if (search_regs.start[sub] < BEGV
1691           || search_regs.start[sub] > search_regs.end[sub]
1692           || search_regs.end[sub] > ZV)
1693         args_out_of_range (make_number (search_regs.start[sub]),
1694                            make_number (search_regs.end[sub]));
1695     }
1696   else
1697     {
1698       if (search_regs.start[sub] < 0
1699           || search_regs.start[sub] > search_regs.end[sub]
1700           || search_regs.end[sub] > XSTRING (string)->size)
1701         args_out_of_range (make_number (search_regs.start[sub]),
1702                            make_number (search_regs.end[sub]));
1703     }
1704
1705   if (NILP (fixedcase))
1706     {
1707       int beg;
1708       /* Decide how to casify by examining the matched text. */
1709
1710       if (NILP (string))
1711         last = CHAR_TO_BYTE (search_regs.end[sub]);
1712       else
1713         last = search_regs.end[sub];
1714
1715       if (NILP (string))
1716         beg = CHAR_TO_BYTE (search_regs.start[sub]);
1717       else
1718         beg = search_regs.start[sub];
1719
1720       prevc = '\n';
1721       case_action = all_caps;
1722
1723       /* some_multiletter_word is set nonzero if any original word
1724          is more than one letter long. */
1725       some_multiletter_word = 0;
1726       some_lowercase = 0;
1727       some_nonuppercase_initial = 0;
1728       some_uppercase = 0;
1729
1730       for (pos = beg; pos < last; pos++)
1731         {
1732           if (NILP (string))
1733             c = FETCH_BYTE (pos);
1734           else
1735             c = XSTRING (string)->data[pos];
1736
1737           if (LOWERCASEP (c))
1738             {
1739               /* Cannot be all caps if any original char is lower case */
1740
1741               some_lowercase = 1;
1742               if (SYNTAX (prevc) != Sword)
1743                 some_nonuppercase_initial = 1;
1744               else
1745                 some_multiletter_word = 1;
1746             }
1747           else if (!NOCASEP (c))
1748             {
1749               some_uppercase = 1;
1750               if (SYNTAX (prevc) != Sword)
1751                 ;
1752               else
1753                 some_multiletter_word = 1;
1754             }
1755           else
1756             {
1757               /* If the initial is a caseless word constituent,
1758                  treat that like a lowercase initial.  */
1759               if (SYNTAX (prevc) != Sword)
1760                 some_nonuppercase_initial = 1;
1761             }
1762
1763           prevc = c;
1764         }
1765
1766       /* Convert to all caps if the old text is all caps
1767          and has at least one multiletter word.  */
1768       if (! some_lowercase && some_multiletter_word)
1769         case_action = all_caps;
1770       /* Capitalize each word, if the old text has all capitalized words.  */
1771       else if (!some_nonuppercase_initial && some_multiletter_word)
1772         case_action = cap_initial;
1773       else if (!some_nonuppercase_initial && some_uppercase)
1774         /* Should x -> yz, operating on X, give Yz or YZ?
1775            We'll assume the latter.  */
1776         case_action = all_caps;
1777       else
1778         case_action = nochange;
1779     }
1780
1781   /* Do replacement in a string.  */
1782   if (!NILP (string))
1783     {
1784       Lisp_Object before, after;
1785
1786       before = Fsubstring (string, make_number (0),
1787                            make_number (search_regs.start[sub]));
1788       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
1789
1790       /* Substitute parts of the match into NEWTEXT
1791          if desired.  */
1792       if (NILP (literal))
1793         {
1794           int lastpos = -1;
1795           int lastpos_byte = -1;
1796           /* We build up the substituted string in ACCUM.  */
1797           Lisp_Object accum;
1798           Lisp_Object middle;
1799           int pos_byte;
1800
1801           accum = Qnil;
1802
1803           for (pos_byte = 0, pos = 0; pos_byte < XSTRING (newtext)->size_byte;)
1804             {
1805               int substart = -1;
1806               int subend;
1807               int delbackslash = 0;
1808
1809               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
1810
1811               if (c == '\\')
1812                 {
1813                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
1814                   if (c == '&')
1815                     {
1816                       substart = search_regs.start[sub];
1817                       subend = search_regs.end[sub];
1818                     }
1819                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1820                     {
1821                       if (search_regs.start[c - '0'] >= 0)
1822                         {
1823                           substart = search_regs.start[c - '0'];
1824                           subend = search_regs.end[c - '0'];
1825                         }
1826                     }
1827                   else if (c == '\\')
1828                     delbackslash = 1;
1829                   else
1830                     error ("Invalid use of `\\' in replacement text");
1831                 }
1832               if (substart >= 0)
1833                 {
1834                   if (pos - 1 != lastpos + 1)
1835                     middle = substring_both (newtext, lastpos + 1,
1836                                              lastpos_byte + 1,
1837                                              pos - 1, pos_byte - 1);
1838                   else
1839                     middle = Qnil;
1840                   accum = concat3 (accum, middle,
1841                                    Fsubstring (string,
1842                                                make_number (substart),
1843                                                make_number (subend)));
1844                   lastpos = pos;
1845                   lastpos_byte = pos_byte;
1846                 }
1847               else if (delbackslash)
1848                 {
1849                   middle = substring_both (newtext, lastpos + 1,
1850                                            lastpos_byte + 1,
1851                                            pos, pos_byte);
1852
1853                   accum = concat2 (accum, middle);
1854                   lastpos = pos;
1855                   lastpos_byte = pos_byte;
1856                 }
1857             }
1858
1859           if (pos != lastpos + 1)
1860             middle = substring_both (newtext, lastpos + 1,
1861                                      lastpos_byte + 1,
1862                                      pos, pos_byte);
1863           else
1864             middle = Qnil;
1865
1866           newtext = concat2 (accum, middle);
1867         }
1868
1869       /* Do case substitution in NEWTEXT if desired.  */
1870       if (case_action == all_caps)
1871         newtext = Fupcase (newtext);
1872       else if (case_action == cap_initial)
1873         newtext = Fupcase_initials (newtext);
1874
1875       return concat3 (before, newtext, after);
1876     }
1877
1878   /* Record point, the move (quietly) to the start of the match.  */
1879   if (PT > search_regs.start[sub])
1880     opoint = PT - ZV;
1881   else
1882     opoint = PT;
1883
1884   TEMP_SET_PT (search_regs.start[sub]);
1885
1886   /* We insert the replacement text before the old text, and then
1887      delete the original text.  This means that markers at the
1888      beginning or end of the original will float to the corresponding
1889      position in the replacement.  */
1890   if (!NILP (literal))
1891     Finsert_and_inherit (1, &newtext);
1892   else
1893     {
1894       struct gcpro gcpro1;
1895       GCPRO1 (newtext);
1896
1897       for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1898         {
1899           int offset = PT - search_regs.start[sub];
1900
1901           c = XSTRING (newtext)->data[pos];
1902           if (c == '\\')
1903             {
1904               c = XSTRING (newtext)->data[++pos];
1905               if (c == '&')
1906                 Finsert_buffer_substring
1907                   (Fcurrent_buffer (),
1908                    make_number (search_regs.start[sub] + offset),
1909                    make_number (search_regs.end[sub] + offset));
1910               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1911                 {
1912                   if (search_regs.start[c - '0'] >= 1)
1913                     Finsert_buffer_substring
1914                       (Fcurrent_buffer (),
1915                        make_number (search_regs.start[c - '0'] + offset),
1916                        make_number (search_regs.end[c - '0'] + offset));
1917                 }
1918               else if (c == '\\')
1919                 insert_char (c);
1920               else
1921                 error ("Invalid use of `\\' in replacement text");
1922             }
1923           else
1924             insert_char (c);
1925         }
1926       UNGCPRO;
1927     }
1928
1929   inslen = PT - (search_regs.start[sub]);
1930   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
1931
1932   if (case_action == all_caps)
1933     Fupcase_region (make_number (PT - inslen), make_number (PT));
1934   else if (case_action == cap_initial)
1935     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
1936
1937   newpoint = PT;
1938
1939   /* Put point back where it was in the text.  */
1940   if (opoint <= 0)
1941     TEMP_SET_PT (opoint + ZV);
1942   else
1943     TEMP_SET_PT (opoint);
1944
1945   /* Now move point "officially" to the start of the inserted replacement.  */
1946   move_if_not_intangible (newpoint);
1947
1948   return Qnil;
1949 }
1950 \f
1951 static Lisp_Object
1952 match_limit (num, beginningp)
1953      Lisp_Object num;
1954      int beginningp;
1955 {
1956   register int n;
1957
1958   CHECK_NUMBER (num, 0);
1959   n = XINT (num);
1960   if (n < 0 || n >= search_regs.num_regs)
1961     args_out_of_range (num, make_number (search_regs.num_regs));
1962   if (search_regs.num_regs <= 0
1963       || search_regs.start[n] < 0)
1964     return Qnil;
1965   return (make_number ((beginningp) ? search_regs.start[n]
1966                                     : search_regs.end[n]));
1967 }
1968
1969 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1970   "Return position of start of text matched by last search.\n\
1971 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1972   regexp.\n\
1973 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1974   SUBEXP pairs.\n\
1975 Zero means the entire text matched by the whole regexp or whole string.")
1976   (subexp)
1977      Lisp_Object subexp;
1978 {
1979   return match_limit (subexp, 1);
1980 }
1981
1982 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1983   "Return position of end of text matched by last search.\n\
1984 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1985   regexp.\n\
1986 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1987   SUBEXP pairs.\n\
1988 Zero means the entire text matched by the whole regexp or whole string.")
1989   (subexp)
1990      Lisp_Object subexp;
1991 {
1992   return match_limit (subexp, 0);
1993 }
1994
1995 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
1996   "Return a list containing all info on what the last search matched.\n\
1997 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1998 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1999 if the last match was on a buffer; integers or nil if a string was matched.\n\
2000 Use `store-match-data' to reinstate the data in this list.\n\
2001 \n\
2002 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2003 \(rather than markers) to represent buffer positions.\n\
2004 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2005 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2006   (integers, reuse)
2007      Lisp_Object integers, reuse;
2008 {
2009   Lisp_Object tail, prev;
2010   Lisp_Object *data;
2011   int i, len;
2012
2013   if (NILP (last_thing_searched))
2014     return Qnil;
2015
2016   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2017                                  * sizeof (Lisp_Object));
2018
2019   len = -1;
2020   for (i = 0; i < search_regs.num_regs; i++)
2021     {
2022       int start = search_regs.start[i];
2023       if (start >= 0)
2024         {
2025           if (EQ (last_thing_searched, Qt)
2026               || ! NILP (integers))
2027             {
2028               XSETFASTINT (data[2 * i], start);
2029               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2030             }
2031           else if (BUFFERP (last_thing_searched))
2032             {
2033               data[2 * i] = Fmake_marker ();
2034               Fset_marker (data[2 * i],
2035                            make_number (start),
2036                            last_thing_searched);
2037               data[2 * i + 1] = Fmake_marker ();
2038               Fset_marker (data[2 * i + 1],
2039                            make_number (search_regs.end[i]),
2040                            last_thing_searched);
2041             }
2042           else
2043             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2044             abort ();
2045
2046           len = i;
2047         }
2048       else
2049         data[2 * i] = data [2 * i + 1] = Qnil;
2050     }
2051
2052   /* If REUSE is not usable, cons up the values and return them.  */
2053   if (! CONSP (reuse))
2054     return Flist (2 * len + 2, data);
2055
2056   /* If REUSE is a list, store as many value elements as will fit
2057      into the elements of REUSE.  */
2058   for (i = 0, tail = reuse; CONSP (tail);
2059        i++, tail = XCONS (tail)->cdr)
2060     {
2061       if (i < 2 * len + 2)
2062         XCONS (tail)->car = data[i];
2063       else
2064         XCONS (tail)->car = Qnil;
2065       prev = tail;
2066     }
2067
2068   /* If we couldn't fit all value elements into REUSE,
2069      cons up the rest of them and add them to the end of REUSE.  */
2070   if (i < 2 * len + 2)
2071     XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2072
2073   return reuse;
2074 }
2075
2076
2077 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
2078   "Set internal data on last search match from elements of LIST.\n\
2079 LIST should have been created by calling `match-data' previously.")
2080   (list)
2081      register Lisp_Object list;
2082 {
2083   register int i;
2084   register Lisp_Object marker;
2085
2086   if (running_asynch_code)
2087     save_search_regs ();
2088
2089   if (!CONSP (list) && !NILP (list))
2090     list = wrong_type_argument (Qconsp, list);
2091
2092   /* Unless we find a marker with a buffer in LIST, assume that this
2093      match data came from a string.  */
2094   last_thing_searched = Qt;
2095
2096   /* Allocate registers if they don't already exist.  */
2097   {
2098     int length = XFASTINT (Flength (list)) / 2;
2099
2100     if (length > search_regs.num_regs)
2101       {
2102         if (search_regs.num_regs == 0)
2103           {
2104             search_regs.start
2105               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2106             search_regs.end
2107               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2108           }
2109         else
2110           {
2111             search_regs.start
2112               = (regoff_t *) xrealloc (search_regs.start,
2113                                        length * sizeof (regoff_t));
2114             search_regs.end
2115               = (regoff_t *) xrealloc (search_regs.end,
2116                                        length * sizeof (regoff_t));
2117           }
2118
2119         search_regs.num_regs = length;
2120       }
2121   }
2122
2123   for (i = 0; i < search_regs.num_regs; i++)
2124     {
2125       marker = Fcar (list);
2126       if (NILP (marker))
2127         {
2128           search_regs.start[i] = -1;
2129           list = Fcdr (list);
2130         }
2131       else
2132         {
2133           if (MARKERP (marker))
2134             {
2135               if (XMARKER (marker)->buffer == 0)
2136                 XSETFASTINT (marker, 0);
2137               else
2138                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2139             }
2140
2141           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2142           search_regs.start[i] = XINT (marker);
2143           list = Fcdr (list);
2144
2145           marker = Fcar (list);
2146           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2147             XSETFASTINT (marker, 0);
2148
2149           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2150           search_regs.end[i] = XINT (marker);
2151         }
2152       list = Fcdr (list);
2153     }
2154
2155   return Qnil;
2156 }
2157
2158 /* If non-zero the match data have been saved in saved_search_regs
2159    during the execution of a sentinel or filter. */
2160 static int search_regs_saved;
2161 static struct re_registers saved_search_regs;
2162
2163 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2164    if asynchronous code (filter or sentinel) is running. */
2165 static void
2166 save_search_regs ()
2167 {
2168   if (!search_regs_saved)
2169     {
2170       saved_search_regs.num_regs = search_regs.num_regs;
2171       saved_search_regs.start = search_regs.start;
2172       saved_search_regs.end = search_regs.end;
2173       search_regs.num_regs = 0;
2174       search_regs.start = 0;
2175       search_regs.end = 0;
2176
2177       search_regs_saved = 1;
2178     }
2179 }
2180
2181 /* Called upon exit from filters and sentinels. */
2182 void
2183 restore_match_data ()
2184 {
2185   if (search_regs_saved)
2186     {
2187       if (search_regs.num_regs > 0)
2188         {
2189           xfree (search_regs.start);
2190           xfree (search_regs.end);
2191         }
2192       search_regs.num_regs = saved_search_regs.num_regs;
2193       search_regs.start = saved_search_regs.start;
2194       search_regs.end = saved_search_regs.end;
2195
2196       search_regs_saved = 0;
2197     }
2198 }
2199
2200 /* Quote a string to inactivate reg-expr chars */
2201
2202 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2203   "Return a regexp string which matches exactly STRING and nothing else.")
2204   (string)
2205      Lisp_Object string;
2206 {
2207   register unsigned char *in, *out, *end;
2208   register unsigned char *temp;
2209   int backslashes_added = 0;
2210
2211   CHECK_STRING (string, 0);
2212
2213   temp = (unsigned char *) alloca (XSTRING (string)->size_byte * 2);
2214
2215   /* Now copy the data into the new string, inserting escapes. */
2216
2217   in = XSTRING (string)->data;
2218   end = in + XSTRING (string)->size_byte;
2219   out = temp;
2220
2221   for (; in != end; in++)
2222     {
2223       if (*in == '[' || *in == ']'
2224           || *in == '*' || *in == '.' || *in == '\\'
2225           || *in == '?' || *in == '+'
2226           || *in == '^' || *in == '$')
2227         *out++ = '\\', backslashes_added++;
2228       *out++ = *in;
2229     }
2230
2231   return make_multibyte_string (temp,
2232                                 XSTRING (string)->size + backslashes_added,
2233                                 out - temp);
2234 }
2235 \f
2236 syms_of_search ()
2237 {
2238   register int i;
2239
2240   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2241     {
2242       searchbufs[i].buf.allocated = 100;
2243       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2244       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2245       searchbufs[i].regexp = Qnil;
2246       staticpro (&searchbufs[i].regexp);
2247       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2248     }
2249   searchbuf_head = &searchbufs[0];
2250
2251   Qsearch_failed = intern ("search-failed");
2252   staticpro (&Qsearch_failed);
2253   Qinvalid_regexp = intern ("invalid-regexp");
2254   staticpro (&Qinvalid_regexp);
2255
2256   Fput (Qsearch_failed, Qerror_conditions,
2257         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2258   Fput (Qsearch_failed, Qerror_message,
2259         build_string ("Search failed"));
2260
2261   Fput (Qinvalid_regexp, Qerror_conditions,
2262         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2263   Fput (Qinvalid_regexp, Qerror_message,
2264         build_string ("Invalid regexp"));
2265
2266   last_thing_searched = Qnil;
2267   staticpro (&last_thing_searched);
2268
2269   defsubr (&Slooking_at);
2270   defsubr (&Sposix_looking_at);
2271   defsubr (&Sstring_match);
2272   defsubr (&Sposix_string_match);
2273   defsubr (&Ssearch_forward);
2274   defsubr (&Ssearch_backward);
2275   defsubr (&Sword_search_forward);
2276   defsubr (&Sword_search_backward);
2277   defsubr (&Sre_search_forward);
2278   defsubr (&Sre_search_backward);
2279   defsubr (&Sposix_search_forward);
2280   defsubr (&Sposix_search_backward);
2281   defsubr (&Sreplace_match);
2282   defsubr (&Smatch_beginning);
2283   defsubr (&Smatch_end);
2284   defsubr (&Smatch_data);
2285   defsubr (&Sstore_match_data);
2286   defsubr (&Sregexp_quote);
2287 }