(scan_sexps_forward): Prefer 2-char comment-starter over a 1-char one.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
bd2cbd56
SM
2 Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
3 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc
KH
43 struct regexp_cache *next;
44 Lisp_Object regexp;
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
b819a390
RS
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
487282dc 49};
ca1d1d23 50
487282dc
KH
51/* The instances of that struct. */
52struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 53
487282dc
KH
54/* The head of the linked list; points to the most recently used buffer. */
55struct regexp_cache *searchbuf_head;
ca1d1d23 56
ca1d1d23 57
4746118a
JB
58/* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
1113d9db
JB
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
487282dc
KH
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
ca1d1d23
JB
75static struct re_registers search_regs;
76
daa37602
JB
77/* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80static Lisp_Object last_thing_searched;
ca1d1d23 81
8e6208c5 82/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
83
84Lisp_Object Qinvalid_regexp;
85
ca325161 86static void set_search_regs ();
044f81f1 87static void save_search_regs ();
facdc750
RS
88static int simple_search ();
89static int boyer_moore ();
b819a390
RS
90static int search_buffer ();
91
/* Report, as a Lisp error, that the regexp matcher overflowed its
   stack.  The callers in this file invoke it when a match or search
   routine returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
97
b819a390
RS
98/* Compile a regexp and signal a Lisp error if anything goes wrong.
99 PATTERN is the pattern to compile.
100 CP is the place to put the result.
facdc750 101 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
102 REGP is the structure that says where to store the "register"
103 values that will result from matching this pattern.
104 If it is 0, we should compile the pattern not to record any
105 subexpression bounds.
106 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
107 for this pattern. 0 means backtrack only enough to get a valid match.
108 MULTIBYTE is nonzero if we want to handle multibyte characters in
109 PATTERN. 0 means all multibyte characters are recognized just as
110 sequences of binary data. */
ca1d1d23 111
487282dc 112static void
5679531d 113compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 114 struct regexp_cache *cp;
ca1d1d23 115 Lisp_Object pattern;
facdc750 116 Lisp_Object translate;
487282dc 117 struct re_registers *regp;
b819a390 118 int posix;
5679531d 119 int multibyte;
ca1d1d23 120{
7276d3d8 121 unsigned char *raw_pattern;
f8bd51c4 122 int raw_pattern_size;
d451e4db 123 char *val;
b819a390 124 reg_syntax_t old;
ca1d1d23 125
f8bd51c4
KH
126 /* MULTIBYTE says whether the text to be searched is multibyte.
127 We must convert PATTERN to match that, or we will not really
128 find things right. */
129
130 if (multibyte == STRING_MULTIBYTE (pattern))
131 {
d5db4077
KR
132 raw_pattern = (unsigned char *) SDATA (pattern);
133 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
134 }
135 else if (multibyte)
136 {
d5db4077
KR
137 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
138 SCHARS (pattern));
7276d3d8 139 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
140 copy_text (SDATA (pattern), raw_pattern,
141 SCHARS (pattern), 0, 1);
f8bd51c4
KH
142 }
143 else
144 {
145 /* Converting multibyte to single-byte.
146
147 ??? Perhaps this conversion should be done in a special way
148 by subtracting nonascii-insert-offset from each non-ASCII char,
149 so that only the multibyte chars which really correspond to
150 the chosen single-byte character set can possibly match. */
d5db4077 151 raw_pattern_size = SCHARS (pattern);
7276d3d8 152 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
153 copy_text (SDATA (pattern), raw_pattern,
154 SBYTES (pattern), 1, 0);
f8bd51c4
KH
155 }
156
487282dc 157 cp->regexp = Qnil;
59fab369 158 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 159 cp->posix = posix;
5679531d 160 cp->buf.multibyte = multibyte;
9ac0d9e0 161 BLOCK_INPUT;
fb4a568d 162 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 163 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
7276d3d8
RS
164 val = (char *) re_compile_pattern ((char *)raw_pattern,
165 raw_pattern_size, &cp->buf);
b819a390 166 re_set_syntax (old);
9ac0d9e0 167 UNBLOCK_INPUT;
ca1d1d23 168 if (val)
487282dc 169 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 170
487282dc 171 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
172}
173
6efc7887
RS
174/* Shrink each compiled regexp buffer in the cache
175 to the size actually used right now.
176 This is called from garbage collection. */
177
178void
179shrink_regexp_cache ()
180{
a968f437 181 struct regexp_cache *cp;
6efc7887
RS
182
183 for (cp = searchbuf_head; cp != 0; cp = cp->next)
184 {
185 cp->buf.allocated = cp->buf.used;
186 cp->buf.buffer
b23c0a83 187 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
188 }
189}
190
487282dc 191/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
192 the cache.
193 PATTERN is the pattern to compile.
facdc750 194 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
195 REGP is the structure that says where to store the "register"
196 values that will result from matching this pattern.
197 If it is 0, we should compile the pattern not to record any
198 subexpression bounds.
199 POSIX is nonzero if we want full backtracking (POSIX style)
200 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
201
202struct re_pattern_buffer *
0c8533c6 203compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
204 Lisp_Object pattern;
205 struct re_registers *regp;
facdc750 206 Lisp_Object translate;
0c8533c6 207 int posix, multibyte;
487282dc
KH
208{
209 struct regexp_cache *cp, **cpp;
210
211 for (cpp = &searchbuf_head; ; cpp = &cp->next)
212 {
213 cp = *cpp;
f1b9c7c1
KR
214 /* Entries are initialized to nil, and may be set to nil by
215 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
216 string accessors in those cases. However, compile_pattern_1
217 is only applied to the cache entry we pick here to reuse. So
218 nil should never appear before a non-nil entry. */
7c752c80 219 if (NILP (cp->regexp))
f1b9c7c1 220 goto compile_it;
d5db4077 221 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 222 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 223 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 224 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d
KH
225 && cp->posix == posix
226 && cp->buf.multibyte == multibyte)
487282dc
KH
227 break;
228
f1b9c7c1
KR
229 /* If we're at the end of the cache, compile into the nil cell
230 we found, or the last (least recently used) cell with a
231 string value. */
487282dc
KH
232 if (cp->next == 0)
233 {
f1b9c7c1 234 compile_it:
5679531d 235 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
236 break;
237 }
238 }
239
240 /* When we get here, cp (aka *cpp) contains the compiled pattern,
241 either because we found it in the cache or because we just compiled it.
242 Move it to the front of the queue to mark it as most recently used. */
243 *cpp = cp->next;
244 cp->next = searchbuf_head;
245 searchbuf_head = cp;
1113d9db 246
6639708c
RS
247 /* Advise the searching functions about the space we have allocated
248 for register data. */
249 if (regp)
250 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
251
487282dc 252 return &cp->buf;
ca1d1d23
JB
253}
254
255/* Error condition used for failing searches */
256Lisp_Object Qsearch_failed;
257
258Lisp_Object
259signal_failure (arg)
260 Lisp_Object arg;
261{
262 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
263 return Qnil;
264}
265\f
b819a390
RS
266static Lisp_Object
267looking_at_1 (string, posix)
ca1d1d23 268 Lisp_Object string;
b819a390 269 int posix;
ca1d1d23
JB
270{
271 Lisp_Object val;
272 unsigned char *p1, *p2;
273 int s1, s2;
274 register int i;
487282dc 275 struct re_pattern_buffer *bufp;
ca1d1d23 276
7074fde6
FP
277 if (running_asynch_code)
278 save_search_regs ();
279
b7826503 280 CHECK_STRING (string);
487282dc
KH
281 bufp = compile_pattern (string, &search_regs,
282 (!NILP (current_buffer->case_fold_search)
facdc750 283 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
284 posix,
285 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
286
287 immediate_quit = 1;
288 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
289
290 /* Get pointers and sizes of the two strings
291 that make up the visible portion of the buffer. */
292
293 p1 = BEGV_ADDR;
fa8ed3e0 294 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 295 p2 = GAP_END_ADDR;
fa8ed3e0 296 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
297 if (s1 < 0)
298 {
299 p2 = p1;
fa8ed3e0 300 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
301 s1 = 0;
302 }
303 if (s2 < 0)
304 {
fa8ed3e0 305 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
306 s2 = 0;
307 }
8bb43c28
RS
308
309 re_match_object = Qnil;
177c0ea7 310
487282dc 311 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
312 PT_BYTE - BEGV_BYTE, &search_regs,
313 ZV_BYTE - BEGV_BYTE);
de182d70 314 immediate_quit = 0;
177c0ea7 315
ca1d1d23
JB
316 if (i == -2)
317 matcher_overflow ();
318
319 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
320 if (i >= 0)
321 for (i = 0; i < search_regs.num_regs; i++)
322 if (search_regs.start[i] >= 0)
323 {
324 search_regs.start[i]
325 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
326 search_regs.end[i]
327 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
328 }
a3668d92 329 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
330 return val;
331}
332
b819a390 333DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
334 doc: /* Return t if text after point matches regular expression REGEXP.
335This function modifies the match data that `match-beginning',
336`match-end' and `match-data' access; save and restore the match
337data if you want to preserve them. */)
338 (regexp)
94f94972 339 Lisp_Object regexp;
b819a390 340{
94f94972 341 return looking_at_1 (regexp, 0);
b819a390
RS
342}
343
344DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
345 doc: /* Return t if text after point matches regular expression REGEXP.
346Find the longest match, in accord with Posix regular expression rules.
347This function modifies the match data that `match-beginning',
348`match-end' and `match-data' access; save and restore the match
349data if you want to preserve them. */)
350 (regexp)
94f94972 351 Lisp_Object regexp;
b819a390 352{
94f94972 353 return looking_at_1 (regexp, 1);
b819a390
RS
354}
355\f
356static Lisp_Object
357string_match_1 (regexp, string, start, posix)
ca1d1d23 358 Lisp_Object regexp, string, start;
b819a390 359 int posix;
ca1d1d23
JB
360{
361 int val;
487282dc 362 struct re_pattern_buffer *bufp;
0c8533c6
RS
363 int pos, pos_byte;
364 int i;
ca1d1d23 365
7074fde6
FP
366 if (running_asynch_code)
367 save_search_regs ();
368
b7826503
PJ
369 CHECK_STRING (regexp);
370 CHECK_STRING (string);
ca1d1d23
JB
371
372 if (NILP (start))
0c8533c6 373 pos = 0, pos_byte = 0;
ca1d1d23
JB
374 else
375 {
d5db4077 376 int len = SCHARS (string);
ca1d1d23 377
b7826503 378 CHECK_NUMBER (start);
0c8533c6
RS
379 pos = XINT (start);
380 if (pos < 0 && -pos <= len)
381 pos = len + pos;
382 else if (0 > pos || pos > len)
ca1d1d23 383 args_out_of_range (string, start);
0c8533c6 384 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
385 }
386
487282dc
KH
387 bufp = compile_pattern (regexp, &search_regs,
388 (!NILP (current_buffer->case_fold_search)
facdc750 389 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
390 posix,
391 STRING_MULTIBYTE (string));
ca1d1d23 392 immediate_quit = 1;
8bb43c28 393 re_match_object = string;
177c0ea7 394
d5db4077
KR
395 val = re_search (bufp, (char *) SDATA (string),
396 SBYTES (string), pos_byte,
397 SBYTES (string) - pos_byte,
ca1d1d23
JB
398 &search_regs);
399 immediate_quit = 0;
daa37602 400 last_thing_searched = Qt;
ca1d1d23
JB
401 if (val == -2)
402 matcher_overflow ();
403 if (val < 0) return Qnil;
0c8533c6
RS
404
405 for (i = 0; i < search_regs.num_regs; i++)
406 if (search_regs.start[i] >= 0)
407 {
408 search_regs.start[i]
409 = string_byte_to_char (string, search_regs.start[i]);
410 search_regs.end[i]
411 = string_byte_to_char (string, search_regs.end[i]);
412 }
413
414 return make_number (string_byte_to_char (string, val));
ca1d1d23 415}
e59a8453 416
b819a390 417DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077
PJ
418 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
419Case is ignored if `case-fold-search' is non-nil in the current buffer.
420If third arg START is non-nil, start search at that index in STRING.
421For index of first char beyond the match, do (match-end 0).
422`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
423matched by parenthesis constructs in the pattern.
424
425You can use the function `match-string' to extract the substrings
426matched by the parenthesis constructions in REGEXP. */)
8c1a1077 427 (regexp, string, start)
b819a390
RS
428 Lisp_Object regexp, string, start;
429{
430 return string_match_1 (regexp, string, start, 0);
431}
432
433DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
434 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
435Find the longest match, in accord with Posix regular expression rules.
436Case is ignored if `case-fold-search' is non-nil in the current buffer.
437If third arg START is non-nil, start search at that index in STRING.
438For index of first char beyond the match, do (match-end 0).
439`match-end' and `match-beginning' also give indices of substrings
440matched by parenthesis constructs in the pattern. */)
441 (regexp, string, start)
b819a390
RS
442 Lisp_Object regexp, string, start;
443{
444 return string_match_1 (regexp, string, start, 1);
445}
446
e59a8453
RS
447/* Match REGEXP against STRING, searching all of STRING,
448 and return the index of the match, or negative on failure.
449 This does not clobber the match data. */
450
451int
452fast_string_match (regexp, string)
453 Lisp_Object regexp, string;
454{
455 int val;
487282dc 456 struct re_pattern_buffer *bufp;
e59a8453 457
facdc750
RS
458 bufp = compile_pattern (regexp, 0, Qnil,
459 0, STRING_MULTIBYTE (string));
e59a8453 460 immediate_quit = 1;
8bb43c28 461 re_match_object = string;
177c0ea7 462
d5db4077
KR
463 val = re_search (bufp, (char *) SDATA (string),
464 SBYTES (string), 0,
465 SBYTES (string), 0);
e59a8453
RS
466 immediate_quit = 0;
467 return val;
468}
5679531d
KH
469
470/* Match REGEXP against STRING, searching all of STRING ignoring case,
471 and return the index of the match, or negative on failure.
0c8533c6
RS
472 This does not clobber the match data.
473 We assume that STRING contains single-byte characters. */
5679531d
KH
474
475extern Lisp_Object Vascii_downcase_table;
476
477int
b4577c63 478fast_c_string_match_ignore_case (regexp, string)
5679531d 479 Lisp_Object regexp;
96b80561 480 const char *string;
5679531d
KH
481{
482 int val;
483 struct re_pattern_buffer *bufp;
484 int len = strlen (string);
485
0c8533c6 486 regexp = string_make_unibyte (regexp);
b4577c63 487 re_match_object = Qt;
5679531d 488 bufp = compile_pattern (regexp, 0,
facdc750 489 Vascii_downcase_table, 0,
f8bd51c4 490 0);
5679531d
KH
491 immediate_quit = 1;
492 val = re_search (bufp, string, len, 0, len, 0);
493 immediate_quit = 0;
494 return val;
495}
be5f4dfb
KH
496
497/* Like fast_string_match but ignore case. */
498
499int
500fast_string_match_ignore_case (regexp, string)
501 Lisp_Object regexp, string;
502{
503 int val;
504 struct re_pattern_buffer *bufp;
505
506 bufp = compile_pattern (regexp, 0, Vascii_downcase_table,
507 0, STRING_MULTIBYTE (string));
508 immediate_quit = 1;
509 re_match_object = string;
510
511 val = re_search (bufp, (char *) SDATA (string),
512 SBYTES (string), 0,
513 SBYTES (string), 0);
514 immediate_quit = 0;
515 return val;
516}
ca1d1d23 517\f
9169c321
JB
518/* The newline cache: remembering which sections of text have no newlines. */
519
520/* If the user has requested newline caching, make sure it's on.
521 Otherwise, make sure it's off.
522 This is our cheezy way of associating an action with the change of
523 state of a buffer-local variable. */
524static void
525newline_cache_on_off (buf)
526 struct buffer *buf;
527{
528 if (NILP (buf->cache_long_line_scans))
529 {
530 /* It should be off. */
531 if (buf->newline_cache)
532 {
533 free_region_cache (buf->newline_cache);
534 buf->newline_cache = 0;
535 }
536 }
537 else
538 {
539 /* It should be on. */
540 if (buf->newline_cache == 0)
541 buf->newline_cache = new_region_cache ();
542 }
543}
544
545\f
546/* Search for COUNT instances of the character TARGET between START and END.
547
548 If COUNT is positive, search forwards; END must be >= START.
549 If COUNT is negative, search backwards for the -COUNTth instance;
550 END must be <= START.
551 If COUNT is zero, do anything you please; run rogue, for all I care.
552
553 If END is zero, use BEGV or ZV instead, as appropriate for the
554 direction indicated by COUNT.
ffd56f97
JB
555
556 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 557 position past the COUNTth match. Note that for reverse motion
5bfe95c9 558 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 559
9169c321
JB
560 If we don't find COUNT instances before reaching END, set *SHORTAGE
561 to the number of TARGETs left unfound, and return END.
ffd56f97 562
087a5f81
RS
563 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
564 except when inside redisplay. */
565
dfcf069d 566int
9169c321
JB
567scan_buffer (target, start, end, count, shortage, allow_quit)
568 register int target;
569 int start, end;
570 int count;
571 int *shortage;
087a5f81 572 int allow_quit;
ca1d1d23 573{
9169c321 574 struct region_cache *newline_cache;
177c0ea7 575 int direction;
ffd56f97 576
9169c321
JB
577 if (count > 0)
578 {
579 direction = 1;
580 if (! end) end = ZV;
581 }
582 else
583 {
584 direction = -1;
585 if (! end) end = BEGV;
586 }
ffd56f97 587
9169c321
JB
588 newline_cache_on_off (current_buffer);
589 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
590
591 if (shortage != 0)
592 *shortage = 0;
593
087a5f81 594 immediate_quit = allow_quit;
ca1d1d23 595
ffd56f97 596 if (count > 0)
9169c321 597 while (start != end)
ca1d1d23 598 {
9169c321
JB
599 /* Our innermost scanning loop is very simple; it doesn't know
600 about gaps, buffer ends, or the newline cache. ceiling is
601 the position of the last character before the next such
602 obstacle --- the last character the dumb search loop should
603 examine. */
fa8ed3e0
RS
604 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
605 int start_byte = CHAR_TO_BYTE (start);
67ce527d 606 int tem;
9169c321
JB
607
608 /* If we're looking for a newline, consult the newline cache
609 to see where we can avoid some scanning. */
610 if (target == '\n' && newline_cache)
611 {
612 int next_change;
613 immediate_quit = 0;
614 while (region_cache_forward
fa8ed3e0
RS
615 (current_buffer, newline_cache, start_byte, &next_change))
616 start_byte = next_change;
cbe0db0d 617 immediate_quit = allow_quit;
9169c321 618
fa8ed3e0
RS
619 /* START should never be after END. */
620 if (start_byte > ceiling_byte)
621 start_byte = ceiling_byte;
9169c321
JB
622
623 /* Now the text after start is an unknown region, and
624 next_change is the position of the next known region. */
fa8ed3e0 625 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
626 }
627
628 /* The dumb loop can only scan text stored in contiguous
629 bytes. BUFFER_CEILING_OF returns the last character
630 position that is contiguous, so the ceiling is the
631 position after that. */
67ce527d
KH
632 tem = BUFFER_CEILING_OF (start_byte);
633 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
634
635 {
177c0ea7 636 /* The termination address of the dumb loop. */
fa8ed3e0
RS
637 register unsigned char *ceiling_addr
638 = BYTE_POS_ADDR (ceiling_byte) + 1;
639 register unsigned char *cursor
640 = BYTE_POS_ADDR (start_byte);
9169c321
JB
641 unsigned char *base = cursor;
642
643 while (cursor < ceiling_addr)
644 {
645 unsigned char *scan_start = cursor;
646
647 /* The dumb loop. */
648 while (*cursor != target && ++cursor < ceiling_addr)
649 ;
650
651 /* If we're looking for newlines, cache the fact that
652 the region from start to cursor is free of them. */
653 if (target == '\n' && newline_cache)
654 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
655 start_byte + scan_start - base,
656 start_byte + cursor - base);
9169c321
JB
657
658 /* Did we find the target character? */
659 if (cursor < ceiling_addr)
660 {
661 if (--count == 0)
662 {
663 immediate_quit = 0;
fa8ed3e0 664 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
665 }
666 cursor++;
667 }
668 }
669
fa8ed3e0 670 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 671 }
ca1d1d23
JB
672 }
673 else
9169c321
JB
674 while (start > end)
675 {
676 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
677 int ceiling_byte = CHAR_TO_BYTE (end);
678 int start_byte = CHAR_TO_BYTE (start);
67ce527d 679 int tem;
9169c321
JB
680
681 /* Consult the newline cache, if appropriate. */
682 if (target == '\n' && newline_cache)
683 {
684 int next_change;
685 immediate_quit = 0;
686 while (region_cache_backward
fa8ed3e0
RS
687 (current_buffer, newline_cache, start_byte, &next_change))
688 start_byte = next_change;
cbe0db0d 689 immediate_quit = allow_quit;
9169c321
JB
690
691 /* Start should never be at or before end. */
fa8ed3e0
RS
692 if (start_byte <= ceiling_byte)
693 start_byte = ceiling_byte + 1;
9169c321
JB
694
695 /* Now the text before start is an unknown region, and
696 next_change is the position of the next known region. */
fa8ed3e0 697 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
698 }
699
700 /* Stop scanning before the gap. */
67ce527d
KH
701 tem = BUFFER_FLOOR_OF (start_byte - 1);
702 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
703
704 {
705 /* The termination address of the dumb loop. */
fa8ed3e0
RS
706 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
707 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
708 unsigned char *base = cursor;
709
710 while (cursor >= ceiling_addr)
711 {
712 unsigned char *scan_start = cursor;
713
714 while (*cursor != target && --cursor >= ceiling_addr)
715 ;
716
717 /* If we're looking for newlines, cache the fact that
718 the region from after the cursor to start is free of them. */
719 if (target == '\n' && newline_cache)
720 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
721 start_byte + cursor - base,
722 start_byte + scan_start - base);
9169c321
JB
723
724 /* Did we find the target character? */
725 if (cursor >= ceiling_addr)
726 {
727 if (++count >= 0)
728 {
729 immediate_quit = 0;
fa8ed3e0 730 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
731 }
732 cursor--;
733 }
734 }
735
fa8ed3e0 736 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
737 }
738 }
739
ca1d1d23
JB
740 immediate_quit = 0;
741 if (shortage != 0)
ffd56f97 742 *shortage = count * direction;
9169c321 743 return start;
ca1d1d23 744}
fa8ed3e0
RS
745\f
746/* Search for COUNT instances of a line boundary, which means either a
747 newline or (if selective display enabled) a carriage return.
748 Start at START. If COUNT is negative, search backwards.
749
750 We report the resulting position by calling TEMP_SET_PT_BOTH.
751
752 If we find COUNT instances. we position after (always after,
753 even if scanning backwards) the COUNTth match, and return 0.
754
755 If we don't find COUNT instances before reaching the end of the
756 buffer (or the beginning, if scanning backwards), we return
757 the number of line boundaries left unfound, and position at
758 the limit we bumped up against.
759
760 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 761 except in special cases. */
ca1d1d23 762
63fa018d 763int
fa8ed3e0
RS
764scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
765 int start, start_byte;
766 int limit, limit_byte;
767 register int count;
768 int allow_quit;
63fa018d 769{
fa8ed3e0
RS
770 int direction = ((count > 0) ? 1 : -1);
771
772 register unsigned char *cursor;
773 unsigned char *base;
774
775 register int ceiling;
776 register unsigned char *ceiling_addr;
777
d5d57b92
RS
778 int old_immediate_quit = immediate_quit;
779
fa8ed3e0
RS
780 /* The code that follows is like scan_buffer
781 but checks for either newline or carriage return. */
782
d5d57b92
RS
783 if (allow_quit)
784 immediate_quit++;
fa8ed3e0
RS
785
786 start_byte = CHAR_TO_BYTE (start);
787
788 if (count > 0)
789 {
790 while (start_byte < limit_byte)
791 {
792 ceiling = BUFFER_CEILING_OF (start_byte);
793 ceiling = min (limit_byte - 1, ceiling);
794 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
795 base = (cursor = BYTE_POS_ADDR (start_byte));
796 while (1)
797 {
798 while (*cursor != '\n' && ++cursor != ceiling_addr)
799 ;
800
801 if (cursor != ceiling_addr)
802 {
803 if (--count == 0)
804 {
d5d57b92 805 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
806 start_byte = start_byte + cursor - base + 1;
807 start = BYTE_TO_CHAR (start_byte);
808 TEMP_SET_PT_BOTH (start, start_byte);
809 return 0;
810 }
811 else
812 if (++cursor == ceiling_addr)
813 break;
814 }
815 else
816 break;
817 }
818 start_byte += cursor - base;
819 }
820 }
821 else
822 {
fa8ed3e0
RS
823 while (start_byte > limit_byte)
824 {
825 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
826 ceiling = max (limit_byte, ceiling);
827 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
828 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
829 while (1)
830 {
831 while (--cursor != ceiling_addr && *cursor != '\n')
832 ;
833
834 if (cursor != ceiling_addr)
835 {
836 if (++count == 0)
837 {
d5d57b92 838 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
839 /* Return the position AFTER the match we found. */
840 start_byte = start_byte + cursor - base + 1;
841 start = BYTE_TO_CHAR (start_byte);
842 TEMP_SET_PT_BOTH (start, start_byte);
843 return 0;
844 }
845 }
846 else
847 break;
848 }
849 /* Here we add 1 to compensate for the last decrement
850 of CURSOR, which took it past the valid range. */
851 start_byte += cursor - base + 1;
852 }
853 }
854
855 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 856 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
857
858 return count * direction;
63fa018d
RS
859}
860
/* Scan for CNT newlines starting at FROM, with no explicit limit
   (scan_buffer then uses BEGV or ZV) and without enabling
   immediate_quit.  Return whatever scan_buffer returns; the shortage
   is not reported.  */
int
find_next_newline_no_quit (from, cnt)
     register int from, cnt;
{
  int *no_shortage = (int *) 0;

  return scan_buffer ('\n', from, 0, cnt, no_shortage, 0);
}
867
/* Like find_next_newline, but return the position before the newline,
   not after, and only search up to TO.  This isn't just
   find_next_newline (...)-1, because the scan might stop at TO without
   finding a newline, in which case the position is returned as is.  */

int
find_before_next_newline (from, to, cnt)
     int from, to, cnt;
{
  int shortage;
  int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);

  /* A shortage of zero means all CNT newlines were found, so POS is
     just past a newline; step back over it.  */
  return shortage == 0 ? pos - 1 : pos;
}
884\f
ca1d1d23
JB
885/* Subroutines of Lisp buffer search functions. */
886
887static Lisp_Object
b819a390 888search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
889 Lisp_Object string, bound, noerror, count;
890 int direction;
891 int RE;
b819a390 892 int posix;
ca1d1d23
JB
893{
894 register int np;
9f43ad85 895 int lim, lim_byte;
ca1d1d23
JB
896 int n = direction;
897
898 if (!NILP (count))
899 {
b7826503 900 CHECK_NUMBER (count);
ca1d1d23
JB
901 n *= XINT (count);
902 }
903
b7826503 904 CHECK_STRING (string);
ca1d1d23 905 if (NILP (bound))
9f43ad85
RS
906 {
907 if (n > 0)
908 lim = ZV, lim_byte = ZV_BYTE;
909 else
910 lim = BEGV, lim_byte = BEGV_BYTE;
911 }
ca1d1d23
JB
912 else
913 {
b7826503 914 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 915 lim = XINT (bound);
6ec8bbd2 916 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
917 error ("Invalid search bound (wrong side of point)");
918 if (lim > ZV)
9f43ad85 919 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 920 else if (lim < BEGV)
9f43ad85 921 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
922 else
923 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
924 }
925
9f43ad85 926 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 927 (!NILP (current_buffer->case_fold_search)
facdc750 928 ? current_buffer->case_canon_table
3135e9fd 929 : Qnil),
ca1d1d23 930 (!NILP (current_buffer->case_fold_search)
facdc750 931 ? current_buffer->case_eqv_table
3135e9fd 932 : Qnil),
b819a390 933 posix);
ca1d1d23
JB
934 if (np <= 0)
935 {
936 if (NILP (noerror))
937 return signal_failure (string);
938 if (!EQ (noerror, Qt))
939 {
940 if (lim < BEGV || lim > ZV)
941 abort ();
9f43ad85 942 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
943 return Qnil;
944#if 0 /* This would be clean, but maybe programs depend on
945 a value of nil here. */
481399bf 946 np = lim;
a5f217b8 947#endif
ca1d1d23 948 }
481399bf
RS
949 else
950 return Qnil;
ca1d1d23
JB
951 }
952
953 if (np < BEGV || np > ZV)
954 abort ();
955
956 SET_PT (np);
957
958 return make_number (np);
959}
960\f
fa8ed3e0
RS
961/* Return 1 if REGEXP it matches just one constant string. */
962
b6d6a51c
KH
963static int
964trivial_regexp_p (regexp)
965 Lisp_Object regexp;
966{
d5db4077
KR
967 int len = SBYTES (regexp);
968 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
969 while (--len >= 0)
970 {
971 switch (*s++)
972 {
973 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
974 return 0;
975 case '\\':
976 if (--len < 0)
977 return 0;
978 switch (*s++)
979 {
980 case '|': case '(': case ')': case '`': case '\'': case 'b':
981 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 982 case 'S': case '=': case '{': case '}': case '_':
5679531d 983 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 984 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
985 case '6': case '7': case '8': case '9':
986 return 0;
987 }
988 }
989 }
990 return 1;
991}
992
ca325161 993/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 994 starting at position POS and stopping at position LIM,
b819a390 995 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
996 a regular expression if RE is true.
997
998 If N is positive, searching is forward and LIM must be greater than POS.
999 If N is negative, searching is backward and LIM must be less than POS.
1000
facdc750 1001 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1002 or else the position at the beginning of the Nth occurrence
b819a390
RS
1003 (if searching backward) or the end (if searching forward).
1004
1005 POSIX is nonzero if we want full backtracking (POSIX style)
1006 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1007
aff2ce94
RS
/* Set OUT to the translation of character D under the table TRT.

   If TRT is nil, or the element Faref returns for D is not an integer,
   OUT is set to D itself.  TRT and D are each evaluated exactly once
   (D is converted to int), so D may have side effects and OUT may be
   the same lvalue as D.  The locals carry a trailing underscore so
   they cannot capture a variable named in an argument.  */

#define TRANSLATE(out, trt, d)						\
do									\
  {									\
    Lisp_Object translate_tbl_ = (trt);					\
    int translate_src_ = (d);						\
    if (! NILP (translate_tbl_))					\
      {									\
	Lisp_Object translate_ent_;					\
	translate_ent_ = Faref (translate_tbl_,				\
				make_number (translate_src_));		\
	if (INTEGERP (translate_ent_))					\
	  (out) = XINT (translate_ent_);				\
	else								\
	  (out) = translate_src_;					\
      }									\
    else								\
      (out) = translate_src_;						\
  }									\
while (0)
facdc750 1024
b819a390 1025static int
9f43ad85
RS
1026search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1027 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1028 Lisp_Object string;
1029 int pos;
9f43ad85 1030 int pos_byte;
ca1d1d23 1031 int lim;
9f43ad85 1032 int lim_byte;
ca1d1d23
JB
1033 int n;
1034 int RE;
facdc750
RS
1035 Lisp_Object trt;
1036 Lisp_Object inverse_trt;
b819a390 1037 int posix;
ca1d1d23 1038{
d5db4077
KR
1039 int len = SCHARS (string);
1040 int len_byte = SBYTES (string);
facdc750 1041 register int i;
ca1d1d23 1042
7074fde6
FP
1043 if (running_asynch_code)
1044 save_search_regs ();
1045
a7e4cdde 1046 /* Searching 0 times means don't move. */
ca1d1d23 1047 /* Null string is found at starting position. */
a7e4cdde 1048 if (len == 0 || n == 0)
ca325161 1049 {
0353b28f 1050 set_search_regs (pos_byte, 0);
ca325161
RS
1051 return pos;
1052 }
3f57a499 1053
b6d6a51c 1054 if (RE && !trivial_regexp_p (string))
ca1d1d23 1055 {
facdc750
RS
1056 unsigned char *p1, *p2;
1057 int s1, s2;
487282dc
KH
1058 struct re_pattern_buffer *bufp;
1059
0c8533c6
RS
1060 bufp = compile_pattern (string, &search_regs, trt, posix,
1061 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1062
ca1d1d23
JB
1063 immediate_quit = 1; /* Quit immediately if user types ^G,
1064 because letting this function finish
1065 can take too long. */
1066 QUIT; /* Do a pending quit right away,
1067 to avoid paradoxical behavior */
1068 /* Get pointers and sizes of the two strings
1069 that make up the visible portion of the buffer. */
1070
1071 p1 = BEGV_ADDR;
fa8ed3e0 1072 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1073 p2 = GAP_END_ADDR;
fa8ed3e0 1074 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1075 if (s1 < 0)
1076 {
1077 p2 = p1;
fa8ed3e0 1078 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1079 s1 = 0;
1080 }
1081 if (s2 < 0)
1082 {
fa8ed3e0 1083 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1084 s2 = 0;
1085 }
8bb43c28 1086 re_match_object = Qnil;
177c0ea7 1087
ca1d1d23
JB
1088 while (n < 0)
1089 {
42db823b 1090 int val;
487282dc 1091 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1092 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1093 &search_regs,
42db823b 1094 /* Don't allow match past current point */
4996330b 1095 pos_byte - BEGV_BYTE);
ca1d1d23 1096 if (val == -2)
b6d6a51c
KH
1097 {
1098 matcher_overflow ();
1099 }
ca1d1d23
JB
1100 if (val >= 0)
1101 {
26aff150 1102 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1103 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1104 if (search_regs.start[i] >= 0)
1105 {
fa8ed3e0
RS
1106 search_regs.start[i]
1107 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1108 search_regs.end[i]
1109 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1110 }
a3668d92 1111 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1112 /* Set pos to the new position. */
1113 pos = search_regs.start[0];
1114 }
1115 else
1116 {
1117 immediate_quit = 0;
1118 return (n);
1119 }
1120 n++;
1121 }
1122 while (n > 0)
1123 {
42db823b 1124 int val;
487282dc 1125 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1126 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1127 &search_regs,
1128 lim_byte - BEGV_BYTE);
ca1d1d23 1129 if (val == -2)
b6d6a51c
KH
1130 {
1131 matcher_overflow ();
1132 }
ca1d1d23
JB
1133 if (val >= 0)
1134 {
26aff150 1135 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1136 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1137 if (search_regs.start[i] >= 0)
1138 {
fa8ed3e0
RS
1139 search_regs.start[i]
1140 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1141 search_regs.end[i]
1142 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1143 }
a3668d92 1144 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1145 pos = search_regs.end[0];
1146 }
1147 else
1148 {
1149 immediate_quit = 0;
1150 return (0 - n);
1151 }
1152 n--;
1153 }
1154 immediate_quit = 0;
1155 return (pos);
1156 }
1157 else /* non-RE case */
1158 {
facdc750
RS
1159 unsigned char *raw_pattern, *pat;
1160 int raw_pattern_size;
1161 int raw_pattern_size_byte;
1162 unsigned char *patbuf;
1163 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
d5db4077 1164 unsigned char *base_pat = SDATA (string);
facdc750 1165 int charset_base = -1;
040272ce 1166 int boyer_moore_ok = 1;
facdc750
RS
1167
1168 /* MULTIBYTE says whether the text to be searched is multibyte.
1169 We must convert PATTERN to match that, or we will not really
1170 find things right. */
1171
1172 if (multibyte == STRING_MULTIBYTE (string))
1173 {
d5db4077
KR
1174 raw_pattern = (unsigned char *) SDATA (string);
1175 raw_pattern_size = SCHARS (string);
1176 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1177 }
1178 else if (multibyte)
1179 {
d5db4077 1180 raw_pattern_size = SCHARS (string);
facdc750 1181 raw_pattern_size_byte
d5db4077 1182 = count_size_as_multibyte (SDATA (string),
facdc750 1183 raw_pattern_size);
7276d3d8 1184 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1185 copy_text (SDATA (string), raw_pattern,
1186 SCHARS (string), 0, 1);
facdc750
RS
1187 }
1188 else
1189 {
1190 /* Converting multibyte to single-byte.
1191
1192 ??? Perhaps this conversion should be done in a special way
1193 by subtracting nonascii-insert-offset from each non-ASCII char,
1194 so that only the multibyte chars which really correspond to
1195 the chosen single-byte character set can possibly match. */
d5db4077
KR
1196 raw_pattern_size = SCHARS (string);
1197 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1198 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1199 copy_text (SDATA (string), raw_pattern,
1200 SBYTES (string), 1, 0);
facdc750
RS
1201 }
1202
1203 /* Copy and optionally translate the pattern. */
1204 len = raw_pattern_size;
1205 len_byte = raw_pattern_size_byte;
1206 patbuf = (unsigned char *) alloca (len_byte);
1207 pat = patbuf;
1208 base_pat = raw_pattern;
1209 if (multibyte)
1210 {
1211 while (--len >= 0)
1212 {
daaa6ed8 1213 unsigned char str[MAX_MULTIBYTE_LENGTH];
aff2ce94 1214 int c, translated, inverse;
facdc750
RS
1215 int in_charlen, charlen;
1216
1217 /* If we got here and the RE flag is set, it's because we're
1218 dealing with a regexp known to be trivial, so the backslash
1219 just quotes the next character. */
1220 if (RE && *base_pat == '\\')
1221 {
1222 len--;
1223 len_byte--;
1224 base_pat++;
1225 }
1226
1227 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1228
facdc750 1229 /* Translate the character, if requested. */
aff2ce94 1230 TRANSLATE (translated, trt, c);
facdc750
RS
1231 /* If translation changed the byte-length, go back
1232 to the original character. */
daaa6ed8 1233 charlen = CHAR_STRING (translated, str);
facdc750
RS
1234 if (in_charlen != charlen)
1235 {
1236 translated = c;
daaa6ed8 1237 charlen = CHAR_STRING (c, str);
facdc750
RS
1238 }
1239
5ffaf437
RS
1240 /* If we are searching for something strange,
1241 an invalid multibyte code, don't use boyer-moore. */
1242 if (! ASCII_BYTE_P (translated)
1243 && (charlen == 1 /* 8bit code */
1244 || charlen != in_charlen /* invalid multibyte code */
1245 ))
1246 boyer_moore_ok = 0;
1247
aff2ce94
RS
1248 TRANSLATE (inverse, inverse_trt, c);
1249
facdc750
RS
1250 /* Did this char actually get translated?
1251 Would any other char get translated into it? */
aff2ce94 1252 if (translated != c || inverse != c)
facdc750
RS
1253 {
1254 /* Keep track of which character set row
1255 contains the characters that need translation. */
5ffaf437 1256 int charset_base_code = c & ~CHAR_FIELD3_MASK;
d2ac725b
KH
1257 int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1258
1259 if (charset_base_code != inverse_charset_base)
1260 boyer_moore_ok = 0;
1261 else if (charset_base == -1)
facdc750
RS
1262 charset_base = charset_base_code;
1263 else if (charset_base != charset_base_code)
1264 /* If two different rows appear, needing translation,
1265 then we cannot use boyer_moore search. */
040272ce 1266 boyer_moore_ok = 0;
aff2ce94 1267 }
facdc750
RS
1268
1269 /* Store this character into the translated pattern. */
1270 bcopy (str, pat, charlen);
1271 pat += charlen;
1272 base_pat += in_charlen;
1273 len_byte -= in_charlen;
1274 }
1275 }
1276 else
1277 {
040272ce
KH
1278 /* Unibyte buffer. */
1279 charset_base = 0;
facdc750
RS
1280 while (--len >= 0)
1281 {
040272ce 1282 int c, translated;
facdc750
RS
1283
1284 /* If we got here and the RE flag is set, it's because we're
1285 dealing with a regexp known to be trivial, so the backslash
1286 just quotes the next character. */
1287 if (RE && *base_pat == '\\')
1288 {
1289 len--;
1290 base_pat++;
1291 }
1292 c = *base_pat++;
aff2ce94 1293 TRANSLATE (translated, trt, c);
facdc750
RS
1294 *pat++ = translated;
1295 }
1296 }
1297
1298 len_byte = pat - patbuf;
1299 len = raw_pattern_size;
1300 pat = base_pat = patbuf;
1301
040272ce 1302 if (boyer_moore_ok)
facdc750 1303 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1304 pos, pos_byte, lim, lim_byte,
1305 charset_base);
facdc750
RS
1306 else
1307 return simple_search (n, pat, len, len_byte, trt,
1308 pos, pos_byte, lim, lim_byte);
1309 }
1310}
1311\f
1312/* Do a simple string search N times for the string PAT,
1313 whose length is LEN/LEN_BYTE,
1314 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1315 TRT is the translation table.
f8bd51c4 1316
facdc750
RS
1317 Return the character position where the match is found.
1318 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1319
facdc750
RS
1320 This kind of search works regardless of what is in PAT and
1321 regardless of what is in TRT. It is used in cases where
1322 boyer_moore cannot work. */
1323
1324static int
1325simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1326 int n;
1327 unsigned char *pat;
1328 int len, len_byte;
1329 Lisp_Object trt;
1330 int pos, pos_byte;
1331 int lim, lim_byte;
1332{
1333 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1334 int forward = n > 0;
facdc750
RS
1335
1336 if (lim > pos && multibyte)
1337 while (n > 0)
1338 {
1339 while (1)
f8bd51c4 1340 {
facdc750
RS
1341 /* Try matching at position POS. */
1342 int this_pos = pos;
1343 int this_pos_byte = pos_byte;
1344 int this_len = len;
1345 int this_len_byte = len_byte;
1346 unsigned char *p = pat;
1347 if (pos + len > lim)
1348 goto stop;
1349
1350 while (this_len > 0)
1351 {
1352 int charlen, buf_charlen;
ab228c24 1353 int pat_ch, buf_ch;
facdc750 1354
ab228c24 1355 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1356 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1357 ZV_BYTE - this_pos_byte,
1358 buf_charlen);
aff2ce94 1359 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1360
1361 if (buf_ch != pat_ch)
1362 break;
ab228c24
RS
1363
1364 this_len_byte -= charlen;
1365 this_len--;
1366 p += charlen;
1367
1368 this_pos_byte += buf_charlen;
1369 this_pos++;
facdc750
RS
1370 }
1371
1372 if (this_len == 0)
1373 {
1374 pos += len;
1375 pos_byte += len_byte;
1376 break;
1377 }
1378
1379 INC_BOTH (pos, pos_byte);
f8bd51c4 1380 }
facdc750
RS
1381
1382 n--;
1383 }
1384 else if (lim > pos)
1385 while (n > 0)
1386 {
1387 while (1)
f8bd51c4 1388 {
facdc750
RS
1389 /* Try matching at position POS. */
1390 int this_pos = pos;
1391 int this_len = len;
1392 unsigned char *p = pat;
1393
1394 if (pos + len > lim)
1395 goto stop;
1396
1397 while (this_len > 0)
1398 {
1399 int pat_ch = *p++;
1400 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1401 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1402
1403 if (buf_ch != pat_ch)
1404 break;
ab228c24
RS
1405
1406 this_len--;
1407 this_pos++;
facdc750
RS
1408 }
1409
1410 if (this_len == 0)
1411 {
1412 pos += len;
1413 break;
1414 }
1415
1416 pos++;
f8bd51c4 1417 }
facdc750
RS
1418
1419 n--;
1420 }
1421 /* Backwards search. */
1422 else if (lim < pos && multibyte)
1423 while (n < 0)
1424 {
1425 while (1)
f8bd51c4 1426 {
facdc750
RS
1427 /* Try matching at position POS. */
1428 int this_pos = pos - len;
1429 int this_pos_byte = pos_byte - len_byte;
1430 int this_len = len;
1431 int this_len_byte = len_byte;
1432 unsigned char *p = pat;
1433
1434 if (pos - len < lim)
1435 goto stop;
1436
1437 while (this_len > 0)
1438 {
1439 int charlen, buf_charlen;
ab228c24 1440 int pat_ch, buf_ch;
facdc750 1441
ab228c24 1442 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1443 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1444 ZV_BYTE - this_pos_byte,
1445 buf_charlen);
aff2ce94 1446 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1447
1448 if (buf_ch != pat_ch)
1449 break;
ab228c24
RS
1450
1451 this_len_byte -= charlen;
1452 this_len--;
1453 p += charlen;
1454 this_pos_byte += buf_charlen;
1455 this_pos++;
facdc750
RS
1456 }
1457
1458 if (this_len == 0)
1459 {
1460 pos -= len;
1461 pos_byte -= len_byte;
1462 break;
1463 }
1464
1465 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1466 }
1467
facdc750
RS
1468 n++;
1469 }
1470 else if (lim < pos)
1471 while (n < 0)
1472 {
1473 while (1)
b6d6a51c 1474 {
facdc750
RS
1475 /* Try matching at position POS. */
1476 int this_pos = pos - len;
1477 int this_len = len;
1478 unsigned char *p = pat;
1479
1480 if (pos - len < lim)
1481 goto stop;
1482
1483 while (this_len > 0)
1484 {
1485 int pat_ch = *p++;
1486 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1487 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1488
1489 if (buf_ch != pat_ch)
1490 break;
ab228c24
RS
1491 this_len--;
1492 this_pos++;
facdc750
RS
1493 }
1494
1495 if (this_len == 0)
b6d6a51c 1496 {
facdc750
RS
1497 pos -= len;
1498 break;
b6d6a51c 1499 }
facdc750
RS
1500
1501 pos--;
b6d6a51c 1502 }
facdc750
RS
1503
1504 n++;
b6d6a51c 1505 }
facdc750
RS
1506
1507 stop:
1508 if (n == 0)
aff2ce94 1509 {
ab228c24
RS
1510 if (forward)
1511 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1512 else
1513 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1514
1515 return pos;
1516 }
facdc750
RS
1517 else if (n > 0)
1518 return -n;
1519 else
1520 return n;
1521}
1522\f
1523/* Do Boyer-Moore search N times for the string PAT,
1524 whose length is LEN/LEN_BYTE,
1525 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1526 DIRECTION says which direction we search in.
1527 TRT and INVERSE_TRT are translation tables.
1528
1529 This kind of search works if all the characters in PAT that have
1530 nontrivial translation are the same aside from the last byte. This
1531 makes it possible to translate just the last byte of a character,
1532 and do so after just a simple test of the context.
1533
1534 If that criterion is not satisfied, do not call this function. */
1535
1536static int
1537boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1538 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1539 int n;
1540 unsigned char *base_pat;
1541 int len, len_byte;
1542 Lisp_Object trt;
1543 Lisp_Object inverse_trt;
1544 int pos, pos_byte;
1545 int lim, lim_byte;
aff2ce94 1546 int charset_base;
facdc750
RS
1547{
1548 int direction = ((n > 0) ? 1 : -1);
1549 register int dirlen;
a968f437 1550 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1551 register int *BM_tab;
1552 int *BM_tab_base;
177c0ea7 1553 register unsigned char *cursor, *p_limit;
facdc750 1554 register int i, j;
cb6792d2 1555 unsigned char *pat, *pat_end;
facdc750
RS
1556 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1557
1558 unsigned char simple_translate[0400];
6bbd7a29
GM
1559 int translate_prev_byte = 0;
1560 int translate_anteprev_byte = 0;
facdc750
RS
1561
1562#ifdef C_ALLOCA
1563 int BM_tab_space[0400];
1564 BM_tab = &BM_tab_space[0];
1565#else
1566 BM_tab = (int *) alloca (0400 * sizeof (int));
1567#endif
1568 /* The general approach is that we are going to maintain that we know */
1569 /* the first (closest to the present position, in whatever direction */
1570 /* we're searching) character that could possibly be the last */
1571 /* (furthest from present position) character of a valid match. We */
1572 /* advance the state of our knowledge by looking at that character */
1573 /* and seeing whether it indeed matches the last character of the */
1574 /* pattern. If it does, we take a closer look. If it does not, we */
1575 /* move our pointer (to putative last characters) as far as is */
1576 /* logically possible. This amount of movement, which I call a */
1577 /* stride, will be the length of the pattern if the actual character */
1578 /* appears nowhere in the pattern, otherwise it will be the distance */
1579 /* from the last occurrence of that character to the end of the */
1580 /* pattern. */
1581 /* As a coding trick, an enormous stride is coded into the table for */
1582 /* characters that match the last character. This allows use of only */
1583 /* a single test, a test for having gone past the end of the */
1584 /* permissible match region, to test for both possible matches (when */
1585 /* the stride goes past the end immediately) and failure to */
177c0ea7 1586 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1587
1588 /* Here we make a "mickey mouse" BM table. The stride of the search */
1589 /* is determined only by the last character of the putative match. */
1590 /* If that character does not match, we will stride the proper */
1591 /* distance to propose a match that superimposes it on the last */
1592 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1593 /* it entirely if there is none. */
facdc750
RS
1594
1595 dirlen = len_byte * direction;
1596 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1597
1598 /* Record position after the end of the pattern. */
1599 pat_end = base_pat + len_byte;
1600 /* BASE_PAT points to a character that we start scanning from.
1601 It is the first character in a forward search,
1602 the last character in a backward search. */
facdc750 1603 if (direction < 0)
cb6792d2
RS
1604 base_pat = pat_end - 1;
1605
facdc750
RS
1606 BM_tab_base = BM_tab;
1607 BM_tab += 0400;
1608 j = dirlen; /* to get it in a register */
1609 /* A character that does not appear in the pattern induces a */
1610 /* stride equal to the pattern length. */
1611 while (BM_tab_base != BM_tab)
1612 {
1613 *--BM_tab = j;
1614 *--BM_tab = j;
1615 *--BM_tab = j;
1616 *--BM_tab = j;
1617 }
1618
1619 /* We use this for translation, instead of TRT itself.
1620 We fill this in to handle the characters that actually
1621 occur in the pattern. Others don't matter anyway! */
1622 bzero (simple_translate, sizeof simple_translate);
1623 for (i = 0; i < 0400; i++)
1624 simple_translate[i] = i;
1625
1626 i = 0;
1627 while (i != infinity)
1628 {
cb6792d2 1629 unsigned char *ptr = base_pat + i;
facdc750
RS
1630 i += direction;
1631 if (i == dirlen)
1632 i = infinity;
1633 if (! NILP (trt))
ca1d1d23 1634 {
facdc750 1635 int ch;
aff2ce94 1636 int untranslated;
facdc750
RS
1637 int this_translated = 1;
1638
1639 if (multibyte
cb6792d2
RS
1640 /* Is *PTR the last byte of a character? */
1641 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1642 {
facdc750
RS
1643 unsigned char *charstart = ptr;
1644 while (! CHAR_HEAD_P (*charstart))
1645 charstart--;
aff2ce94 1646 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
6397418a 1647 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
facdc750 1648 {
ab228c24 1649 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1650 if (! CHAR_HEAD_P (*ptr))
1651 {
1652 translate_prev_byte = ptr[-1];
1653 if (! CHAR_HEAD_P (translate_prev_byte))
1654 translate_anteprev_byte = ptr[-2];
1655 }
facdc750 1656 }
aff2ce94 1657 else
ab228c24
RS
1658 {
1659 this_translated = 0;
1660 ch = *ptr;
1661 }
ca1d1d23 1662 }
facdc750 1663 else if (!multibyte)
aff2ce94 1664 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1665 else
1666 {
facdc750
RS
1667 ch = *ptr;
1668 this_translated = 0;
ca1d1d23 1669 }
facdc750 1670
ab228c24
RS
1671 if (ch > 0400)
1672 j = ((unsigned char) ch) | 0200;
1673 else
1674 j = (unsigned char) ch;
1675
facdc750
RS
1676 if (i == infinity)
1677 stride_for_teases = BM_tab[j];
ab228c24 1678
facdc750
RS
1679 BM_tab[j] = dirlen - i;
1680 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1681 /* comment following downcase_table for details */
facdc750 1682 if (this_translated)
ab228c24
RS
1683 {
1684 int starting_ch = ch;
1685 int starting_j = j;
1686 while (1)
1687 {
1688 TRANSLATE (ch, inverse_trt, ch);
1689 if (ch > 0400)
1690 j = ((unsigned char) ch) | 0200;
1691 else
1692 j = (unsigned char) ch;
1693
1694 /* For all the characters that map into CH,
1695 set up simple_translate to map the last byte
1696 into STARTING_J. */
1697 simple_translate[j] = starting_j;
1698 if (ch == starting_ch)
1699 break;
1700 BM_tab[j] = dirlen - i;
1701 }
1702 }
facdc750
RS
1703 }
1704 else
1705 {
1706 j = *ptr;
1707
1708 if (i == infinity)
1709 stride_for_teases = BM_tab[j];
1710 BM_tab[j] = dirlen - i;
ca1d1d23 1711 }
facdc750
RS
1712 /* stride_for_teases tells how much to stride if we get a */
1713 /* match on the far character but are subsequently */
1714 /* disappointed, by recording what the stride would have been */
1715 /* for that character if the last character had been */
1716 /* different. */
1717 }
1718 infinity = dirlen - infinity;
1719 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1720 /* loop invariant - POS_BYTE points at where last char (first
1721 char if reverse) of pattern would align in a possible match. */
1722 while (n != 0)
1723 {
1724 int tail_end;
1725 unsigned char *tail_end_ptr;
1726
1727 /* It's been reported that some (broken) compiler thinks that
1728 Boolean expressions in an arithmetic context are unsigned.
1729 Using an explicit ?1:0 prevents this. */
1730 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1731 < 0)
1732 return (n * (0 - direction));
1733 /* First we do the part we can by pointers (maybe nothing) */
1734 QUIT;
1735 pat = base_pat;
1736 limit = pos_byte - dirlen + direction;
67ce527d
KH
1737 if (direction > 0)
1738 {
1739 limit = BUFFER_CEILING_OF (limit);
1740 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1741 can take on without hitting edge of buffer or the gap. */
1742 limit = min (limit, pos_byte + 20000);
1743 limit = min (limit, lim_byte - 1);
1744 }
1745 else
1746 {
1747 limit = BUFFER_FLOOR_OF (limit);
1748 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1749 can take on without hitting edge of buffer or the gap. */
1750 limit = max (limit, pos_byte - 20000);
1751 limit = max (limit, lim_byte);
1752 }
facdc750
RS
1753 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1754 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1755
1756 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1757 {
facdc750
RS
1758 unsigned char *p2;
1759
1760 p_limit = BYTE_POS_ADDR (limit);
1761 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1762 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1763 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1764 {
facdc750 1765 if (direction > 0) /* worth duplicating */
ca1d1d23 1766 {
facdc750
RS
1767 /* Use signed comparison if appropriate
1768 to make cursor+infinity sure to be > p_limit.
1769 Assuming that the buffer lies in a range of addresses
1770 that are all "positive" (as ints) or all "negative",
1771 either kind of comparison will work as long
1772 as we don't step by infinity. So pick the kind
1773 that works when we do step by infinity. */
1774 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1775 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1776 cursor += BM_tab[*cursor];
ca1d1d23 1777 else
facdc750
RS
1778 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1779 cursor += BM_tab[*cursor];
1780 }
1781 else
1782 {
1783 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1784 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1785 cursor += BM_tab[*cursor];
1786 else
1787 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1788 cursor += BM_tab[*cursor];
1789 }
ca1d1d23 1790/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1791/* This can happen if you match on the far character of the pattern, */
1792/* because the "stride" of that character is infinity, a number able */
1793/* to throw you well beyond the end of the search. It can also */
1794/* happen if you fail to match within the permitted region and would */
1795/* otherwise try a character beyond that region */
1796 if ((cursor - p_limit) * direction <= len_byte)
1797 break; /* a small overrun is genuine */
1798 cursor -= infinity; /* large overrun = hit */
1799 i = dirlen - direction;
1800 if (! NILP (trt))
1801 {
1802 while ((i -= direction) + direction != 0)
ca1d1d23 1803 {
facdc750
RS
1804 int ch;
1805 cursor -= direction;
1806 /* Translate only the last byte of a character. */
1807 if (! multibyte
1808 || ((cursor == tail_end_ptr
1809 || CHAR_HEAD_P (cursor[1]))
1810 && (CHAR_HEAD_P (cursor[0])
1811 || (translate_prev_byte == cursor[-1]
1812 && (CHAR_HEAD_P (translate_prev_byte)
1813 || translate_anteprev_byte == cursor[-2])))))
1814 ch = simple_translate[*cursor];
1815 else
1816 ch = *cursor;
1817 if (pat[i] != ch)
1818 break;
ca1d1d23 1819 }
facdc750
RS
1820 }
1821 else
1822 {
1823 while ((i -= direction) + direction != 0)
ca1d1d23 1824 {
facdc750
RS
1825 cursor -= direction;
1826 if (pat[i] != *cursor)
1827 break;
ca1d1d23 1828 }
facdc750
RS
1829 }
1830 cursor += dirlen - i - direction; /* fix cursor */
1831 if (i + direction == 0)
1832 {
1833 int position;
0c8533c6 1834
facdc750 1835 cursor -= direction;
1113d9db 1836
facdc750
RS
1837 position = pos_byte + cursor - p2 + ((direction > 0)
1838 ? 1 - len_byte : 0);
1839 set_search_regs (position, len_byte);
ca325161 1840
facdc750
RS
1841 if ((n -= direction) != 0)
1842 cursor += dirlen; /* to resume search */
ca1d1d23 1843 else
facdc750
RS
1844 return ((direction > 0)
1845 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1846 }
facdc750
RS
1847 else
1848 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1849 }
facdc750
RS
1850 pos_byte += cursor - p2;
1851 }
1852 else
1853 /* Now we'll pick up a clump that has to be done the hard */
1854 /* way because it covers a discontinuity */
1855 {
1856 limit = ((direction > 0)
1857 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1858 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1859 limit = ((direction > 0)
1860 ? min (limit + len_byte, lim_byte - 1)
1861 : max (limit - len_byte, lim_byte));
1862 /* LIMIT is now the last value POS_BYTE can have
1863 and still be valid for a possible match. */
1864 while (1)
ca1d1d23 1865 {
facdc750
RS
1866 /* This loop can be coded for space rather than */
1867 /* speed because it will usually run only once. */
1868 /* (the reach is at most len + 21, and typically */
177c0ea7 1869 /* does not exceed len) */
facdc750
RS
1870 while ((limit - pos_byte) * direction >= 0)
1871 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1872 /* now run the same tests to distinguish going off the */
1873 /* end, a match or a phony match. */
1874 if ((pos_byte - limit) * direction <= len_byte)
1875 break; /* ran off the end */
1876 /* Found what might be a match.
1877 Set POS_BYTE back to last (first if reverse) pos. */
1878 pos_byte -= infinity;
1879 i = dirlen - direction;
1880 while ((i -= direction) + direction != 0)
ca1d1d23 1881 {
facdc750
RS
1882 int ch;
1883 unsigned char *ptr;
1884 pos_byte -= direction;
1885 ptr = BYTE_POS_ADDR (pos_byte);
1886 /* Translate only the last byte of a character. */
1887 if (! multibyte
1888 || ((ptr == tail_end_ptr
1889 || CHAR_HEAD_P (ptr[1]))
1890 && (CHAR_HEAD_P (ptr[0])
1891 || (translate_prev_byte == ptr[-1]
1892 && (CHAR_HEAD_P (translate_prev_byte)
1893 || translate_anteprev_byte == ptr[-2])))))
1894 ch = simple_translate[*ptr];
1895 else
1896 ch = *ptr;
1897 if (pat[i] != ch)
1898 break;
1899 }
1900 /* Above loop has moved POS_BYTE part or all the way
1901 back to the first pos (last pos if reverse).
1902 Set it once again at the last (first if reverse) char. */
1903 pos_byte += dirlen - i- direction;
1904 if (i + direction == 0)
1905 {
1906 int position;
1907 pos_byte -= direction;
1113d9db 1908
facdc750 1909 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1910
facdc750 1911 set_search_regs (position, len_byte);
ca325161 1912
facdc750
RS
1913 if ((n -= direction) != 0)
1914 pos_byte += dirlen; /* to resume search */
ca1d1d23 1915 else
facdc750
RS
1916 return ((direction > 0)
1917 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1918 }
facdc750
RS
1919 else
1920 pos_byte += stride_for_teases;
1921 }
1922 }
1923 /* We have done one clump. Can we continue? */
1924 if ((lim_byte - pos_byte) * direction < 0)
1925 return ((0 - n) * direction);
ca1d1d23 1926 }
facdc750 1927 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1928}
ca325161 1929
fa8ed3e0 1930/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1931 for the overall match just found in the current buffer.
1932 Also clear out the match data for registers 1 and up. */
ca325161
RS
1933
1934static void
fa8ed3e0
RS
1935set_search_regs (beg_byte, nbytes)
1936 int beg_byte, nbytes;
ca325161 1937{
a7e4cdde
RS
1938 int i;
1939
ca325161
RS
1940 /* Make sure we have registers in which to store
1941 the match position. */
1942 if (search_regs.num_regs == 0)
1943 {
2d4a771a
RS
1944 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1945 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1946 search_regs.num_regs = 2;
ca325161
RS
1947 }
1948
a7e4cdde
RS
1949 /* Clear out the other registers. */
1950 for (i = 1; i < search_regs.num_regs; i++)
1951 {
1952 search_regs.start[i] = -1;
1953 search_regs.end[i] = -1;
1954 }
1955
fa8ed3e0
RS
1956 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1957 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1958 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1959}
ca1d1d23
JB
1960\f
1961/* Given a string of words separated by word delimiters,
1962 compute a regexp that matches those exact words
1963 separated by arbitrary punctuation. */
1964
1965static Lisp_Object
1966wordify (string)
1967 Lisp_Object string;
1968{
1969 register unsigned char *p, *o;
0c8533c6 1970 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1971 Lisp_Object val;
0c8533c6
RS
1972 int prev_c = 0;
1973 int adjust;
ca1d1d23 1974
b7826503 1975 CHECK_STRING (string);
d5db4077
KR
1976 p = SDATA (string);
1977 len = SCHARS (string);
ca1d1d23 1978
0c8533c6
RS
1979 for (i = 0, i_byte = 0; i < len; )
1980 {
1981 int c;
177c0ea7 1982
eb99a8dd 1983 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
1984
1985 if (SYNTAX (c) != Sword)
1986 {
1987 punct_count++;
1988 if (i > 0 && SYNTAX (prev_c) == Sword)
1989 word_count++;
1990 }
ca1d1d23 1991
0c8533c6
RS
1992 prev_c = c;
1993 }
1994
1995 if (SYNTAX (prev_c) == Sword)
1996 word_count++;
1997 if (!word_count)
b07b65aa 1998 return empty_string;
0c8533c6
RS
1999
2000 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2001 if (STRING_MULTIBYTE (string))
2002 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2003 SBYTES (string)
8a2df937
RS
2004 + adjust);
2005 else
2006 val = make_uninit_string (len + adjust);
ca1d1d23 2007
d5db4077 2008 o = SDATA (val);
ca1d1d23
JB
2009 *o++ = '\\';
2010 *o++ = 'b';
1e9582d4 2011 prev_c = 0;
ca1d1d23 2012
1e9582d4
RS
2013 for (i = 0, i_byte = 0; i < len; )
2014 {
2015 int c;
2016 int i_byte_orig = i_byte;
177c0ea7 2017
eb99a8dd 2018 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2019
2020 if (SYNTAX (c) == Sword)
2021 {
5d69fe10 2022 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2023 i_byte - i_byte_orig);
2024 o += i_byte - i_byte_orig;
2025 }
2026 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2027 {
2028 *o++ = '\\';
2029 *o++ = 'W';
2030 *o++ = '\\';
2031 *o++ = 'W';
2032 *o++ = '*';
2033 }
2034
2035 prev_c = c;
2036 }
ca1d1d23
JB
2037
2038 *o++ = '\\';
2039 *o++ = 'b';
2040
2041 return val;
2042}
2043\f
/* Lisp command `search-backward'.  Thin wrapper: forwards STRING,
   BOUND, NOERROR and COUNT unchanged to search_command, followed by
   the constants -1, 0, 0.
   NOTE(review): comparing the sibling search commands in this file,
   those three constants look like (direction, regexp flag, POSIX
   flag), with -1 meaning "backward" -- confirm against the definition
   of search_command.  */
2044DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2045 "MSearch backward: ",
2046 doc: /* Search backward from point for STRING.
2047Set point to the beginning of the occurrence found, and return point.
2048An optional second argument bounds the search; it is a buffer position.
2049The match found must not extend before that position.
2050Optional third argument, if t, means if fail just return nil (no error).
2051 If not nil and not t, position at limit of search and return nil.
2052Optional fourth argument is repeat count--search for successive occurrences.
2053
2054Search case-sensitivity is determined by the value of the variable
2055`case-fold-search', which see.
2056
2057See also the functions `match-beginning', `match-end' and `replace-match'. */)
2058 (string, bound, noerror, count)
ca1d1d23
JB
2059 Lisp_Object string, bound, noerror, count;
2060{
b819a390 2061 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2062}
2063
/* Lisp command `search-forward'.  Thin wrapper: forwards STRING,
   BOUND, NOERROR and COUNT unchanged to search_command, followed by
   the constants 1, 0, 0.
   NOTE(review): comparing the sibling search commands in this file,
   those three constants look like (direction, regexp flag, POSIX
   flag), with 1 meaning "forward" -- confirm against the definition
   of search_command.  */
6af43974 2064DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2065 doc: /* Search forward from point for STRING.
2066Set point to the end of the occurrence found, and return point.
2067An optional second argument bounds the search; it is a buffer position.
2068The match found must not extend after that position. nil is equivalent
2069 to (point-max).
2070Optional third argument, if t, means if fail just return nil (no error).
2071 If not nil and not t, move to limit of search and return nil.
2072Optional fourth argument is repeat count--search for successive occurrences.
2073
2074Search case-sensitivity is determined by the value of the variable
2075`case-fold-search', which see.
2076
2077See also the functions `match-beginning', `match-end' and `replace-match'. */)
2078 (string, bound, noerror, count)
ca1d1d23
JB
2079 Lisp_Object string, bound, noerror, count;
2080{
b819a390 2081 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2082}
2083
/* Lisp command `word-search-backward'.  STRING is first converted by
   wordify into a regexp matching its words separated by arbitrary
   punctuation; that regexp, together with BOUND, NOERROR and COUNT,
   is handed to search_command followed by the constants -1, 1, 0.
   NOTE(review): comparing the sibling search commands in this file,
   those constants look like (direction, regexp flag, POSIX flag) --
   confirm against the definition of search_command.  */
2084DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2085 "sWord search backward: ",
2086 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2087Set point to the beginning of the occurrence found, and return point.
2088An optional second argument bounds the search; it is a buffer position.
2089The match found must not extend before that position.
2090Optional third argument, if t, means if fail just return nil (no error).
2091 If not nil and not t, move to limit of search and return nil.
2092Optional fourth argument is repeat count--search for successive occurrences. */)
2093 (string, bound, noerror, count)
ca1d1d23
JB
2094 Lisp_Object string, bound, noerror, count;
2095{
b819a390 2096 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2097}
2098
/* Lisp command `word-search-forward'.  STRING is first converted by
   wordify into a regexp matching its words separated by arbitrary
   punctuation; that regexp, together with BOUND, NOERROR and COUNT,
   is handed to search_command followed by the constants 1, 1, 0.
   NOTE(review): comparing the sibling search commands in this file,
   those constants look like (direction, regexp flag, POSIX flag) --
   confirm against the definition of search_command.  */
2099DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2100 "sWord search: ",
2101 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2102Set point to the end of the occurrence found, and return point.
2103An optional second argument bounds the search; it is a buffer position.
2104The match found must not extend after that position.
2105Optional third argument, if t, means if fail just return nil (no error).
2106 If not nil and not t, move to limit of search and return nil.
2107Optional fourth argument is repeat count--search for successive occurrences. */)
2108 (string, bound, noerror, count)
ca1d1d23
JB
2109 Lisp_Object string, bound, noerror, count;
2110{
b819a390 2111 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2112}
2113
/* Lisp command `re-search-backward'.  Thin wrapper: forwards REGEXP,
   BOUND, NOERROR and COUNT unchanged to search_command, followed by
   the constants -1, 1, 0.
   NOTE(review): comparing the sibling search commands in this file,
   those constants look like (direction, regexp flag, POSIX flag) --
   confirm against the definition of search_command.  */
2114DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2115 "sRE search backward: ",
2116 doc: /* Search backward from point for match for regular expression REGEXP.
2117Set point to the beginning of the match, and return point.
2118The match found is the one starting last in the buffer
2119and yet ending before the origin of the search.
2120An optional second argument bounds the search; it is a buffer position.
2121The match found must start at or after that position.
2122Optional third argument, if t, means if fail just return nil (no error).
2123 If not nil and not t, move to limit of search and return nil.
2124Optional fourth argument is repeat count--search for successive occurrences.
2125See also the functions `match-beginning', `match-end', `match-string',
2126and `replace-match'. */)
2127 (regexp, bound, noerror, count)
19c0a730 2128 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2129{
b819a390 2130 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2131}
2132
/* Lisp command `re-search-forward'.  Thin wrapper: forwards REGEXP,
   BOUND, NOERROR and COUNT unchanged to search_command, followed by
   the constants 1, 1, 0.
   NOTE(review): comparing the sibling search commands in this file,
   those constants look like (direction, regexp flag, POSIX flag) --
   confirm against the definition of search_command.  */
2133DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2134 "sRE search: ",
2135 doc: /* Search forward from point for regular expression REGEXP.
2136Set point to the end of the occurrence found, and return point.
2137An optional second argument bounds the search; it is a buffer position.
2138The match found must not extend after that position.
2139Optional third argument, if t, means if fail just return nil (no error).
2140 If not nil and not t, move to limit of search and return nil.
2141Optional fourth argument is repeat count--search for successive occurrences.
2142See also the functions `match-beginning', `match-end', `match-string',
2143and `replace-match'. */)
2144 (regexp, bound, noerror, count)
19c0a730 2145 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2146{
b819a390
RS
2147 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2148}
2149
/* Lisp command `posix-search-backward'.  Thin wrapper: forwards
   REGEXP, BOUND, NOERROR and COUNT unchanged to search_command,
   followed by the constants -1, 1, 1.  It differs from
   `re-search-backward' only in the last constant.
   NOTE(review): those constants look like (direction, regexp flag,
   POSIX flag) -- confirm against the definition of search_command.  */
2150DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2151 "sPosix search backward: ",
2152 doc: /* Search backward from point for match for regular expression REGEXP.
2153Find the longest match in accord with Posix regular expression rules.
2154Set point to the beginning of the match, and return point.
2155The match found is the one starting last in the buffer
2156and yet ending before the origin of the search.
2157An optional second argument bounds the search; it is a buffer position.
2158The match found must start at or after that position.
2159Optional third argument, if t, means if fail just return nil (no error).
2160 If not nil and not t, move to limit of search and return nil.
2161Optional fourth argument is repeat count--search for successive occurrences.
2162See also the functions `match-beginning', `match-end', `match-string',
2163and `replace-match'. */)
2164 (regexp, bound, noerror, count)
b819a390
RS
2165 Lisp_Object regexp, bound, noerror, count;
2166{
2167 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2168}
2169
/* Lisp command `posix-search-forward'.  Thin wrapper: forwards
   REGEXP, BOUND, NOERROR and COUNT unchanged to search_command,
   followed by the constants 1, 1, 1.  It differs from
   `re-search-forward' only in the last constant.
   NOTE(review): those constants look like (direction, regexp flag,
   POSIX flag) -- confirm against the definition of search_command.  */
2170DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2171 "sPosix search: ",
2172 doc: /* Search forward from point for regular expression REGEXP.
2173Find the longest match in accord with Posix regular expression rules.
2174Set point to the end of the occurrence found, and return point.
2175An optional second argument bounds the search; it is a buffer position.
2176The match found must not extend after that position.
2177Optional third argument, if t, means if fail just return nil (no error).
2178 If not nil and not t, move to limit of search and return nil.
2179Optional fourth argument is repeat count--search for successive occurrences.
2180See also the functions `match-beginning', `match-end', `match-string',
2181and `replace-match'. */)
2182 (regexp, bound, noerror, count)
b819a390
RS
2183 Lisp_Object regexp, bound, noerror, count;
2184{
2185 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2186}
2187\f
d7a5ad5f 2188DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2189 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2190Leave point at the end of the replacement text.
2191
8c1a1077
PJ
2192If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2193Otherwise maybe capitalize the whole text, or maybe just word initials,
2194based on the replaced text.
2195If the replaced text has only capital letters
2196and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2197Otherwise if all words are capitalized in the replaced text,
2198capitalize each word in NEWTEXT.
8c1a1077
PJ
2199
2200If third arg LITERAL is non-nil, insert NEWTEXT literally.
2201Otherwise treat `\\' as special:
2202 `\\&' in NEWTEXT means substitute original matched text.
2203 `\\N' means substitute what matched the Nth `\\(...\\)'.
2204 If Nth parens didn't match, substitute nothing.
2205 `\\\\' means insert one `\\'.
4dd0c271
RS
2206Case conversion does not apply to these substitutions.
2207
8c1a1077 2208FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2209
2210The optional fourth argument STRING can be a string to modify.
2211This is meaningful when the previous match was done against STRING,
2212using `string-match'. When used this way, `replace-match'
2213creates and returns a new string made by copying STRING and replacing
2214the part of STRING that was matched.
2215
2216The optional fifth argument SUBEXP specifies a subexpression;
2217it says to replace just that subexpression with NEWTEXT,
2218rather than replacing the entire matched text.
2219This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2220`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2221NEWTEXT in place of subexp N.
2222This is useful only after a regular expression search or match,
2223since only regular expressions have distinguished subexpressions. */)
2224 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2225 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2226{
2227 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2228 register int pos, pos_byte;
ca1d1d23 2229 int some_multiletter_word;
97832bd0 2230 int some_lowercase;
73dc8771 2231 int some_uppercase;
208767c3 2232 int some_nonuppercase_initial;
ca1d1d23 2233 register int c, prevc;
d7a5ad5f 2234 int sub;
3e18eecf 2235 int opoint, newpoint;
ca1d1d23 2236
b7826503 2237 CHECK_STRING (newtext);
ca1d1d23 2238
080c45fd 2239 if (! NILP (string))
b7826503 2240 CHECK_STRING (string);
080c45fd 2241
ca1d1d23
JB
2242 case_action = nochange; /* We tried an initialization */
2243 /* but some C compilers blew it */
4746118a
JB
2244
2245 if (search_regs.num_regs <= 0)
2246 error ("replace-match called before any match found");
2247
d7a5ad5f
RS
2248 if (NILP (subexp))
2249 sub = 0;
2250 else
2251 {
b7826503 2252 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2253 sub = XINT (subexp);
2254 if (sub < 0 || sub >= search_regs.num_regs)
2255 args_out_of_range (subexp, make_number (search_regs.num_regs));
2256 }
2257
080c45fd
RS
2258 if (NILP (string))
2259 {
d7a5ad5f
RS
2260 if (search_regs.start[sub] < BEGV
2261 || search_regs.start[sub] > search_regs.end[sub]
2262 || search_regs.end[sub] > ZV)
2263 args_out_of_range (make_number (search_regs.start[sub]),
2264 make_number (search_regs.end[sub]));
080c45fd
RS
2265 }
2266 else
2267 {
d7a5ad5f
RS
2268 if (search_regs.start[sub] < 0
2269 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2270 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2271 args_out_of_range (make_number (search_regs.start[sub]),
2272 make_number (search_regs.end[sub]));
080c45fd 2273 }
ca1d1d23
JB
2274
2275 if (NILP (fixedcase))
2276 {
2277 /* Decide how to casify by examining the matched text. */
ac3b28b1 2278 int last;
ca1d1d23 2279
ac3b28b1
KH
2280 pos = search_regs.start[sub];
2281 last = search_regs.end[sub];
fa8ed3e0
RS
2282
2283 if (NILP (string))
ac3b28b1 2284 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2285 else
ac3b28b1 2286 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2287
ca1d1d23
JB
2288 prevc = '\n';
2289 case_action = all_caps;
2290
2291 /* some_multiletter_word is set nonzero if any original word
2292 is more than one letter long. */
2293 some_multiletter_word = 0;
97832bd0 2294 some_lowercase = 0;
208767c3 2295 some_nonuppercase_initial = 0;
73dc8771 2296 some_uppercase = 0;
ca1d1d23 2297
ac3b28b1 2298 while (pos < last)
ca1d1d23 2299 {
080c45fd 2300 if (NILP (string))
ac3b28b1
KH
2301 {
2302 c = FETCH_CHAR (pos_byte);
2303 INC_BOTH (pos, pos_byte);
2304 }
080c45fd 2305 else
ac3b28b1 2306 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2307
ca1d1d23
JB
2308 if (LOWERCASEP (c))
2309 {
2310 /* Cannot be all caps if any original char is lower case */
2311
97832bd0 2312 some_lowercase = 1;
ca1d1d23 2313 if (SYNTAX (prevc) != Sword)
208767c3 2314 some_nonuppercase_initial = 1;
ca1d1d23
JB
2315 else
2316 some_multiletter_word = 1;
2317 }
2318 else if (!NOCASEP (c))
2319 {
73dc8771 2320 some_uppercase = 1;
97832bd0 2321 if (SYNTAX (prevc) != Sword)
c4d460ce 2322 ;
97832bd0 2323 else
ca1d1d23
JB
2324 some_multiletter_word = 1;
2325 }
208767c3
RS
2326 else
2327 {
2328 /* If the initial is a caseless word constituent,
2329 treat that like a lowercase initial. */
2330 if (SYNTAX (prevc) != Sword)
2331 some_nonuppercase_initial = 1;
2332 }
ca1d1d23
JB
2333
2334 prevc = c;
2335 }
2336
97832bd0
RS
2337 /* Convert to all caps if the old text is all caps
2338 and has at least one multiletter word. */
2339 if (! some_lowercase && some_multiletter_word)
2340 case_action = all_caps;
c4d460ce 2341 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2342 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2343 case_action = cap_initial;
208767c3 2344 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2345 /* Should x -> yz, operating on X, give Yz or YZ?
2346 We'll assume the latter. */
2347 case_action = all_caps;
97832bd0
RS
2348 else
2349 case_action = nochange;
ca1d1d23
JB
2350 }
2351
080c45fd
RS
2352 /* Do replacement in a string. */
2353 if (!NILP (string))
2354 {
2355 Lisp_Object before, after;
2356
2357 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2358 make_number (search_regs.start[sub]));
2359 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2360
636a5e28
RS
2361 /* Substitute parts of the match into NEWTEXT
2362 if desired. */
080c45fd
RS
2363 if (NILP (literal))
2364 {
d131e79c
RS
2365 int lastpos = 0;
2366 int lastpos_byte = 0;
080c45fd
RS
2367 /* We build up the substituted string in ACCUM. */
2368 Lisp_Object accum;
2369 Lisp_Object middle;
d5db4077 2370 int length = SBYTES (newtext);
080c45fd
RS
2371
2372 accum = Qnil;
2373
ac3b28b1 2374 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2375 {
2376 int substart = -1;
6bbd7a29 2377 int subend = 0;
1e79ec24 2378 int delbackslash = 0;
080c45fd 2379
0c8533c6
RS
2380 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2381
080c45fd
RS
2382 if (c == '\\')
2383 {
0c8533c6 2384 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2385
080c45fd
RS
2386 if (c == '&')
2387 {
d7a5ad5f
RS
2388 substart = search_regs.start[sub];
2389 subend = search_regs.end[sub];
080c45fd 2390 }
5fbbc83b 2391 else if (c >= '1' && c <= '9')
080c45fd 2392 {
5fbbc83b
RS
2393 if (search_regs.start[c - '0'] >= 0
2394 && c <= search_regs.num_regs + '0')
080c45fd
RS
2395 {
2396 substart = search_regs.start[c - '0'];
2397 subend = search_regs.end[c - '0'];
2398 }
5fbbc83b
RS
2399 else
2400 {
2401 /* If that subexp did not match,
2402 replace \\N with nothing. */
2403 substart = 0;
2404 subend = 0;
2405 }
080c45fd 2406 }
1e79ec24
KH
2407 else if (c == '\\')
2408 delbackslash = 1;
636a5e28
RS
2409 else
2410 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2411 }
2412 if (substart >= 0)
2413 {
d131e79c
RS
2414 if (pos - 2 != lastpos)
2415 middle = substring_both (newtext, lastpos,
2416 lastpos_byte,
2417 pos - 2, pos_byte - 2);
080c45fd
RS
2418 else
2419 middle = Qnil;
2420 accum = concat3 (accum, middle,
0c8533c6
RS
2421 Fsubstring (string,
2422 make_number (substart),
080c45fd
RS
2423 make_number (subend)));
2424 lastpos = pos;
0c8533c6 2425 lastpos_byte = pos_byte;
080c45fd 2426 }
1e79ec24
KH
2427 else if (delbackslash)
2428 {
d131e79c
RS
2429 middle = substring_both (newtext, lastpos,
2430 lastpos_byte,
2431 pos - 1, pos_byte - 1);
0c8533c6 2432
1e79ec24
KH
2433 accum = concat2 (accum, middle);
2434 lastpos = pos;
0c8533c6 2435 lastpos_byte = pos_byte;
1e79ec24 2436 }
080c45fd
RS
2437 }
2438
d131e79c
RS
2439 if (pos != lastpos)
2440 middle = substring_both (newtext, lastpos,
2441 lastpos_byte,
0c8533c6 2442 pos, pos_byte);
080c45fd
RS
2443 else
2444 middle = Qnil;
2445
2446 newtext = concat2 (accum, middle);
2447 }
2448
636a5e28 2449 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2450 if (case_action == all_caps)
2451 newtext = Fupcase (newtext);
2452 else if (case_action == cap_initial)
2b2eead9 2453 newtext = Fupcase_initials (newtext);
080c45fd
RS
2454
2455 return concat3 (before, newtext, after);
2456 }
2457
09c4719e 2458 /* Record point, then move (quietly) to the start of the match. */
9160906f 2459 if (PT >= search_regs.end[sub])
b0eba991 2460 opoint = PT - ZV;
9160906f
RS
2461 else if (PT > search_regs.start[sub])
2462 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2463 else
2464 opoint = PT;
2465
886ed6ec
RS
2466 /* If we want non-literal replacement,
2467 perform substitution on the replacement string. */
2468 if (NILP (literal))
ca1d1d23 2469 {
d5db4077 2470 int length = SBYTES (newtext);
68e69fbd
RS
2471 unsigned char *substed;
2472 int substed_alloc_size, substed_len;
3bc25e52
KH
2473 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2474 int str_multibyte = STRING_MULTIBYTE (newtext);
2475 Lisp_Object rev_tbl;
886ed6ec 2476 int really_changed = 0;
3bc25e52
KH
2477
2478 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2479 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2480 make_number (0))
2481 : Qnil);
ac3b28b1 2482
68e69fbd
RS
2483 substed_alloc_size = length * 2 + 100;
2484 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2485 substed_len = 0;
2486
3bc25e52
KH
2487 /* Go thru NEWTEXT, producing the actual text to insert in
2488 SUBSTED while adjusting multibyteness to that of the current
2489 buffer. */
ca1d1d23 2490
ac3b28b1 2491 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2492 {
68e69fbd 2493 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2494 unsigned char *add_stuff = NULL;
2495 int add_len = 0;
68e69fbd 2496 int idx = -1;
9a76659d 2497
3bc25e52
KH
2498 if (str_multibyte)
2499 {
eb99a8dd 2500 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2501 if (!buf_multibyte)
2502 c = multibyte_char_to_unibyte (c, rev_tbl);
2503 }
2504 else
2505 {
2506 /* Note that we don't have to increment POS. */
5d69fe10 2507 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2508 if (buf_multibyte)
2509 c = unibyte_char_to_multibyte (c);
2510 }
ac3b28b1 2511
68e69fbd
RS
2512 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2513 or set IDX to a match index, which means put that part
2514 of the buffer text into SUBSTED. */
2515
ca1d1d23
JB
2516 if (c == '\\')
2517 {
886ed6ec
RS
2518 really_changed = 1;
2519
3bc25e52
KH
2520 if (str_multibyte)
2521 {
eb99a8dd
KH
2522 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2523 pos, pos_byte);
3bc25e52
KH
2524 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2525 c = multibyte_char_to_unibyte (c, rev_tbl);
2526 }
2527 else
2528 {
d5db4077 2529 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2530 if (buf_multibyte)
2531 c = unibyte_char_to_multibyte (c);
2532 }
2533
ca1d1d23 2534 if (c == '&')
68e69fbd 2535 idx = sub;
78445046 2536 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2537 {
2538 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2539 idx = c - '0';
ca1d1d23 2540 }
636a5e28 2541 else if (c == '\\')
68e69fbd 2542 add_len = 1, add_stuff = "\\";
636a5e28 2543 else
3bc25e52
KH
2544 {
2545 xfree (substed);
2546 error ("Invalid use of `\\' in replacement text");
2547 }
ca1d1d23
JB
2548 }
2549 else
68e69fbd
RS
2550 {
2551 add_len = CHAR_STRING (c, str);
2552 add_stuff = str;
2553 }
2554
2555 /* If we want to copy part of a previous match,
2556 set up ADD_STUFF and ADD_LEN to point to it. */
2557 if (idx >= 0)
2558 {
2559 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2560 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2561 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2562 move_gap (search_regs.start[idx]);
2563 add_stuff = BYTE_POS_ADDR (begbyte);
2564 }
2565
2566 /* Now the stuff we want to add to SUBSTED
2567 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2568
2569 /* Make sure SUBSTED is big enough. */
2570 if (substed_len + add_len >= substed_alloc_size)
2571 {
2572 substed_alloc_size = substed_len + add_len + 500;
2573 substed = (unsigned char *) xrealloc (substed,
2574 substed_alloc_size + 1);
2575 }
2576
2577 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2578 if (add_stuff)
2579 {
2580 bcopy (add_stuff, substed + substed_len, add_len);
2581 substed_len += add_len;
2582 }
ca1d1d23 2583 }
68e69fbd 2584
886ed6ec 2585 if (really_changed)
80460525
KH
2586 {
2587 if (buf_multibyte)
2588 {
2589 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2590
80460525
KH
2591 newtext = make_multibyte_string (substed, nchars, substed_len);
2592 }
2593 else
2594 newtext = make_unibyte_string (substed, substed_len);
2595 }
68e69fbd 2596 xfree (substed);
ca1d1d23
JB
2597 }
2598
886ed6ec
RS
2599 /* Replace the old text with the new in the cleanest possible way. */
2600 replace_range (search_regs.start[sub], search_regs.end[sub],
2601 newtext, 1, 0, 1);
d5db4077 2602 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2603
2604 if (case_action == all_caps)
886ed6ec
RS
2605 Fupcase_region (make_number (search_regs.start[sub]),
2606 make_number (newpoint));
ca1d1d23 2607 else if (case_action == cap_initial)
886ed6ec
RS
2608 Fupcase_initials_region (make_number (search_regs.start[sub]),
2609 make_number (newpoint));
3e18eecf 2610
98e942e0
RS
2611 /* Adjust search data for this change. */
2612 {
5b88a2c5 2613 int oldend = search_regs.end[sub];
41c01205 2614 int oldstart = search_regs.start[sub];
98e942e0
RS
2615 int change = newpoint - search_regs.end[sub];
2616 int i;
2617
2618 for (i = 0; i < search_regs.num_regs; i++)
2619 {
41c01205 2620 if (search_regs.start[i] >= oldend)
98e942e0 2621 search_regs.start[i] += change;
41c01205
DK
2622 else if (search_regs.start[i] > oldstart)
2623 search_regs.start[i] = oldstart;
2624 if (search_regs.end[i] >= oldend)
98e942e0 2625 search_regs.end[i] += change;
41c01205
DK
2626 else if (search_regs.end[i] > oldstart)
2627 search_regs.end[i] = oldstart;
98e942e0
RS
2628 }
2629 }
2630
b0eba991 2631 /* Put point back where it was in the text. */
8d808a65 2632 if (opoint <= 0)
fa8ed3e0 2633 TEMP_SET_PT (opoint + ZV);
b0eba991 2634 else
fa8ed3e0 2635 TEMP_SET_PT (opoint);
b0eba991
RS
2636
2637 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2638 move_if_not_intangible (newpoint);
177c0ea7 2639
ca1d1d23
JB
2640 return Qnil;
2641}
2642\f
2643static Lisp_Object
2644match_limit (num, beginningp)
2645 Lisp_Object num;
2646 int beginningp;
2647{
2648 register int n;
2649
b7826503 2650 CHECK_NUMBER (num);
ca1d1d23 2651 n = XINT (num);
f90a5bf5 2652 if (n < 0)
bd2cbd56 2653 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2654 if (search_regs.num_regs <= 0)
2655 error ("No match data, because no search succeeded");
9b9ceb61 2656 if (n >= search_regs.num_regs
4746118a 2657 || search_regs.start[n] < 0)
ca1d1d23
JB
2658 return Qnil;
2659 return (make_number ((beginningp) ? search_regs.start[n]
2660 : search_regs.end[n]));
2661}
2662
/* Lisp primitive `match-beginning'.  Delegates to match_limit with a
   nonzero BEGINNINGP, which selects search_regs.start[SUBEXP].  */
2663DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2664 doc: /* Return position of start of text matched by last search.
2665SUBEXP, a number, specifies which parenthesized expression in the last
2666 regexp.
2667Value is nil if SUBEXPth pair didn't match, or there were less than
2668 SUBEXP pairs.
2669Zero means the entire text matched by the whole regexp or whole string. */)
2670 (subexp)
5806161b 2671 Lisp_Object subexp;
ca1d1d23 2672{
5806161b 2673 return match_limit (subexp, 1);
ca1d1d23
JB
2674}
2675
/* Lisp primitive `match-end'.  Delegates to match_limit with a zero
   BEGINNINGP, which selects search_regs.end[SUBEXP].  */
2676DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2677 doc: /* Return position of end of text matched by last search.
2678SUBEXP, a number, specifies which parenthesized expression in the last
2679 regexp.
2680Value is nil if SUBEXPth pair didn't match, or there were less than
2681 SUBEXP pairs.
2682Zero means the entire text matched by the whole regexp or whole string. */)
2683 (subexp)
5806161b 2684 Lisp_Object subexp;
ca1d1d23 2685{
5806161b 2686 return match_limit (subexp, 0);
177c0ea7 2687}
ca1d1d23 2688
56256c2a 2689DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
8c1a1077
PJ
2690 doc: /* Return a list containing all info on what the last search matched.
2691Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2692All the elements are markers or nil (nil if the Nth pair didn't match)
2693if the last match was on a buffer; integers or nil if a string was matched.
2694Use `store-match-data' to reinstate the data in this list.
2695
41c01205
DK
2696If INTEGERS (the optional first argument) is non-nil, always use
2697integers \(rather than markers) to represent buffer positions. In
2698this case, and if the last match was in a buffer, the buffer will get
2699stored as one additional element at the end of the list.
2700
8c1a1077 2701If REUSE is a list, reuse it as part of the value. If REUSE is long enough
140a6b7e
KS
2702to hold all the values, and if INTEGERS is non-nil, no consing is done.
2703
2704Return value is undefined if the last search failed. */)
8c1a1077 2705 (integers, reuse)
56256c2a 2706 Lisp_Object integers, reuse;
ca1d1d23 2707{
56256c2a 2708 Lisp_Object tail, prev;
4746118a 2709 Lisp_Object *data;
ca1d1d23
JB
2710 int i, len;
2711
daa37602 2712 if (NILP (last_thing_searched))
c36bcf1b 2713 return Qnil;
daa37602 2714
6bbd7a29
GM
2715 prev = Qnil;
2716
41c01205 2717 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2718 * sizeof (Lisp_Object));
2719
41c01205 2720 len = 0;
4746118a 2721 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2722 {
2723 int start = search_regs.start[i];
2724 if (start >= 0)
2725 {
56256c2a
RS
2726 if (EQ (last_thing_searched, Qt)
2727 || ! NILP (integers))
ca1d1d23 2728 {
c235cce7
KH
2729 XSETFASTINT (data[2 * i], start);
2730 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2731 }
0ed62dc7 2732 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2733 {
2734 data[2 * i] = Fmake_marker ();
daa37602
JB
2735 Fset_marker (data[2 * i],
2736 make_number (start),
2737 last_thing_searched);
ca1d1d23
JB
2738 data[2 * i + 1] = Fmake_marker ();
2739 Fset_marker (data[2 * i + 1],
177c0ea7 2740 make_number (search_regs.end[i]),
daa37602 2741 last_thing_searched);
ca1d1d23 2742 }
daa37602
JB
2743 else
2744 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2745 abort ();
2746
41c01205 2747 len = 2*(i+1);
ca1d1d23
JB
2748 }
2749 else
2750 data[2 * i] = data [2 * i + 1] = Qnil;
2751 }
56256c2a 2752
bd2cbd56 2753 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2754 {
bd2cbd56 2755 data[len] = last_thing_searched;
41c01205
DK
2756 len++;
2757 }
2758
56256c2a
RS
2759 /* If REUSE is not usable, cons up the values and return them. */
2760 if (! CONSP (reuse))
41c01205 2761 return Flist (len, data);
56256c2a
RS
2762
2763 /* If REUSE is a list, store as many value elements as will fit
2764 into the elements of REUSE. */
2765 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2766 i++, tail = XCDR (tail))
56256c2a 2767 {
41c01205 2768 if (i < len)
f3fbd155 2769 XSETCAR (tail, data[i]);
56256c2a 2770 else
f3fbd155 2771 XSETCAR (tail, Qnil);
56256c2a
RS
2772 prev = tail;
2773 }
2774
2775 /* If we couldn't fit all value elements into REUSE,
2776 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2777 if (i < len)
2778 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2779
2780 return reuse;
ca1d1d23
JB
2781}
2782
2783
3f1c005b 2784DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
8c1a1077
PJ
2785 doc: /* Set internal data on last search match from elements of LIST.
2786LIST should have been created by calling `match-data' previously. */)
2787 (list)
ca1d1d23
JB
2788 register Lisp_Object list;
2789{
2790 register int i;
2791 register Lisp_Object marker;
2792
7074fde6
FP
2793 if (running_asynch_code)
2794 save_search_regs ();
2795
ca1d1d23 2796 if (!CONSP (list) && !NILP (list))
b37902c8 2797 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2798
41c01205
DK
2799 /* Unless we find a marker with a buffer or an explicit buffer
2800 in LIST, assume that this match data came from a string. */
daa37602
JB
2801 last_thing_searched = Qt;
2802
4746118a
JB
2803 /* Allocate registers if they don't already exist. */
2804 {
d084e942 2805 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2806
2807 if (length > search_regs.num_regs)
2808 {
1113d9db
JB
2809 if (search_regs.num_regs == 0)
2810 {
2811 search_regs.start
2812 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2813 search_regs.end
2814 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2815 }
4746118a 2816 else
1113d9db
JB
2817 {
2818 search_regs.start
2819 = (regoff_t *) xrealloc (search_regs.start,
2820 length * sizeof (regoff_t));
2821 search_regs.end
2822 = (regoff_t *) xrealloc (search_regs.end,
2823 length * sizeof (regoff_t));
2824 }
4746118a 2825
e62371e9
KH
2826 for (i = search_regs.num_regs; i < length; i++)
2827 search_regs.start[i] = -1;
2828
487282dc 2829 search_regs.num_regs = length;
4746118a 2830 }
ca1d1d23 2831
c3762cbd 2832 for (i = 0;; i++)
41c01205
DK
2833 {
2834 marker = Fcar (list);
bd2cbd56 2835 if (BUFFERP (marker))
c3762cbd 2836 {
bd2cbd56 2837 last_thing_searched = marker;
c3762cbd
DK
2838 break;
2839 }
2840 if (i >= length)
2841 break;
41c01205
DK
2842 if (NILP (marker))
2843 {
2844 search_regs.start[i] = -1;
2845 list = Fcdr (list);
2846 }
2847 else
2848 {
2849 int from;
2850
2851 if (MARKERP (marker))
2852 {
2853 if (XMARKER (marker)->buffer == 0)
2854 XSETFASTINT (marker, 0);
2855 else
2856 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2857 }
2858
2859 CHECK_NUMBER_COERCE_MARKER (marker);
2860 from = XINT (marker);
2861 list = Fcdr (list);
2862
2863 marker = Fcar (list);
2864 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2865 XSETFASTINT (marker, 0);
2866
2867 CHECK_NUMBER_COERCE_MARKER (marker);
2868 search_regs.start[i] = from;
2869 search_regs.end[i] = XINT (marker);
2870 }
2871 list = Fcdr (list);
2872 }
ca1d1d23 2873
41c01205
DK
2874 for (; i < search_regs.num_regs; i++)
2875 search_regs.start[i] = -1;
2876 }
ca1d1d23 2877
177c0ea7 2878 return Qnil;
ca1d1d23
JB
2879}
2880
7074fde6
FP
2881/* If non-zero the match data have been saved in saved_search_regs
2882 during the execution of a sentinel or filter. */
75ebf74b 2883static int search_regs_saved;
7074fde6 2884static struct re_registers saved_search_regs;
41c01205 2885static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2886
2887/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2888 if asynchronous code (filter or sentinel) is running. */
2889static void
2890save_search_regs ()
2891{
2892 if (!search_regs_saved)
2893 {
2894 saved_search_regs.num_regs = search_regs.num_regs;
2895 saved_search_regs.start = search_regs.start;
2896 saved_search_regs.end = search_regs.end;
41c01205
DK
2897 saved_last_thing_searched = last_thing_searched;
2898 last_thing_searched = Qnil;
7074fde6 2899 search_regs.num_regs = 0;
2d4a771a
RS
2900 search_regs.start = 0;
2901 search_regs.end = 0;
7074fde6
FP
2902
2903 search_regs_saved = 1;
2904 }
2905}
2906
2907/* Called upon exit from filters and sentinels. */
2908void
2909restore_match_data ()
2910{
2911 if (search_regs_saved)
2912 {
2913 if (search_regs.num_regs > 0)
2914 {
2915 xfree (search_regs.start);
2916 xfree (search_regs.end);
2917 }
2918 search_regs.num_regs = saved_search_regs.num_regs;
2919 search_regs.start = saved_search_regs.start;
2920 search_regs.end = saved_search_regs.end;
41c01205
DK
2921 last_thing_searched = saved_last_thing_searched;
2922 saved_last_thing_searched = Qnil;
7074fde6
FP
2923 search_regs_saved = 0;
2924 }
2925}
2926
ca1d1d23
JB
2927/* Quote a string to inactivate reg-expr chars */
2928
2929DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
2930 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
2931 (string)
5806161b 2932 Lisp_Object string;
ca1d1d23
JB
2933{
2934 register unsigned char *in, *out, *end;
2935 register unsigned char *temp;
0c8533c6 2936 int backslashes_added = 0;
ca1d1d23 2937
b7826503 2938 CHECK_STRING (string);
ca1d1d23 2939
d5db4077 2940 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
2941
2942 /* Now copy the data into the new string, inserting escapes. */
2943
d5db4077
KR
2944 in = SDATA (string);
2945 end = in + SBYTES (string);
177c0ea7 2946 out = temp;
ca1d1d23
JB
2947
2948 for (; in != end; in++)
2949 {
2950 if (*in == '[' || *in == ']'
2951 || *in == '*' || *in == '.' || *in == '\\'
2952 || *in == '?' || *in == '+'
2953 || *in == '^' || *in == '$')
0c8533c6 2954 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2955 *out++ = *in;
2956 }
2957
3f8100f1 2958 return make_specified_string (temp,
d5db4077 2959 SCHARS (string) + backslashes_added,
3f8100f1
RS
2960 out - temp,
2961 STRING_MULTIBYTE (string));
ca1d1d23 2962}
177c0ea7 2963\f
dfcf069d 2964void
ca1d1d23
JB
2965syms_of_search ()
2966{
2967 register int i;
2968
487282dc
KH
2969 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2970 {
2971 searchbufs[i].buf.allocated = 100;
b23c0a83 2972 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
2973 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2974 searchbufs[i].regexp = Qnil;
2975 staticpro (&searchbufs[i].regexp);
2976 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2977 }
2978 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2979
2980 Qsearch_failed = intern ("search-failed");
2981 staticpro (&Qsearch_failed);
2982 Qinvalid_regexp = intern ("invalid-regexp");
2983 staticpro (&Qinvalid_regexp);
2984
2985 Fput (Qsearch_failed, Qerror_conditions,
2986 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2987 Fput (Qsearch_failed, Qerror_message,
2988 build_string ("Search failed"));
2989
2990 Fput (Qinvalid_regexp, Qerror_conditions,
2991 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2992 Fput (Qinvalid_regexp, Qerror_message,
2993 build_string ("Invalid regexp"));
2994
daa37602
JB
2995 last_thing_searched = Qnil;
2996 staticpro (&last_thing_searched);
2997
0f6af254
DK
2998 saved_last_thing_searched = Qnil;
2999 staticpro (&saved_last_thing_searched);
3000
ca1d1d23 3001 defsubr (&Slooking_at);
b819a390
RS
3002 defsubr (&Sposix_looking_at);
3003 defsubr (&Sstring_match);
3004 defsubr (&Sposix_string_match);
ca1d1d23
JB
3005 defsubr (&Ssearch_forward);
3006 defsubr (&Ssearch_backward);
3007 defsubr (&Sword_search_forward);
3008 defsubr (&Sword_search_backward);
3009 defsubr (&Sre_search_forward);
3010 defsubr (&Sre_search_backward);
b819a390
RS
3011 defsubr (&Sposix_search_forward);
3012 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3013 defsubr (&Sreplace_match);
3014 defsubr (&Smatch_beginning);
3015 defsubr (&Smatch_end);
3016 defsubr (&Smatch_data);
3f1c005b 3017 defsubr (&Sset_match_data);
ca1d1d23
JB
3018 defsubr (&Sregexp_quote);
3019}
ab5796a9
MB
3020
3021/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3022 (do not change this comment) */