Mention C-/.
[bpt/emacs.git] / src / search.c
CommitLineData
ca1d1d23 1/* String search routines for GNU Emacs.
bd2cbd56
SM
2 Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
3 Free Software Foundation, Inc.
ca1d1d23
JB
4
5This file is part of GNU Emacs.
6
7GNU Emacs is free software; you can redistribute it and/or modify
8it under the terms of the GNU General Public License as published by
7c938215 9the Free Software Foundation; either version 2, or (at your option)
ca1d1d23
JB
10any later version.
11
12GNU Emacs is distributed in the hope that it will be useful,
13but WITHOUT ANY WARRANTY; without even the implied warranty of
14MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15GNU General Public License for more details.
16
17You should have received a copy of the GNU General Public License
18along with GNU Emacs; see the file COPYING. If not, write to
3b7ad313
EN
19the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20Boston, MA 02111-1307, USA. */
ca1d1d23
JB
21
22
18160b98 23#include <config.h>
ca1d1d23
JB
24#include "lisp.h"
25#include "syntax.h"
5679531d 26#include "category.h"
ca1d1d23 27#include "buffer.h"
5679531d 28#include "charset.h"
9169c321 29#include "region-cache.h"
ca1d1d23 30#include "commands.h"
9ac0d9e0 31#include "blockinput.h"
bf1760bb 32#include "intervals.h"
4746118a 33
ca1d1d23
JB
34#include <sys/types.h>
35#include "regex.h"
36
1d288aef 37#define REGEXP_CACHE_SIZE 20
ca1d1d23 38
487282dc
KH
39/* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
1d288aef
RS
41struct regexp_cache
42{
487282dc 43 struct regexp_cache *next;
ecdb561e 44 Lisp_Object regexp, whitespace_regexp;
487282dc
KH
45 struct re_pattern_buffer buf;
46 char fastmap[0400];
b819a390
RS
47 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
48 char posix;
487282dc 49};
ca1d1d23 50
487282dc
KH
51/* The instances of that struct. */
52struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
ca1d1d23 53
487282dc
KH
54/* The head of the linked list; points to the most recently used buffer. */
55struct regexp_cache *searchbuf_head;
ca1d1d23 56
ca1d1d23 57
4746118a
JB
58/* Every call to re_match, etc., must pass &search_regs as the regs
59 argument unless you can show it is unnecessary (i.e., if re_match
60 is certainly going to be called again before region-around-match
61 can be called).
62
63 Since the registers are now dynamically allocated, we need to make
64 sure not to refer to the Nth register before checking that it has
1113d9db
JB
65 been allocated by checking search_regs.num_regs.
66
67 The regex code keeps track of whether it has allocated the search
487282dc
KH
68 buffer using bits in the re_pattern_buffer. This means that whenever
69 you compile a new pattern, it completely forgets whether it has
1113d9db
JB
70 allocated any registers, and will allocate new registers the next
71 time you call a searching or matching function. Therefore, we need
72 to call re_set_registers after compiling a new pattern or after
73 setting the match registers, so that the regex functions will be
74 able to free or re-allocate it properly. */
ca1d1d23
JB
75static struct re_registers search_regs;
76
daa37602
JB
77/* The buffer in which the last search was performed, or
78 Qt if the last search was done in a string;
79 Qnil if no searching has been done yet. */
80static Lisp_Object last_thing_searched;
ca1d1d23 81
8e6208c5 82/* error condition signaled when regexp compile_pattern fails */
ca1d1d23
JB
83
84Lisp_Object Qinvalid_regexp;
85
f31a9a68
RS
86Lisp_Object Vsearch_whitespace_regexp;
87
ca325161 88static void set_search_regs ();
044f81f1 89static void save_search_regs ();
facdc750
RS
90static int simple_search ();
91static int boyer_moore ();
b819a390
RS
92static int search_buffer ();
93
ca1d1d23
JB
/* Report, as a Lisp error, that the regexp matcher overflowed its
   stack.  The callers in this file invoke it when a match or search
   routine returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
99
b819a390
RS
100/* Compile a regexp and signal a Lisp error if anything goes wrong.
101 PATTERN is the pattern to compile.
102 CP is the place to put the result.
facdc750 103 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
104 REGP is the structure that says where to store the "register"
105 values that will result from matching this pattern.
106 If it is 0, we should compile the pattern not to record any
107 subexpression bounds.
108 POSIX is nonzero if we want full backtracking (POSIX style)
5679531d
KH
109 for this pattern. 0 means backtrack only enough to get a valid match.
110 MULTIBYTE is nonzero if we want to handle multibyte characters in
111 PATTERN. 0 means all multibyte characters are recognized just as
ecdb561e
RS
112 sequences of binary data.
113
114 The behavior also depends on Vsearch_whitespace_regexp. */
ca1d1d23 115
487282dc 116static void
5679531d 117compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
487282dc 118 struct regexp_cache *cp;
ca1d1d23 119 Lisp_Object pattern;
facdc750 120 Lisp_Object translate;
487282dc 121 struct re_registers *regp;
b819a390 122 int posix;
5679531d 123 int multibyte;
ca1d1d23 124{
7276d3d8 125 unsigned char *raw_pattern;
f8bd51c4 126 int raw_pattern_size;
d451e4db 127 char *val;
b819a390 128 reg_syntax_t old;
ca1d1d23 129
f8bd51c4
KH
130 /* MULTIBYTE says whether the text to be searched is multibyte.
131 We must convert PATTERN to match that, or we will not really
132 find things right. */
133
134 if (multibyte == STRING_MULTIBYTE (pattern))
135 {
d5db4077
KR
136 raw_pattern = (unsigned char *) SDATA (pattern);
137 raw_pattern_size = SBYTES (pattern);
f8bd51c4
KH
138 }
139 else if (multibyte)
140 {
d5db4077
KR
141 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
142 SCHARS (pattern));
7276d3d8 143 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
144 copy_text (SDATA (pattern), raw_pattern,
145 SCHARS (pattern), 0, 1);
f8bd51c4
KH
146 }
147 else
148 {
149 /* Converting multibyte to single-byte.
150
151 ??? Perhaps this conversion should be done in a special way
152 by subtracting nonascii-insert-offset from each non-ASCII char,
153 so that only the multibyte chars which really correspond to
154 the chosen single-byte character set can possibly match. */
d5db4077 155 raw_pattern_size = SCHARS (pattern);
7276d3d8 156 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
157 copy_text (SDATA (pattern), raw_pattern,
158 SBYTES (pattern), 1, 0);
f8bd51c4
KH
159 }
160
487282dc 161 cp->regexp = Qnil;
59fab369 162 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
b819a390 163 cp->posix = posix;
5679531d 164 cp->buf.multibyte = multibyte;
ecdb561e 165 cp->whitespace_regexp = Vsearch_whitespace_regexp;
9ac0d9e0 166 BLOCK_INPUT;
fb4a568d 167 old = re_set_syntax (RE_SYNTAX_EMACS
b819a390 168 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
f31a9a68
RS
169
170 re_set_whitespace_regexp (NILP (Vsearch_whitespace_regexp) ? NULL
171 : SDATA (Vsearch_whitespace_regexp));
172
7276d3d8
RS
173 val = (char *) re_compile_pattern ((char *)raw_pattern,
174 raw_pattern_size, &cp->buf);
f31a9a68
RS
175
176 re_set_whitespace_regexp (NULL);
177
b819a390 178 re_set_syntax (old);
9ac0d9e0 179 UNBLOCK_INPUT;
ca1d1d23 180 if (val)
487282dc 181 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
1113d9db 182
487282dc 183 cp->regexp = Fcopy_sequence (pattern);
487282dc
KH
184}
185
6efc7887
RS
186/* Shrink each compiled regexp buffer in the cache
187 to the size actually used right now.
188 This is called from garbage collection. */
189
190void
191shrink_regexp_cache ()
192{
a968f437 193 struct regexp_cache *cp;
6efc7887
RS
194
195 for (cp = searchbuf_head; cp != 0; cp = cp->next)
196 {
197 cp->buf.allocated = cp->buf.used;
198 cp->buf.buffer
b23c0a83 199 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
6efc7887
RS
200 }
201}
202
487282dc 203/* Compile a regexp if necessary, but first check to see if there's one in
b819a390
RS
204 the cache.
205 PATTERN is the pattern to compile.
facdc750 206 TRANSLATE is a translation table for ignoring case, or nil for none.
b819a390
RS
207 REGP is the structure that says where to store the "register"
208 values that will result from matching this pattern.
209 If it is 0, we should compile the pattern not to record any
210 subexpression bounds.
211 POSIX is nonzero if we want full backtracking (POSIX style)
212 for this pattern. 0 means backtrack only enough to get a valid match. */
487282dc
KH
213
214struct re_pattern_buffer *
0c8533c6 215compile_pattern (pattern, regp, translate, posix, multibyte)
487282dc
KH
216 Lisp_Object pattern;
217 struct re_registers *regp;
facdc750 218 Lisp_Object translate;
0c8533c6 219 int posix, multibyte;
487282dc
KH
220{
221 struct regexp_cache *cp, **cpp;
222
223 for (cpp = &searchbuf_head; ; cpp = &cp->next)
224 {
225 cp = *cpp;
f1b9c7c1
KR
226 /* Entries are initialized to nil, and may be set to nil by
227 compile_pattern_1 if the pattern isn't valid. Don't apply
49a5f770
KR
228 string accessors in those cases. However, compile_pattern_1
229 is only applied to the cache entry we pick here to reuse. So
230 nil should never appear before a non-nil entry. */
7c752c80 231 if (NILP (cp->regexp))
f1b9c7c1 232 goto compile_it;
d5db4077 233 if (SCHARS (cp->regexp) == SCHARS (pattern)
cf69b13e 234 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
1d288aef 235 && !NILP (Fstring_equal (cp->regexp, pattern))
59fab369 236 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
5679531d 237 && cp->posix == posix
ecdb561e
RS
238 && cp->buf.multibyte == multibyte
239 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_whitespace_regexp)))
487282dc
KH
240 break;
241
f1b9c7c1
KR
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
244 string value. */
487282dc
KH
245 if (cp->next == 0)
246 {
f1b9c7c1 247 compile_it:
5679531d 248 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
487282dc
KH
249 break;
250 }
251 }
252
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
256 *cpp = cp->next;
257 cp->next = searchbuf_head;
258 searchbuf_head = cp;
1113d9db 259
6639708c
RS
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
262 if (regp)
263 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
264
487282dc 265 return &cp->buf;
ca1d1d23
JB
266}
267
268/* Error condition used for failing searches */
269Lisp_Object Qsearch_failed;
270
271Lisp_Object
272signal_failure (arg)
273 Lisp_Object arg;
274{
275 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
276 return Qnil;
277}
278\f
b819a390
RS
279static Lisp_Object
280looking_at_1 (string, posix)
ca1d1d23 281 Lisp_Object string;
b819a390 282 int posix;
ca1d1d23
JB
283{
284 Lisp_Object val;
285 unsigned char *p1, *p2;
286 int s1, s2;
287 register int i;
487282dc 288 struct re_pattern_buffer *bufp;
ca1d1d23 289
7074fde6
FP
290 if (running_asynch_code)
291 save_search_regs ();
292
b7826503 293 CHECK_STRING (string);
487282dc
KH
294 bufp = compile_pattern (string, &search_regs,
295 (!NILP (current_buffer->case_fold_search)
facdc750 296 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
297 posix,
298 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23
JB
299
300 immediate_quit = 1;
301 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
302
303 /* Get pointers and sizes of the two strings
304 that make up the visible portion of the buffer. */
305
306 p1 = BEGV_ADDR;
fa8ed3e0 307 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 308 p2 = GAP_END_ADDR;
fa8ed3e0 309 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
310 if (s1 < 0)
311 {
312 p2 = p1;
fa8ed3e0 313 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
314 s1 = 0;
315 }
316 if (s2 < 0)
317 {
fa8ed3e0 318 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
319 s2 = 0;
320 }
8bb43c28
RS
321
322 re_match_object = Qnil;
177c0ea7 323
487282dc 324 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
fa8ed3e0
RS
325 PT_BYTE - BEGV_BYTE, &search_regs,
326 ZV_BYTE - BEGV_BYTE);
de182d70 327 immediate_quit = 0;
177c0ea7 328
ca1d1d23
JB
329 if (i == -2)
330 matcher_overflow ();
331
332 val = (0 <= i ? Qt : Qnil);
fa8ed3e0
RS
333 if (i >= 0)
334 for (i = 0; i < search_regs.num_regs; i++)
335 if (search_regs.start[i] >= 0)
336 {
337 search_regs.start[i]
338 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
339 search_regs.end[i]
340 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
341 }
a3668d92 342 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
343 return val;
344}
345
b819a390 346DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
8c1a1077
PJ
347 doc: /* Return t if text after point matches regular expression REGEXP.
348This function modifies the match data that `match-beginning',
349`match-end' and `match-data' access; save and restore the match
350data if you want to preserve them. */)
351 (regexp)
94f94972 352 Lisp_Object regexp;
b819a390 353{
94f94972 354 return looking_at_1 (regexp, 0);
b819a390
RS
355}
356
357DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
8c1a1077
PJ
358 doc: /* Return t if text after point matches regular expression REGEXP.
359Find the longest match, in accord with Posix regular expression rules.
360This function modifies the match data that `match-beginning',
361`match-end' and `match-data' access; save and restore the match
362data if you want to preserve them. */)
363 (regexp)
94f94972 364 Lisp_Object regexp;
b819a390 365{
94f94972 366 return looking_at_1 (regexp, 1);
b819a390
RS
367}
368\f
369static Lisp_Object
370string_match_1 (regexp, string, start, posix)
ca1d1d23 371 Lisp_Object regexp, string, start;
b819a390 372 int posix;
ca1d1d23
JB
373{
374 int val;
487282dc 375 struct re_pattern_buffer *bufp;
0c8533c6
RS
376 int pos, pos_byte;
377 int i;
ca1d1d23 378
7074fde6
FP
379 if (running_asynch_code)
380 save_search_regs ();
381
b7826503
PJ
382 CHECK_STRING (regexp);
383 CHECK_STRING (string);
ca1d1d23
JB
384
385 if (NILP (start))
0c8533c6 386 pos = 0, pos_byte = 0;
ca1d1d23
JB
387 else
388 {
d5db4077 389 int len = SCHARS (string);
ca1d1d23 390
b7826503 391 CHECK_NUMBER (start);
0c8533c6
RS
392 pos = XINT (start);
393 if (pos < 0 && -pos <= len)
394 pos = len + pos;
395 else if (0 > pos || pos > len)
ca1d1d23 396 args_out_of_range (string, start);
0c8533c6 397 pos_byte = string_char_to_byte (string, pos);
ca1d1d23
JB
398 }
399
487282dc
KH
400 bufp = compile_pattern (regexp, &search_regs,
401 (!NILP (current_buffer->case_fold_search)
facdc750 402 ? DOWNCASE_TABLE : Qnil),
0c8533c6
RS
403 posix,
404 STRING_MULTIBYTE (string));
ca1d1d23 405 immediate_quit = 1;
8bb43c28 406 re_match_object = string;
177c0ea7 407
d5db4077
KR
408 val = re_search (bufp, (char *) SDATA (string),
409 SBYTES (string), pos_byte,
410 SBYTES (string) - pos_byte,
ca1d1d23
JB
411 &search_regs);
412 immediate_quit = 0;
daa37602 413 last_thing_searched = Qt;
ca1d1d23
JB
414 if (val == -2)
415 matcher_overflow ();
416 if (val < 0) return Qnil;
0c8533c6
RS
417
418 for (i = 0; i < search_regs.num_regs; i++)
419 if (search_regs.start[i] >= 0)
420 {
421 search_regs.start[i]
422 = string_byte_to_char (string, search_regs.start[i]);
423 search_regs.end[i]
424 = string_byte_to_char (string, search_regs.end[i]);
425 }
426
427 return make_number (string_byte_to_char (string, val));
ca1d1d23 428}
e59a8453 429
b819a390 430DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
8c1a1077
PJ
431 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
432Case is ignored if `case-fold-search' is non-nil in the current buffer.
433If third arg START is non-nil, start search at that index in STRING.
434For index of first char beyond the match, do (match-end 0).
435`match-end' and `match-beginning' also give indices of substrings
2bd2f32d
RS
436matched by parenthesis constructs in the pattern.
437
438You can use the function `match-string' to extract the substrings
439matched by the parenthesis constructions in REGEXP. */)
8c1a1077 440 (regexp, string, start)
b819a390
RS
441 Lisp_Object regexp, string, start;
442{
443 return string_match_1 (regexp, string, start, 0);
444}
445
446DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
8c1a1077
PJ
447 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
448Find the longest match, in accord with Posix regular expression rules.
449Case is ignored if `case-fold-search' is non-nil in the current buffer.
450If third arg START is non-nil, start search at that index in STRING.
451For index of first char beyond the match, do (match-end 0).
452`match-end' and `match-beginning' also give indices of substrings
453matched by parenthesis constructs in the pattern. */)
454 (regexp, string, start)
b819a390
RS
455 Lisp_Object regexp, string, start;
456{
457 return string_match_1 (regexp, string, start, 1);
458}
459
e59a8453
RS
460/* Match REGEXP against STRING, searching all of STRING,
461 and return the index of the match, or negative on failure.
462 This does not clobber the match data. */
463
464int
465fast_string_match (regexp, string)
466 Lisp_Object regexp, string;
467{
468 int val;
487282dc 469 struct re_pattern_buffer *bufp;
e59a8453 470
facdc750
RS
471 bufp = compile_pattern (regexp, 0, Qnil,
472 0, STRING_MULTIBYTE (string));
e59a8453 473 immediate_quit = 1;
8bb43c28 474 re_match_object = string;
177c0ea7 475
d5db4077
KR
476 val = re_search (bufp, (char *) SDATA (string),
477 SBYTES (string), 0,
478 SBYTES (string), 0);
e59a8453
RS
479 immediate_quit = 0;
480 return val;
481}
5679531d
KH
482
483/* Match REGEXP against STRING, searching all of STRING ignoring case,
484 and return the index of the match, or negative on failure.
0c8533c6
RS
485 This does not clobber the match data.
486 We assume that STRING contains single-byte characters. */
5679531d
KH
487
488extern Lisp_Object Vascii_downcase_table;
489
490int
b4577c63 491fast_c_string_match_ignore_case (regexp, string)
5679531d 492 Lisp_Object regexp;
96b80561 493 const char *string;
5679531d
KH
494{
495 int val;
496 struct re_pattern_buffer *bufp;
497 int len = strlen (string);
498
0c8533c6 499 regexp = string_make_unibyte (regexp);
b4577c63 500 re_match_object = Qt;
5679531d 501 bufp = compile_pattern (regexp, 0,
facdc750 502 Vascii_downcase_table, 0,
f8bd51c4 503 0);
5679531d
KH
504 immediate_quit = 1;
505 val = re_search (bufp, string, len, 0, len, 0);
506 immediate_quit = 0;
507 return val;
508}
be5f4dfb
KH
509
510/* Like fast_string_match but ignore case. */
511
512int
513fast_string_match_ignore_case (regexp, string)
514 Lisp_Object regexp, string;
515{
516 int val;
517 struct re_pattern_buffer *bufp;
518
519 bufp = compile_pattern (regexp, 0, Vascii_downcase_table,
520 0, STRING_MULTIBYTE (string));
521 immediate_quit = 1;
522 re_match_object = string;
523
524 val = re_search (bufp, (char *) SDATA (string),
525 SBYTES (string), 0,
526 SBYTES (string), 0);
527 immediate_quit = 0;
528 return val;
529}
ca1d1d23 530\f
9169c321
JB
531/* The newline cache: remembering which sections of text have no newlines. */
532
533/* If the user has requested newline caching, make sure it's on.
534 Otherwise, make sure it's off.
535 This is our cheezy way of associating an action with the change of
536 state of a buffer-local variable. */
537static void
538newline_cache_on_off (buf)
539 struct buffer *buf;
540{
541 if (NILP (buf->cache_long_line_scans))
542 {
543 /* It should be off. */
544 if (buf->newline_cache)
545 {
546 free_region_cache (buf->newline_cache);
547 buf->newline_cache = 0;
548 }
549 }
550 else
551 {
552 /* It should be on. */
553 if (buf->newline_cache == 0)
554 buf->newline_cache = new_region_cache ();
555 }
556}
557
558\f
559/* Search for COUNT instances of the character TARGET between START and END.
560
561 If COUNT is positive, search forwards; END must be >= START.
562 If COUNT is negative, search backwards for the -COUNTth instance;
563 END must be <= START.
564 If COUNT is zero, do anything you please; run rogue, for all I care.
565
566 If END is zero, use BEGV or ZV instead, as appropriate for the
567 direction indicated by COUNT.
ffd56f97
JB
568
569 If we find COUNT instances, set *SHORTAGE to zero, and return the
a9f2a45f 570 position past the COUNTth match. Note that for reverse motion
5bfe95c9 571 this is not the same as the usual convention for Emacs motion commands.
ffd56f97 572
9169c321
JB
573 If we don't find COUNT instances before reaching END, set *SHORTAGE
574 to the number of TARGETs left unfound, and return END.
ffd56f97 575
087a5f81
RS
576 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
577 except when inside redisplay. */
578
dfcf069d 579int
9169c321
JB
580scan_buffer (target, start, end, count, shortage, allow_quit)
581 register int target;
582 int start, end;
583 int count;
584 int *shortage;
087a5f81 585 int allow_quit;
ca1d1d23 586{
9169c321 587 struct region_cache *newline_cache;
177c0ea7 588 int direction;
ffd56f97 589
9169c321
JB
590 if (count > 0)
591 {
592 direction = 1;
593 if (! end) end = ZV;
594 }
595 else
596 {
597 direction = -1;
598 if (! end) end = BEGV;
599 }
ffd56f97 600
9169c321
JB
601 newline_cache_on_off (current_buffer);
602 newline_cache = current_buffer->newline_cache;
ca1d1d23
JB
603
604 if (shortage != 0)
605 *shortage = 0;
606
087a5f81 607 immediate_quit = allow_quit;
ca1d1d23 608
ffd56f97 609 if (count > 0)
9169c321 610 while (start != end)
ca1d1d23 611 {
9169c321
JB
612 /* Our innermost scanning loop is very simple; it doesn't know
613 about gaps, buffer ends, or the newline cache. ceiling is
614 the position of the last character before the next such
615 obstacle --- the last character the dumb search loop should
616 examine. */
fa8ed3e0
RS
617 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
618 int start_byte = CHAR_TO_BYTE (start);
67ce527d 619 int tem;
9169c321
JB
620
621 /* If we're looking for a newline, consult the newline cache
622 to see where we can avoid some scanning. */
623 if (target == '\n' && newline_cache)
624 {
625 int next_change;
626 immediate_quit = 0;
627 while (region_cache_forward
fa8ed3e0
RS
628 (current_buffer, newline_cache, start_byte, &next_change))
629 start_byte = next_change;
cbe0db0d 630 immediate_quit = allow_quit;
9169c321 631
fa8ed3e0
RS
632 /* START should never be after END. */
633 if (start_byte > ceiling_byte)
634 start_byte = ceiling_byte;
9169c321
JB
635
636 /* Now the text after start is an unknown region, and
637 next_change is the position of the next known region. */
fa8ed3e0 638 ceiling_byte = min (next_change - 1, ceiling_byte);
9169c321
JB
639 }
640
641 /* The dumb loop can only scan text stored in contiguous
642 bytes. BUFFER_CEILING_OF returns the last character
643 position that is contiguous, so the ceiling is the
644 position after that. */
67ce527d
KH
645 tem = BUFFER_CEILING_OF (start_byte);
646 ceiling_byte = min (tem, ceiling_byte);
9169c321
JB
647
648 {
177c0ea7 649 /* The termination address of the dumb loop. */
fa8ed3e0
RS
650 register unsigned char *ceiling_addr
651 = BYTE_POS_ADDR (ceiling_byte) + 1;
652 register unsigned char *cursor
653 = BYTE_POS_ADDR (start_byte);
9169c321
JB
654 unsigned char *base = cursor;
655
656 while (cursor < ceiling_addr)
657 {
658 unsigned char *scan_start = cursor;
659
660 /* The dumb loop. */
661 while (*cursor != target && ++cursor < ceiling_addr)
662 ;
663
664 /* If we're looking for newlines, cache the fact that
665 the region from start to cursor is free of them. */
666 if (target == '\n' && newline_cache)
667 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
668 start_byte + scan_start - base,
669 start_byte + cursor - base);
9169c321
JB
670
671 /* Did we find the target character? */
672 if (cursor < ceiling_addr)
673 {
674 if (--count == 0)
675 {
676 immediate_quit = 0;
fa8ed3e0 677 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
9169c321
JB
678 }
679 cursor++;
680 }
681 }
682
fa8ed3e0 683 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321 684 }
ca1d1d23
JB
685 }
686 else
9169c321
JB
687 while (start > end)
688 {
689 /* The last character to check before the next obstacle. */
fa8ed3e0
RS
690 int ceiling_byte = CHAR_TO_BYTE (end);
691 int start_byte = CHAR_TO_BYTE (start);
67ce527d 692 int tem;
9169c321
JB
693
694 /* Consult the newline cache, if appropriate. */
695 if (target == '\n' && newline_cache)
696 {
697 int next_change;
698 immediate_quit = 0;
699 while (region_cache_backward
fa8ed3e0
RS
700 (current_buffer, newline_cache, start_byte, &next_change))
701 start_byte = next_change;
cbe0db0d 702 immediate_quit = allow_quit;
9169c321
JB
703
704 /* Start should never be at or before end. */
fa8ed3e0
RS
705 if (start_byte <= ceiling_byte)
706 start_byte = ceiling_byte + 1;
9169c321
JB
707
708 /* Now the text before start is an unknown region, and
709 next_change is the position of the next known region. */
fa8ed3e0 710 ceiling_byte = max (next_change, ceiling_byte);
9169c321
JB
711 }
712
713 /* Stop scanning before the gap. */
67ce527d
KH
714 tem = BUFFER_FLOOR_OF (start_byte - 1);
715 ceiling_byte = max (tem, ceiling_byte);
9169c321
JB
716
717 {
718 /* The termination address of the dumb loop. */
fa8ed3e0
RS
719 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
720 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
9169c321
JB
721 unsigned char *base = cursor;
722
723 while (cursor >= ceiling_addr)
724 {
725 unsigned char *scan_start = cursor;
726
727 while (*cursor != target && --cursor >= ceiling_addr)
728 ;
729
730 /* If we're looking for newlines, cache the fact that
731 the region from after the cursor to start is free of them. */
732 if (target == '\n' && newline_cache)
733 know_region_cache (current_buffer, newline_cache,
fa8ed3e0
RS
734 start_byte + cursor - base,
735 start_byte + scan_start - base);
9169c321
JB
736
737 /* Did we find the target character? */
738 if (cursor >= ceiling_addr)
739 {
740 if (++count >= 0)
741 {
742 immediate_quit = 0;
fa8ed3e0 743 return BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
744 }
745 cursor--;
746 }
747 }
748
fa8ed3e0 749 start = BYTE_TO_CHAR (start_byte + cursor - base);
9169c321
JB
750 }
751 }
752
ca1d1d23
JB
753 immediate_quit = 0;
754 if (shortage != 0)
ffd56f97 755 *shortage = count * direction;
9169c321 756 return start;
ca1d1d23 757}
fa8ed3e0
RS
758\f
759/* Search for COUNT instances of a line boundary, which means either a
760 newline or (if selective display enabled) a carriage return.
761 Start at START. If COUNT is negative, search backwards.
762
763 We report the resulting position by calling TEMP_SET_PT_BOTH.
764
765 If we find COUNT instances. we position after (always after,
766 even if scanning backwards) the COUNTth match, and return 0.
767
768 If we don't find COUNT instances before reaching the end of the
769 buffer (or the beginning, if scanning backwards), we return
770 the number of line boundaries left unfound, and position at
771 the limit we bumped up against.
772
773 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
d5d57b92 774 except in special cases. */
ca1d1d23 775
63fa018d 776int
fa8ed3e0
RS
777scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
778 int start, start_byte;
779 int limit, limit_byte;
780 register int count;
781 int allow_quit;
63fa018d 782{
fa8ed3e0
RS
783 int direction = ((count > 0) ? 1 : -1);
784
785 register unsigned char *cursor;
786 unsigned char *base;
787
788 register int ceiling;
789 register unsigned char *ceiling_addr;
790
d5d57b92
RS
791 int old_immediate_quit = immediate_quit;
792
fa8ed3e0
RS
793 /* The code that follows is like scan_buffer
794 but checks for either newline or carriage return. */
795
d5d57b92
RS
796 if (allow_quit)
797 immediate_quit++;
fa8ed3e0
RS
798
799 start_byte = CHAR_TO_BYTE (start);
800
801 if (count > 0)
802 {
803 while (start_byte < limit_byte)
804 {
805 ceiling = BUFFER_CEILING_OF (start_byte);
806 ceiling = min (limit_byte - 1, ceiling);
807 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
808 base = (cursor = BYTE_POS_ADDR (start_byte));
809 while (1)
810 {
811 while (*cursor != '\n' && ++cursor != ceiling_addr)
812 ;
813
814 if (cursor != ceiling_addr)
815 {
816 if (--count == 0)
817 {
d5d57b92 818 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
819 start_byte = start_byte + cursor - base + 1;
820 start = BYTE_TO_CHAR (start_byte);
821 TEMP_SET_PT_BOTH (start, start_byte);
822 return 0;
823 }
824 else
825 if (++cursor == ceiling_addr)
826 break;
827 }
828 else
829 break;
830 }
831 start_byte += cursor - base;
832 }
833 }
834 else
835 {
fa8ed3e0
RS
836 while (start_byte > limit_byte)
837 {
838 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
839 ceiling = max (limit_byte, ceiling);
840 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
841 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
842 while (1)
843 {
844 while (--cursor != ceiling_addr && *cursor != '\n')
845 ;
846
847 if (cursor != ceiling_addr)
848 {
849 if (++count == 0)
850 {
d5d57b92 851 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
852 /* Return the position AFTER the match we found. */
853 start_byte = start_byte + cursor - base + 1;
854 start = BYTE_TO_CHAR (start_byte);
855 TEMP_SET_PT_BOTH (start, start_byte);
856 return 0;
857 }
858 }
859 else
860 break;
861 }
862 /* Here we add 1 to compensate for the last decrement
863 of CURSOR, which took it past the valid range. */
864 start_byte += cursor - base + 1;
865 }
866 }
867
868 TEMP_SET_PT_BOTH (limit, limit_byte);
d5d57b92 869 immediate_quit = old_immediate_quit;
fa8ed3e0
RS
870
871 return count * direction;
63fa018d
RS
872}
873
/* Scan from FROM for CNT newlines with scan_buffer, using the default
   end of scan (END argument 0), no shortage report, and ALLOW_QUIT
   zero.  Return the position scan_buffer returns.  */
int
find_next_newline_no_quit (from, cnt)
     register int from, cnt;
{
  int *no_shortage = (int *) 0;

  return scan_buffer ('\n', from, 0, cnt, no_shortage, 0);
}
880
9169c321
JB
/* Like find_next_newline, but return the position before the newline,
   not after, and only search up to TO.  This isn't just
   find_next_newline (...)-1, because you might hit TO: when
   scan_buffer reports a shortage, its result is returned unchanged.  */

int
find_before_next_newline (from, to, cnt)
     int from, to, cnt;
{
  int shortage;
  int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);

  /* Back up over the newline only if all CNT newlines were found.  */
  return shortage == 0 ? pos - 1 : pos;
}
897\f
ca1d1d23
JB
898/* Subroutines of Lisp buffer search functions. */
899
900static Lisp_Object
b819a390 901search_command (string, bound, noerror, count, direction, RE, posix)
ca1d1d23
JB
902 Lisp_Object string, bound, noerror, count;
903 int direction;
904 int RE;
b819a390 905 int posix;
ca1d1d23
JB
906{
907 register int np;
9f43ad85 908 int lim, lim_byte;
ca1d1d23
JB
909 int n = direction;
910
911 if (!NILP (count))
912 {
b7826503 913 CHECK_NUMBER (count);
ca1d1d23
JB
914 n *= XINT (count);
915 }
916
b7826503 917 CHECK_STRING (string);
ca1d1d23 918 if (NILP (bound))
9f43ad85
RS
919 {
920 if (n > 0)
921 lim = ZV, lim_byte = ZV_BYTE;
922 else
923 lim = BEGV, lim_byte = BEGV_BYTE;
924 }
ca1d1d23
JB
925 else
926 {
b7826503 927 CHECK_NUMBER_COERCE_MARKER (bound);
ca1d1d23 928 lim = XINT (bound);
6ec8bbd2 929 if (n > 0 ? lim < PT : lim > PT)
ca1d1d23
JB
930 error ("Invalid search bound (wrong side of point)");
931 if (lim > ZV)
9f43ad85 932 lim = ZV, lim_byte = ZV_BYTE;
588d2fd5 933 else if (lim < BEGV)
9f43ad85 934 lim = BEGV, lim_byte = BEGV_BYTE;
588d2fd5
KH
935 else
936 lim_byte = CHAR_TO_BYTE (lim);
ca1d1d23
JB
937 }
938
9f43ad85 939 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
ca1d1d23 940 (!NILP (current_buffer->case_fold_search)
facdc750 941 ? current_buffer->case_canon_table
3135e9fd 942 : Qnil),
ca1d1d23 943 (!NILP (current_buffer->case_fold_search)
facdc750 944 ? current_buffer->case_eqv_table
3135e9fd 945 : Qnil),
b819a390 946 posix);
ca1d1d23
JB
947 if (np <= 0)
948 {
949 if (NILP (noerror))
950 return signal_failure (string);
951 if (!EQ (noerror, Qt))
952 {
953 if (lim < BEGV || lim > ZV)
954 abort ();
9f43ad85 955 SET_PT_BOTH (lim, lim_byte);
a5f217b8
RS
956 return Qnil;
957#if 0 /* This would be clean, but maybe programs depend on
958 a value of nil here. */
481399bf 959 np = lim;
a5f217b8 960#endif
ca1d1d23 961 }
481399bf
RS
962 else
963 return Qnil;
ca1d1d23
JB
964 }
965
966 if (np < BEGV || np > ZV)
967 abort ();
968
969 SET_PT (np);
970
971 return make_number (np);
972}
973\f
fa8ed3e0
RS
974/* Return 1 if REGEXP it matches just one constant string. */
975
b6d6a51c
KH
976static int
977trivial_regexp_p (regexp)
978 Lisp_Object regexp;
979{
d5db4077
KR
980 int len = SBYTES (regexp);
981 unsigned char *s = SDATA (regexp);
b6d6a51c
KH
982 while (--len >= 0)
983 {
984 switch (*s++)
985 {
986 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
987 return 0;
988 case '\\':
989 if (--len < 0)
990 return 0;
991 switch (*s++)
992 {
993 case '|': case '(': case ')': case '`': case '\'': case 'b':
994 case 'B': case '<': case '>': case 'w': case 'W': case 's':
29f89fe7 995 case 'S': case '=': case '{': case '}': case '_':
5679531d 996 case 'c': case 'C': /* for categoryspec and notcategoryspec */
866f60fd 997 case '1': case '2': case '3': case '4': case '5':
b6d6a51c
KH
998 case '6': case '7': case '8': case '9':
999 return 0;
1000 }
1001 }
1002 }
1003 return 1;
1004}
1005
ca325161 1006/* Search for the n'th occurrence of STRING in the current buffer,
ca1d1d23 1007 starting at position POS and stopping at position LIM,
b819a390 1008 treating STRING as a literal string if RE is false or as
ca1d1d23
JB
1009 a regular expression if RE is true.
1010
1011 If N is positive, searching is forward and LIM must be greater than POS.
1012 If N is negative, searching is backward and LIM must be less than POS.
1013
facdc750 1014 Returns -x if x occurrences remain to be found (x > 0),
ca1d1d23 1015 or else the position at the beginning of the Nth occurrence
b819a390
RS
1016 (if searching backward) or the end (if searching forward).
1017
1018 POSIX is nonzero if we want full backtracking (POSIX style)
1019 for this pattern. 0 means backtrack only enough to get a valid match. */
ca1d1d23 1020
aff2ce94
RS
/* Set OUT to the translation of character D under table TRT.
   OUT receives D unchanged when TRT is nil, or when the entry fetched
   with Faref is not an integer.  TRT and D are each evaluated exactly
   once, so D may have side effects and OUT may be the same lvalue
   as D.  */
#define TRANSLATE(out, trt, d)                                          \
do                                                                      \
  {                                                                     \
    Lisp_Object translate_tbl_ = (trt);                                 \
    int translate_src_ = (d);                                           \
    if (! NILP (translate_tbl_))                                        \
      {                                                                 \
        Lisp_Object translate_ent_;                                     \
        translate_ent_ = Faref (translate_tbl_,                         \
                                make_number (translate_src_));          \
        if (INTEGERP (translate_ent_))                                  \
          (out) = XINT (translate_ent_);                                \
        else                                                            \
          (out) = translate_src_;                                       \
      }                                                                 \
    else                                                                \
      (out) = translate_src_;                                           \
  }                                                                     \
while (0)
facdc750 1037
b819a390 1038static int
9f43ad85
RS
1039search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1040 RE, trt, inverse_trt, posix)
ca1d1d23
JB
1041 Lisp_Object string;
1042 int pos;
9f43ad85 1043 int pos_byte;
ca1d1d23 1044 int lim;
9f43ad85 1045 int lim_byte;
ca1d1d23
JB
1046 int n;
1047 int RE;
facdc750
RS
1048 Lisp_Object trt;
1049 Lisp_Object inverse_trt;
b819a390 1050 int posix;
ca1d1d23 1051{
d5db4077
KR
1052 int len = SCHARS (string);
1053 int len_byte = SBYTES (string);
facdc750 1054 register int i;
ca1d1d23 1055
7074fde6
FP
1056 if (running_asynch_code)
1057 save_search_regs ();
1058
a7e4cdde 1059 /* Searching 0 times means don't move. */
ca1d1d23 1060 /* Null string is found at starting position. */
a7e4cdde 1061 if (len == 0 || n == 0)
ca325161 1062 {
0353b28f 1063 set_search_regs (pos_byte, 0);
ca325161
RS
1064 return pos;
1065 }
3f57a499 1066
f31a9a68 1067 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_whitespace_regexp)))
ca1d1d23 1068 {
facdc750
RS
1069 unsigned char *p1, *p2;
1070 int s1, s2;
487282dc
KH
1071 struct re_pattern_buffer *bufp;
1072
0c8533c6
RS
1073 bufp = compile_pattern (string, &search_regs, trt, posix,
1074 !NILP (current_buffer->enable_multibyte_characters));
ca1d1d23 1075
ca1d1d23
JB
1076 immediate_quit = 1; /* Quit immediately if user types ^G,
1077 because letting this function finish
1078 can take too long. */
1079 QUIT; /* Do a pending quit right away,
1080 to avoid paradoxical behavior */
1081 /* Get pointers and sizes of the two strings
1082 that make up the visible portion of the buffer. */
1083
1084 p1 = BEGV_ADDR;
fa8ed3e0 1085 s1 = GPT_BYTE - BEGV_BYTE;
ca1d1d23 1086 p2 = GAP_END_ADDR;
fa8ed3e0 1087 s2 = ZV_BYTE - GPT_BYTE;
ca1d1d23
JB
1088 if (s1 < 0)
1089 {
1090 p2 = p1;
fa8ed3e0 1091 s2 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1092 s1 = 0;
1093 }
1094 if (s2 < 0)
1095 {
fa8ed3e0 1096 s1 = ZV_BYTE - BEGV_BYTE;
ca1d1d23
JB
1097 s2 = 0;
1098 }
8bb43c28 1099 re_match_object = Qnil;
177c0ea7 1100
ca1d1d23
JB
1101 while (n < 0)
1102 {
42db823b 1103 int val;
487282dc 1104 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1105 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106 &search_regs,
42db823b 1107 /* Don't allow match past current point */
4996330b 1108 pos_byte - BEGV_BYTE);
ca1d1d23 1109 if (val == -2)
b6d6a51c
KH
1110 {
1111 matcher_overflow ();
1112 }
ca1d1d23
JB
1113 if (val >= 0)
1114 {
26aff150 1115 pos_byte = search_regs.start[0] + BEGV_BYTE;
4746118a 1116 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1117 if (search_regs.start[i] >= 0)
1118 {
fa8ed3e0
RS
1119 search_regs.start[i]
1120 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1121 search_regs.end[i]
1122 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1123 }
a3668d92 1124 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1125 /* Set pos to the new position. */
1126 pos = search_regs.start[0];
1127 }
1128 else
1129 {
1130 immediate_quit = 0;
1131 return (n);
1132 }
1133 n++;
1134 }
1135 while (n > 0)
1136 {
42db823b 1137 int val;
487282dc 1138 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
4996330b
KH
1139 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1140 &search_regs,
1141 lim_byte - BEGV_BYTE);
ca1d1d23 1142 if (val == -2)
b6d6a51c
KH
1143 {
1144 matcher_overflow ();
1145 }
ca1d1d23
JB
1146 if (val >= 0)
1147 {
26aff150 1148 pos_byte = search_regs.end[0] + BEGV_BYTE;
4746118a 1149 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
1150 if (search_regs.start[i] >= 0)
1151 {
fa8ed3e0
RS
1152 search_regs.start[i]
1153 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1154 search_regs.end[i]
1155 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
ca1d1d23 1156 }
a3668d92 1157 XSETBUFFER (last_thing_searched, current_buffer);
ca1d1d23
JB
1158 pos = search_regs.end[0];
1159 }
1160 else
1161 {
1162 immediate_quit = 0;
1163 return (0 - n);
1164 }
1165 n--;
1166 }
1167 immediate_quit = 0;
1168 return (pos);
1169 }
1170 else /* non-RE case */
1171 {
facdc750
RS
1172 unsigned char *raw_pattern, *pat;
1173 int raw_pattern_size;
1174 int raw_pattern_size_byte;
1175 unsigned char *patbuf;
1176 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
d5db4077 1177 unsigned char *base_pat = SDATA (string);
facdc750 1178 int charset_base = -1;
040272ce 1179 int boyer_moore_ok = 1;
facdc750
RS
1180
1181 /* MULTIBYTE says whether the text to be searched is multibyte.
1182 We must convert PATTERN to match that, or we will not really
1183 find things right. */
1184
1185 if (multibyte == STRING_MULTIBYTE (string))
1186 {
d5db4077
KR
1187 raw_pattern = (unsigned char *) SDATA (string);
1188 raw_pattern_size = SCHARS (string);
1189 raw_pattern_size_byte = SBYTES (string);
facdc750
RS
1190 }
1191 else if (multibyte)
1192 {
d5db4077 1193 raw_pattern_size = SCHARS (string);
facdc750 1194 raw_pattern_size_byte
d5db4077 1195 = count_size_as_multibyte (SDATA (string),
facdc750 1196 raw_pattern_size);
7276d3d8 1197 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
d5db4077
KR
1198 copy_text (SDATA (string), raw_pattern,
1199 SCHARS (string), 0, 1);
facdc750
RS
1200 }
1201 else
1202 {
1203 /* Converting multibyte to single-byte.
1204
1205 ??? Perhaps this conversion should be done in a special way
1206 by subtracting nonascii-insert-offset from each non-ASCII char,
1207 so that only the multibyte chars which really correspond to
1208 the chosen single-byte character set can possibly match. */
d5db4077
KR
1209 raw_pattern_size = SCHARS (string);
1210 raw_pattern_size_byte = SCHARS (string);
7276d3d8 1211 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
d5db4077
KR
1212 copy_text (SDATA (string), raw_pattern,
1213 SBYTES (string), 1, 0);
facdc750
RS
1214 }
1215
1216 /* Copy and optionally translate the pattern. */
1217 len = raw_pattern_size;
1218 len_byte = raw_pattern_size_byte;
1219 patbuf = (unsigned char *) alloca (len_byte);
1220 pat = patbuf;
1221 base_pat = raw_pattern;
1222 if (multibyte)
1223 {
1224 while (--len >= 0)
1225 {
daaa6ed8 1226 unsigned char str[MAX_MULTIBYTE_LENGTH];
aff2ce94 1227 int c, translated, inverse;
facdc750
RS
1228 int in_charlen, charlen;
1229
1230 /* If we got here and the RE flag is set, it's because we're
1231 dealing with a regexp known to be trivial, so the backslash
1232 just quotes the next character. */
1233 if (RE && *base_pat == '\\')
1234 {
1235 len--;
1236 len_byte--;
1237 base_pat++;
1238 }
1239
1240 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
040272ce 1241
facdc750 1242 /* Translate the character, if requested. */
aff2ce94 1243 TRANSLATE (translated, trt, c);
facdc750
RS
1244 /* If translation changed the byte-length, go back
1245 to the original character. */
daaa6ed8 1246 charlen = CHAR_STRING (translated, str);
facdc750
RS
1247 if (in_charlen != charlen)
1248 {
1249 translated = c;
daaa6ed8 1250 charlen = CHAR_STRING (c, str);
facdc750
RS
1251 }
1252
5ffaf437
RS
1253 /* If we are searching for something strange,
1254 an invalid multibyte code, don't use boyer-moore. */
1255 if (! ASCII_BYTE_P (translated)
1256 && (charlen == 1 /* 8bit code */
1257 || charlen != in_charlen /* invalid multibyte code */
1258 ))
1259 boyer_moore_ok = 0;
1260
aff2ce94
RS
1261 TRANSLATE (inverse, inverse_trt, c);
1262
facdc750
RS
1263 /* Did this char actually get translated?
1264 Would any other char get translated into it? */
aff2ce94 1265 if (translated != c || inverse != c)
facdc750
RS
1266 {
1267 /* Keep track of which character set row
1268 contains the characters that need translation. */
5ffaf437 1269 int charset_base_code = c & ~CHAR_FIELD3_MASK;
d2ac725b
KH
1270 int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1271
1272 if (charset_base_code != inverse_charset_base)
1273 boyer_moore_ok = 0;
1274 else if (charset_base == -1)
facdc750
RS
1275 charset_base = charset_base_code;
1276 else if (charset_base != charset_base_code)
1277 /* If two different rows appear, needing translation,
1278 then we cannot use boyer_moore search. */
040272ce 1279 boyer_moore_ok = 0;
aff2ce94 1280 }
facdc750
RS
1281
1282 /* Store this character into the translated pattern. */
1283 bcopy (str, pat, charlen);
1284 pat += charlen;
1285 base_pat += in_charlen;
1286 len_byte -= in_charlen;
1287 }
1288 }
1289 else
1290 {
040272ce
KH
1291 /* Unibyte buffer. */
1292 charset_base = 0;
facdc750
RS
1293 while (--len >= 0)
1294 {
040272ce 1295 int c, translated;
facdc750
RS
1296
1297 /* If we got here and the RE flag is set, it's because we're
1298 dealing with a regexp known to be trivial, so the backslash
1299 just quotes the next character. */
1300 if (RE && *base_pat == '\\')
1301 {
1302 len--;
1303 base_pat++;
1304 }
1305 c = *base_pat++;
aff2ce94 1306 TRANSLATE (translated, trt, c);
facdc750
RS
1307 *pat++ = translated;
1308 }
1309 }
1310
1311 len_byte = pat - patbuf;
1312 len = raw_pattern_size;
1313 pat = base_pat = patbuf;
1314
040272ce 1315 if (boyer_moore_ok)
facdc750 1316 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
aff2ce94
RS
1317 pos, pos_byte, lim, lim_byte,
1318 charset_base);
facdc750
RS
1319 else
1320 return simple_search (n, pat, len, len_byte, trt,
1321 pos, pos_byte, lim, lim_byte);
1322 }
1323}
1324\f
1325/* Do a simple string search N times for the string PAT,
1326 whose length is LEN/LEN_BYTE,
1327 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1328 TRT is the translation table.
f8bd51c4 1329
facdc750
RS
1330 Return the character position where the match is found.
1331 Otherwise, if M matches remained to be found, return -M.
f8bd51c4 1332
facdc750
RS
1333 This kind of search works regardless of what is in PAT and
1334 regardless of what is in TRT. It is used in cases where
1335 boyer_moore cannot work. */
1336
1337static int
1338simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1339 int n;
1340 unsigned char *pat;
1341 int len, len_byte;
1342 Lisp_Object trt;
1343 int pos, pos_byte;
1344 int lim, lim_byte;
1345{
1346 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
ab228c24 1347 int forward = n > 0;
facdc750
RS
1348
1349 if (lim > pos && multibyte)
1350 while (n > 0)
1351 {
1352 while (1)
f8bd51c4 1353 {
facdc750
RS
1354 /* Try matching at position POS. */
1355 int this_pos = pos;
1356 int this_pos_byte = pos_byte;
1357 int this_len = len;
1358 int this_len_byte = len_byte;
1359 unsigned char *p = pat;
1360 if (pos + len > lim)
1361 goto stop;
1362
1363 while (this_len > 0)
1364 {
1365 int charlen, buf_charlen;
ab228c24 1366 int pat_ch, buf_ch;
facdc750 1367
ab228c24 1368 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1369 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1370 ZV_BYTE - this_pos_byte,
1371 buf_charlen);
aff2ce94 1372 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1373
1374 if (buf_ch != pat_ch)
1375 break;
ab228c24
RS
1376
1377 this_len_byte -= charlen;
1378 this_len--;
1379 p += charlen;
1380
1381 this_pos_byte += buf_charlen;
1382 this_pos++;
facdc750
RS
1383 }
1384
1385 if (this_len == 0)
1386 {
1387 pos += len;
1388 pos_byte += len_byte;
1389 break;
1390 }
1391
1392 INC_BOTH (pos, pos_byte);
f8bd51c4 1393 }
facdc750
RS
1394
1395 n--;
1396 }
1397 else if (lim > pos)
1398 while (n > 0)
1399 {
1400 while (1)
f8bd51c4 1401 {
facdc750
RS
1402 /* Try matching at position POS. */
1403 int this_pos = pos;
1404 int this_len = len;
1405 unsigned char *p = pat;
1406
1407 if (pos + len > lim)
1408 goto stop;
1409
1410 while (this_len > 0)
1411 {
1412 int pat_ch = *p++;
1413 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1414 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1415
1416 if (buf_ch != pat_ch)
1417 break;
ab228c24
RS
1418
1419 this_len--;
1420 this_pos++;
facdc750
RS
1421 }
1422
1423 if (this_len == 0)
1424 {
1425 pos += len;
1426 break;
1427 }
1428
1429 pos++;
f8bd51c4 1430 }
facdc750
RS
1431
1432 n--;
1433 }
1434 /* Backwards search. */
1435 else if (lim < pos && multibyte)
1436 while (n < 0)
1437 {
1438 while (1)
f8bd51c4 1439 {
facdc750
RS
1440 /* Try matching at position POS. */
1441 int this_pos = pos - len;
1442 int this_pos_byte = pos_byte - len_byte;
1443 int this_len = len;
1444 int this_len_byte = len_byte;
1445 unsigned char *p = pat;
1446
1447 if (pos - len < lim)
1448 goto stop;
1449
1450 while (this_len > 0)
1451 {
1452 int charlen, buf_charlen;
ab228c24 1453 int pat_ch, buf_ch;
facdc750 1454
ab228c24 1455 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
facdc750
RS
1456 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1457 ZV_BYTE - this_pos_byte,
1458 buf_charlen);
aff2ce94 1459 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1460
1461 if (buf_ch != pat_ch)
1462 break;
ab228c24
RS
1463
1464 this_len_byte -= charlen;
1465 this_len--;
1466 p += charlen;
1467 this_pos_byte += buf_charlen;
1468 this_pos++;
facdc750
RS
1469 }
1470
1471 if (this_len == 0)
1472 {
1473 pos -= len;
1474 pos_byte -= len_byte;
1475 break;
1476 }
1477
1478 DEC_BOTH (pos, pos_byte);
f8bd51c4
KH
1479 }
1480
facdc750
RS
1481 n++;
1482 }
1483 else if (lim < pos)
1484 while (n < 0)
1485 {
1486 while (1)
b6d6a51c 1487 {
facdc750
RS
1488 /* Try matching at position POS. */
1489 int this_pos = pos - len;
1490 int this_len = len;
1491 unsigned char *p = pat;
1492
1493 if (pos - len < lim)
1494 goto stop;
1495
1496 while (this_len > 0)
1497 {
1498 int pat_ch = *p++;
1499 int buf_ch = FETCH_BYTE (this_pos);
aff2ce94 1500 TRANSLATE (buf_ch, trt, buf_ch);
facdc750
RS
1501
1502 if (buf_ch != pat_ch)
1503 break;
ab228c24
RS
1504 this_len--;
1505 this_pos++;
facdc750
RS
1506 }
1507
1508 if (this_len == 0)
b6d6a51c 1509 {
facdc750
RS
1510 pos -= len;
1511 break;
b6d6a51c 1512 }
facdc750
RS
1513
1514 pos--;
b6d6a51c 1515 }
facdc750
RS
1516
1517 n++;
b6d6a51c 1518 }
facdc750
RS
1519
1520 stop:
1521 if (n == 0)
aff2ce94 1522 {
ab228c24
RS
1523 if (forward)
1524 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1525 else
1526 set_search_regs (multibyte ? pos_byte : pos, len_byte);
aff2ce94
RS
1527
1528 return pos;
1529 }
facdc750
RS
1530 else if (n > 0)
1531 return -n;
1532 else
1533 return n;
1534}
1535\f
1536/* Do Boyer-Moore search N times for the string PAT,
1537 whose length is LEN/LEN_BYTE,
1538 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1539 DIRECTION says which direction we search in.
1540 TRT and INVERSE_TRT are translation tables.
1541
1542 This kind of search works if all the characters in PAT that have
1543 nontrivial translation are the same aside from the last byte. This
1544 makes it possible to translate just the last byte of a character,
1545 and do so after just a simple test of the context.
1546
1547 If that criterion is not satisfied, do not call this function. */
1548
1549static int
1550boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
aff2ce94 1551 pos, pos_byte, lim, lim_byte, charset_base)
facdc750
RS
1552 int n;
1553 unsigned char *base_pat;
1554 int len, len_byte;
1555 Lisp_Object trt;
1556 Lisp_Object inverse_trt;
1557 int pos, pos_byte;
1558 int lim, lim_byte;
aff2ce94 1559 int charset_base;
facdc750
RS
1560{
1561 int direction = ((n > 0) ? 1 : -1);
1562 register int dirlen;
a968f437 1563 int infinity, limit, stride_for_teases = 0;
facdc750
RS
1564 register int *BM_tab;
1565 int *BM_tab_base;
177c0ea7 1566 register unsigned char *cursor, *p_limit;
facdc750 1567 register int i, j;
cb6792d2 1568 unsigned char *pat, *pat_end;
facdc750
RS
1569 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1570
1571 unsigned char simple_translate[0400];
6bbd7a29
GM
1572 int translate_prev_byte = 0;
1573 int translate_anteprev_byte = 0;
facdc750
RS
1574
1575#ifdef C_ALLOCA
1576 int BM_tab_space[0400];
1577 BM_tab = &BM_tab_space[0];
1578#else
1579 BM_tab = (int *) alloca (0400 * sizeof (int));
1580#endif
1581 /* The general approach is that we are going to maintain that we know */
1582 /* the first (closest to the present position, in whatever direction */
1583 /* we're searching) character that could possibly be the last */
1584 /* (furthest from present position) character of a valid match. We */
1585 /* advance the state of our knowledge by looking at that character */
1586 /* and seeing whether it indeed matches the last character of the */
1587 /* pattern. If it does, we take a closer look. If it does not, we */
1588 /* move our pointer (to putative last characters) as far as is */
1589 /* logically possible. This amount of movement, which I call a */
1590 /* stride, will be the length of the pattern if the actual character */
1591 /* appears nowhere in the pattern, otherwise it will be the distance */
1592 /* from the last occurrence of that character to the end of the */
1593 /* pattern. */
1594 /* As a coding trick, an enormous stride is coded into the table for */
1595 /* characters that match the last character. This allows use of only */
1596 /* a single test, a test for having gone past the end of the */
1597 /* permissible match region, to test for both possible matches (when */
1598 /* the stride goes past the end immediately) and failure to */
177c0ea7 1599 /* match (where you get nudged past the end one stride at a time). */
facdc750
RS
1600
1601 /* Here we make a "mickey mouse" BM table. The stride of the search */
1602 /* is determined only by the last character of the putative match. */
1603 /* If that character does not match, we will stride the proper */
1604 /* distance to propose a match that superimposes it on the last */
1605 /* instance of a character that matches it (per trt), or misses */
177c0ea7 1606 /* it entirely if there is none. */
facdc750
RS
1607
1608 dirlen = len_byte * direction;
1609 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
cb6792d2
RS
1610
1611 /* Record position after the end of the pattern. */
1612 pat_end = base_pat + len_byte;
1613 /* BASE_PAT points to a character that we start scanning from.
1614 It is the first character in a forward search,
1615 the last character in a backward search. */
facdc750 1616 if (direction < 0)
cb6792d2
RS
1617 base_pat = pat_end - 1;
1618
facdc750
RS
1619 BM_tab_base = BM_tab;
1620 BM_tab += 0400;
1621 j = dirlen; /* to get it in a register */
1622 /* A character that does not appear in the pattern induces a */
1623 /* stride equal to the pattern length. */
1624 while (BM_tab_base != BM_tab)
1625 {
1626 *--BM_tab = j;
1627 *--BM_tab = j;
1628 *--BM_tab = j;
1629 *--BM_tab = j;
1630 }
1631
1632 /* We use this for translation, instead of TRT itself.
1633 We fill this in to handle the characters that actually
1634 occur in the pattern. Others don't matter anyway! */
1635 bzero (simple_translate, sizeof simple_translate);
1636 for (i = 0; i < 0400; i++)
1637 simple_translate[i] = i;
1638
1639 i = 0;
1640 while (i != infinity)
1641 {
cb6792d2 1642 unsigned char *ptr = base_pat + i;
facdc750
RS
1643 i += direction;
1644 if (i == dirlen)
1645 i = infinity;
1646 if (! NILP (trt))
ca1d1d23 1647 {
facdc750 1648 int ch;
aff2ce94 1649 int untranslated;
facdc750
RS
1650 int this_translated = 1;
1651
1652 if (multibyte
cb6792d2
RS
1653 /* Is *PTR the last byte of a character? */
1654 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
ca1d1d23 1655 {
facdc750
RS
1656 unsigned char *charstart = ptr;
1657 while (! CHAR_HEAD_P (*charstart))
1658 charstart--;
aff2ce94 1659 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
6397418a 1660 if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
facdc750 1661 {
ab228c24 1662 TRANSLATE (ch, trt, untranslated);
aff2ce94
RS
1663 if (! CHAR_HEAD_P (*ptr))
1664 {
1665 translate_prev_byte = ptr[-1];
1666 if (! CHAR_HEAD_P (translate_prev_byte))
1667 translate_anteprev_byte = ptr[-2];
1668 }
facdc750 1669 }
aff2ce94 1670 else
ab228c24
RS
1671 {
1672 this_translated = 0;
1673 ch = *ptr;
1674 }
ca1d1d23 1675 }
facdc750 1676 else if (!multibyte)
aff2ce94 1677 TRANSLATE (ch, trt, *ptr);
ca1d1d23
JB
1678 else
1679 {
facdc750
RS
1680 ch = *ptr;
1681 this_translated = 0;
ca1d1d23 1682 }
facdc750 1683
ab228c24
RS
1684 if (ch > 0400)
1685 j = ((unsigned char) ch) | 0200;
1686 else
1687 j = (unsigned char) ch;
1688
facdc750
RS
1689 if (i == infinity)
1690 stride_for_teases = BM_tab[j];
ab228c24 1691
facdc750
RS
1692 BM_tab[j] = dirlen - i;
1693 /* A translation table is accompanied by its inverse -- see */
177c0ea7 1694 /* comment following downcase_table for details */
facdc750 1695 if (this_translated)
ab228c24
RS
1696 {
1697 int starting_ch = ch;
1698 int starting_j = j;
1699 while (1)
1700 {
1701 TRANSLATE (ch, inverse_trt, ch);
1702 if (ch > 0400)
1703 j = ((unsigned char) ch) | 0200;
1704 else
1705 j = (unsigned char) ch;
1706
1707 /* For all the characters that map into CH,
1708 set up simple_translate to map the last byte
1709 into STARTING_J. */
1710 simple_translate[j] = starting_j;
1711 if (ch == starting_ch)
1712 break;
1713 BM_tab[j] = dirlen - i;
1714 }
1715 }
facdc750
RS
1716 }
1717 else
1718 {
1719 j = *ptr;
1720
1721 if (i == infinity)
1722 stride_for_teases = BM_tab[j];
1723 BM_tab[j] = dirlen - i;
ca1d1d23 1724 }
facdc750
RS
1725 /* stride_for_teases tells how much to stride if we get a */
1726 /* match on the far character but are subsequently */
1727 /* disappointed, by recording what the stride would have been */
1728 /* for that character if the last character had been */
1729 /* different. */
1730 }
1731 infinity = dirlen - infinity;
1732 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1733 /* loop invariant - POS_BYTE points at where last char (first
1734 char if reverse) of pattern would align in a possible match. */
1735 while (n != 0)
1736 {
1737 int tail_end;
1738 unsigned char *tail_end_ptr;
1739
1740 /* It's been reported that some (broken) compiler thinks that
1741 Boolean expressions in an arithmetic context are unsigned.
1742 Using an explicit ?1:0 prevents this. */
1743 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1744 < 0)
1745 return (n * (0 - direction));
1746 /* First we do the part we can by pointers (maybe nothing) */
1747 QUIT;
1748 pat = base_pat;
1749 limit = pos_byte - dirlen + direction;
67ce527d
KH
1750 if (direction > 0)
1751 {
1752 limit = BUFFER_CEILING_OF (limit);
1753 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1754 can take on without hitting edge of buffer or the gap. */
1755 limit = min (limit, pos_byte + 20000);
1756 limit = min (limit, lim_byte - 1);
1757 }
1758 else
1759 {
1760 limit = BUFFER_FLOOR_OF (limit);
1761 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1762 can take on without hitting edge of buffer or the gap. */
1763 limit = max (limit, pos_byte - 20000);
1764 limit = max (limit, lim_byte);
1765 }
facdc750
RS
1766 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1767 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1768
1769 if ((limit - pos_byte) * direction > 20)
ca1d1d23 1770 {
facdc750
RS
1771 unsigned char *p2;
1772
1773 p_limit = BYTE_POS_ADDR (limit);
1774 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1775 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1776 while (1) /* use one cursor setting as long as i can */
ca1d1d23 1777 {
facdc750 1778 if (direction > 0) /* worth duplicating */
ca1d1d23 1779 {
facdc750
RS
1780 /* Use signed comparison if appropriate
1781 to make cursor+infinity sure to be > p_limit.
1782 Assuming that the buffer lies in a range of addresses
1783 that are all "positive" (as ints) or all "negative",
1784 either kind of comparison will work as long
1785 as we don't step by infinity. So pick the kind
1786 that works when we do step by infinity. */
1787 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1788 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1789 cursor += BM_tab[*cursor];
ca1d1d23 1790 else
facdc750
RS
1791 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1792 cursor += BM_tab[*cursor];
1793 }
1794 else
1795 {
1796 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1797 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1798 cursor += BM_tab[*cursor];
1799 else
1800 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1801 cursor += BM_tab[*cursor];
1802 }
ca1d1d23 1803/* If you are here, cursor is beyond the end of the searched region. */
facdc750
RS
1804/* This can happen if you match on the far character of the pattern, */
1805/* because the "stride" of that character is infinity, a number able */
1806/* to throw you well beyond the end of the search. It can also */
1807/* happen if you fail to match within the permitted region and would */
1808/* otherwise try a character beyond that region */
1809 if ((cursor - p_limit) * direction <= len_byte)
1810 break; /* a small overrun is genuine */
1811 cursor -= infinity; /* large overrun = hit */
1812 i = dirlen - direction;
1813 if (! NILP (trt))
1814 {
1815 while ((i -= direction) + direction != 0)
ca1d1d23 1816 {
facdc750
RS
1817 int ch;
1818 cursor -= direction;
1819 /* Translate only the last byte of a character. */
1820 if (! multibyte
1821 || ((cursor == tail_end_ptr
1822 || CHAR_HEAD_P (cursor[1]))
1823 && (CHAR_HEAD_P (cursor[0])
1824 || (translate_prev_byte == cursor[-1]
1825 && (CHAR_HEAD_P (translate_prev_byte)
1826 || translate_anteprev_byte == cursor[-2])))))
1827 ch = simple_translate[*cursor];
1828 else
1829 ch = *cursor;
1830 if (pat[i] != ch)
1831 break;
ca1d1d23 1832 }
facdc750
RS
1833 }
1834 else
1835 {
1836 while ((i -= direction) + direction != 0)
ca1d1d23 1837 {
facdc750
RS
1838 cursor -= direction;
1839 if (pat[i] != *cursor)
1840 break;
ca1d1d23 1841 }
facdc750
RS
1842 }
1843 cursor += dirlen - i - direction; /* fix cursor */
1844 if (i + direction == 0)
1845 {
1846 int position;
0c8533c6 1847
facdc750 1848 cursor -= direction;
1113d9db 1849
facdc750
RS
1850 position = pos_byte + cursor - p2 + ((direction > 0)
1851 ? 1 - len_byte : 0);
1852 set_search_regs (position, len_byte);
ca325161 1853
facdc750
RS
1854 if ((n -= direction) != 0)
1855 cursor += dirlen; /* to resume search */
ca1d1d23 1856 else
facdc750
RS
1857 return ((direction > 0)
1858 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1859 }
facdc750
RS
1860 else
1861 cursor += stride_for_teases; /* <sigh> we lose - */
ca1d1d23 1862 }
facdc750
RS
1863 pos_byte += cursor - p2;
1864 }
1865 else
1866 /* Now we'll pick up a clump that has to be done the hard */
1867 /* way because it covers a discontinuity */
1868 {
1869 limit = ((direction > 0)
1870 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1871 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1872 limit = ((direction > 0)
1873 ? min (limit + len_byte, lim_byte - 1)
1874 : max (limit - len_byte, lim_byte));
1875 /* LIMIT is now the last value POS_BYTE can have
1876 and still be valid for a possible match. */
1877 while (1)
ca1d1d23 1878 {
facdc750
RS
1879 /* This loop can be coded for space rather than */
1880 /* speed because it will usually run only once. */
1881 /* (the reach is at most len + 21, and typically */
177c0ea7 1882 /* does not exceed len) */
facdc750
RS
1883 while ((limit - pos_byte) * direction >= 0)
1884 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1885 /* now run the same tests to distinguish going off the */
1886 /* end, a match or a phony match. */
1887 if ((pos_byte - limit) * direction <= len_byte)
1888 break; /* ran off the end */
1889 /* Found what might be a match.
1890 Set POS_BYTE back to last (first if reverse) pos. */
1891 pos_byte -= infinity;
1892 i = dirlen - direction;
1893 while ((i -= direction) + direction != 0)
ca1d1d23 1894 {
facdc750
RS
1895 int ch;
1896 unsigned char *ptr;
1897 pos_byte -= direction;
1898 ptr = BYTE_POS_ADDR (pos_byte);
1899 /* Translate only the last byte of a character. */
1900 if (! multibyte
1901 || ((ptr == tail_end_ptr
1902 || CHAR_HEAD_P (ptr[1]))
1903 && (CHAR_HEAD_P (ptr[0])
1904 || (translate_prev_byte == ptr[-1]
1905 && (CHAR_HEAD_P (translate_prev_byte)
1906 || translate_anteprev_byte == ptr[-2])))))
1907 ch = simple_translate[*ptr];
1908 else
1909 ch = *ptr;
1910 if (pat[i] != ch)
1911 break;
1912 }
1913 /* Above loop has moved POS_BYTE part or all the way
1914 back to the first pos (last pos if reverse).
1915 Set it once again at the last (first if reverse) char. */
1916 pos_byte += dirlen - i- direction;
1917 if (i + direction == 0)
1918 {
1919 int position;
1920 pos_byte -= direction;
1113d9db 1921
facdc750 1922 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
0c8533c6 1923
facdc750 1924 set_search_regs (position, len_byte);
ca325161 1925
facdc750
RS
1926 if ((n -= direction) != 0)
1927 pos_byte += dirlen; /* to resume search */
ca1d1d23 1928 else
facdc750
RS
1929 return ((direction > 0)
1930 ? search_regs.end[0] : search_regs.start[0]);
ca1d1d23 1931 }
facdc750
RS
1932 else
1933 pos_byte += stride_for_teases;
1934 }
1935 }
1936 /* We have done one clump. Can we continue? */
1937 if ((lim_byte - pos_byte) * direction < 0)
1938 return ((0 - n) * direction);
ca1d1d23 1939 }
facdc750 1940 return BYTE_TO_CHAR (pos_byte);
ca1d1d23 1941}
ca325161 1942
fa8ed3e0 1943/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
a7e4cdde
RS
1944 for the overall match just found in the current buffer.
1945 Also clear out the match data for registers 1 and up. */
ca325161
RS
1946
1947static void
fa8ed3e0
RS
1948set_search_regs (beg_byte, nbytes)
1949 int beg_byte, nbytes;
ca325161 1950{
a7e4cdde
RS
1951 int i;
1952
ca325161
RS
1953 /* Make sure we have registers in which to store
1954 the match position. */
1955 if (search_regs.num_regs == 0)
1956 {
2d4a771a
RS
1957 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1958 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
487282dc 1959 search_regs.num_regs = 2;
ca325161
RS
1960 }
1961
a7e4cdde
RS
1962 /* Clear out the other registers. */
1963 for (i = 1; i < search_regs.num_regs; i++)
1964 {
1965 search_regs.start[i] = -1;
1966 search_regs.end[i] = -1;
1967 }
1968
fa8ed3e0
RS
1969 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1970 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
a3668d92 1971 XSETBUFFER (last_thing_searched, current_buffer);
ca325161 1972}
ca1d1d23
JB
1973\f
1974/* Given a string of words separated by word delimiters,
1975 compute a regexp that matches those exact words
1976 separated by arbitrary punctuation. */
1977
1978static Lisp_Object
1979wordify (string)
1980 Lisp_Object string;
1981{
1982 register unsigned char *p, *o;
0c8533c6 1983 register int i, i_byte, len, punct_count = 0, word_count = 0;
ca1d1d23 1984 Lisp_Object val;
0c8533c6
RS
1985 int prev_c = 0;
1986 int adjust;
ca1d1d23 1987
b7826503 1988 CHECK_STRING (string);
d5db4077
KR
1989 p = SDATA (string);
1990 len = SCHARS (string);
ca1d1d23 1991
0c8533c6
RS
1992 for (i = 0, i_byte = 0; i < len; )
1993 {
1994 int c;
177c0ea7 1995
eb99a8dd 1996 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
0c8533c6
RS
1997
1998 if (SYNTAX (c) != Sword)
1999 {
2000 punct_count++;
2001 if (i > 0 && SYNTAX (prev_c) == Sword)
2002 word_count++;
2003 }
ca1d1d23 2004
0c8533c6
RS
2005 prev_c = c;
2006 }
2007
2008 if (SYNTAX (prev_c) == Sword)
2009 word_count++;
2010 if (!word_count)
b07b65aa 2011 return empty_string;
0c8533c6
RS
2012
2013 adjust = - punct_count + 5 * (word_count - 1) + 4;
8a2df937
RS
2014 if (STRING_MULTIBYTE (string))
2015 val = make_uninit_multibyte_string (len + adjust,
d5db4077 2016 SBYTES (string)
8a2df937
RS
2017 + adjust);
2018 else
2019 val = make_uninit_string (len + adjust);
ca1d1d23 2020
d5db4077 2021 o = SDATA (val);
ca1d1d23
JB
2022 *o++ = '\\';
2023 *o++ = 'b';
1e9582d4 2024 prev_c = 0;
ca1d1d23 2025
1e9582d4
RS
2026 for (i = 0, i_byte = 0; i < len; )
2027 {
2028 int c;
2029 int i_byte_orig = i_byte;
177c0ea7 2030
eb99a8dd 2031 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1e9582d4
RS
2032
2033 if (SYNTAX (c) == Sword)
2034 {
5d69fe10 2035 bcopy (SDATA (string) + i_byte_orig, o,
1e9582d4
RS
2036 i_byte - i_byte_orig);
2037 o += i_byte - i_byte_orig;
2038 }
2039 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2040 {
2041 *o++ = '\\';
2042 *o++ = 'W';
2043 *o++ = '\\';
2044 *o++ = 'W';
2045 *o++ = '*';
2046 }
2047
2048 prev_c = c;
2049 }
ca1d1d23
JB
2050
2051 *o++ = '\\';
2052 *o++ = 'b';
2053
2054 return val;
2055}
2056\f
2057DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
8c1a1077
PJ
2058 "MSearch backward: ",
2059 doc: /* Search backward from point for STRING.
2060Set point to the beginning of the occurrence found, and return point.
2061An optional second argument bounds the search; it is a buffer position.
2062The match found must not extend before that position.
2063Optional third argument, if t, means if fail just return nil (no error).
2064 If not nil and not t, position at limit of search and return nil.
2065Optional fourth argument is repeat count--search for successive occurrences.
2066
2067Search case-sensitivity is determined by the value of the variable
2068`case-fold-search', which see.
2069
2070See also the functions `match-beginning', `match-end' and `replace-match'. */)
2071 (string, bound, noerror, count)
ca1d1d23
JB
2072 Lisp_Object string, bound, noerror, count;
2073{
b819a390 2074 return search_command (string, bound, noerror, count, -1, 0, 0);
ca1d1d23
JB
2075}
2076
6af43974 2077DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
8c1a1077
PJ
2078 doc: /* Search forward from point for STRING.
2079Set point to the end of the occurrence found, and return point.
2080An optional second argument bounds the search; it is a buffer position.
2081The match found must not extend after that position. nil is equivalent
2082 to (point-max).
2083Optional third argument, if t, means if fail just return nil (no error).
2084 If not nil and not t, move to limit of search and return nil.
2085Optional fourth argument is repeat count--search for successive occurrences.
2086
2087Search case-sensitivity is determined by the value of the variable
2088`case-fold-search', which see.
2089
2090See also the functions `match-beginning', `match-end' and `replace-match'. */)
2091 (string, bound, noerror, count)
ca1d1d23
JB
2092 Lisp_Object string, bound, noerror, count;
2093{
b819a390 2094 return search_command (string, bound, noerror, count, 1, 0, 0);
ca1d1d23
JB
2095}
2096
2097DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
8c1a1077
PJ
2098 "sWord search backward: ",
2099 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2100Set point to the beginning of the occurrence found, and return point.
2101An optional second argument bounds the search; it is a buffer position.
2102The match found must not extend before that position.
2103Optional third argument, if t, means if fail just return nil (no error).
2104 If not nil and not t, move to limit of search and return nil.
2105Optional fourth argument is repeat count--search for successive occurrences. */)
2106 (string, bound, noerror, count)
ca1d1d23
JB
2107 Lisp_Object string, bound, noerror, count;
2108{
b819a390 2109 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2110}
2111
2112DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
8c1a1077
PJ
2113 "sWord search: ",
2114 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2115Set point to the end of the occurrence found, and return point.
2116An optional second argument bounds the search; it is a buffer position.
2117The match found must not extend after that position.
2118Optional third argument, if t, means if fail just return nil (no error).
2119 If not nil and not t, move to limit of search and return nil.
2120Optional fourth argument is repeat count--search for successive occurrences. */)
2121 (string, bound, noerror, count)
ca1d1d23
JB
2122 Lisp_Object string, bound, noerror, count;
2123{
b819a390 2124 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
ca1d1d23
JB
2125}
2126
2127DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
8c1a1077
PJ
2128 "sRE search backward: ",
2129 doc: /* Search backward from point for match for regular expression REGEXP.
2130Set point to the beginning of the match, and return point.
2131The match found is the one starting last in the buffer
2132and yet ending before the origin of the search.
2133An optional second argument bounds the search; it is a buffer position.
2134The match found must start at or after that position.
2135Optional third argument, if t, means if fail just return nil (no error).
2136 If not nil and not t, move to limit of search and return nil.
2137Optional fourth argument is repeat count--search for successive occurrences.
2138See also the functions `match-beginning', `match-end', `match-string',
2139and `replace-match'. */)
2140 (regexp, bound, noerror, count)
19c0a730 2141 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2142{
b819a390 2143 return search_command (regexp, bound, noerror, count, -1, 1, 0);
ca1d1d23
JB
2144}
2145
2146DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
8c1a1077
PJ
2147 "sRE search: ",
2148 doc: /* Search forward from point for regular expression REGEXP.
2149Set point to the end of the occurrence found, and return point.
2150An optional second argument bounds the search; it is a buffer position.
2151The match found must not extend after that position.
2152Optional third argument, if t, means if fail just return nil (no error).
2153 If not nil and not t, move to limit of search and return nil.
2154Optional fourth argument is repeat count--search for successive occurrences.
2155See also the functions `match-beginning', `match-end', `match-string',
2156and `replace-match'. */)
2157 (regexp, bound, noerror, count)
19c0a730 2158 Lisp_Object regexp, bound, noerror, count;
ca1d1d23 2159{
b819a390
RS
2160 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2161}
2162
2163DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
8c1a1077
PJ
2164 "sPosix search backward: ",
2165 doc: /* Search backward from point for match for regular expression REGEXP.
2166Find the longest match in accord with Posix regular expression rules.
2167Set point to the beginning of the match, and return point.
2168The match found is the one starting last in the buffer
2169and yet ending before the origin of the search.
2170An optional second argument bounds the search; it is a buffer position.
2171The match found must start at or after that position.
2172Optional third argument, if t, means if fail just return nil (no error).
2173 If not nil and not t, move to limit of search and return nil.
2174Optional fourth argument is repeat count--search for successive occurrences.
2175See also the functions `match-beginning', `match-end', `match-string',
2176and `replace-match'. */)
2177 (regexp, bound, noerror, count)
b819a390
RS
2178 Lisp_Object regexp, bound, noerror, count;
2179{
2180 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2181}
2182
2183DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
8c1a1077
PJ
2184 "sPosix search: ",
2185 doc: /* Search forward from point for regular expression REGEXP.
2186Find the longest match in accord with Posix regular expression rules.
2187Set point to the end of the occurrence found, and return point.
2188An optional second argument bounds the search; it is a buffer position.
2189The match found must not extend after that position.
2190Optional third argument, if t, means if fail just return nil (no error).
2191 If not nil and not t, move to limit of search and return nil.
2192Optional fourth argument is repeat count--search for successive occurrences.
2193See also the functions `match-beginning', `match-end', `match-string',
2194and `replace-match'. */)
2195 (regexp, bound, noerror, count)
b819a390
RS
2196 Lisp_Object regexp, bound, noerror, count;
2197{
2198 return search_command (regexp, bound, noerror, count, 1, 1, 1);
ca1d1d23
JB
2199}
2200\f
d7a5ad5f 2201DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
8c1a1077 2202 doc: /* Replace text matched by last search with NEWTEXT.
4dd0c271
RS
2203Leave point at the end of the replacement text.
2204
8c1a1077
PJ
2205If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2206Otherwise maybe capitalize the whole text, or maybe just word initials,
2207based on the replaced text.
2208If the replaced text has only capital letters
2209and has at least one multiletter word, convert NEWTEXT to all caps.
4dd0c271
RS
2210Otherwise if all words are capitalized in the replaced text,
2211capitalize each word in NEWTEXT.
8c1a1077
PJ
2212
2213If third arg LITERAL is non-nil, insert NEWTEXT literally.
2214Otherwise treat `\\' as special:
2215 `\\&' in NEWTEXT means substitute original matched text.
2216 `\\N' means substitute what matched the Nth `\\(...\\)'.
2217 If Nth parens didn't match, substitute nothing.
2218 `\\\\' means insert one `\\'.
4dd0c271
RS
2219Case conversion does not apply to these substitutions.
2220
8c1a1077 2221FIXEDCASE and LITERAL are optional arguments.
8c1a1077
PJ
2222
2223The optional fourth argument STRING can be a string to modify.
2224This is meaningful when the previous match was done against STRING,
2225using `string-match'. When used this way, `replace-match'
2226creates and returns a new string made by copying STRING and replacing
2227the part of STRING that was matched.
2228
2229The optional fifth argument SUBEXP specifies a subexpression;
2230it says to replace just that subexpression with NEWTEXT,
2231rather than replacing the entire matched text.
2232This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2233`\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2234NEWTEXT in place of subexp N.
2235This is useful only after a regular expression search or match,
2236since only regular expressions have distinguished subexpressions. */)
2237 (newtext, fixedcase, literal, string, subexp)
d7a5ad5f 2238 Lisp_Object newtext, fixedcase, literal, string, subexp;
ca1d1d23
JB
2239{
2240 enum { nochange, all_caps, cap_initial } case_action;
ac3b28b1 2241 register int pos, pos_byte;
ca1d1d23 2242 int some_multiletter_word;
97832bd0 2243 int some_lowercase;
73dc8771 2244 int some_uppercase;
208767c3 2245 int some_nonuppercase_initial;
ca1d1d23 2246 register int c, prevc;
d7a5ad5f 2247 int sub;
3e18eecf 2248 int opoint, newpoint;
ca1d1d23 2249
b7826503 2250 CHECK_STRING (newtext);
ca1d1d23 2251
080c45fd 2252 if (! NILP (string))
b7826503 2253 CHECK_STRING (string);
080c45fd 2254
ca1d1d23
JB
2255 case_action = nochange; /* We tried an initialization */
2256 /* but some C compilers blew it */
4746118a
JB
2257
2258 if (search_regs.num_regs <= 0)
2259 error ("replace-match called before any match found");
2260
d7a5ad5f
RS
2261 if (NILP (subexp))
2262 sub = 0;
2263 else
2264 {
b7826503 2265 CHECK_NUMBER (subexp);
d7a5ad5f
RS
2266 sub = XINT (subexp);
2267 if (sub < 0 || sub >= search_regs.num_regs)
2268 args_out_of_range (subexp, make_number (search_regs.num_regs));
2269 }
2270
080c45fd
RS
2271 if (NILP (string))
2272 {
d7a5ad5f
RS
2273 if (search_regs.start[sub] < BEGV
2274 || search_regs.start[sub] > search_regs.end[sub]
2275 || search_regs.end[sub] > ZV)
2276 args_out_of_range (make_number (search_regs.start[sub]),
2277 make_number (search_regs.end[sub]));
080c45fd
RS
2278 }
2279 else
2280 {
d7a5ad5f
RS
2281 if (search_regs.start[sub] < 0
2282 || search_regs.start[sub] > search_regs.end[sub]
d5db4077 2283 || search_regs.end[sub] > SCHARS (string))
d7a5ad5f
RS
2284 args_out_of_range (make_number (search_regs.start[sub]),
2285 make_number (search_regs.end[sub]));
080c45fd 2286 }
ca1d1d23
JB
2287
2288 if (NILP (fixedcase))
2289 {
2290 /* Decide how to casify by examining the matched text. */
ac3b28b1 2291 int last;
ca1d1d23 2292
ac3b28b1
KH
2293 pos = search_regs.start[sub];
2294 last = search_regs.end[sub];
fa8ed3e0
RS
2295
2296 if (NILP (string))
ac3b28b1 2297 pos_byte = CHAR_TO_BYTE (pos);
fa8ed3e0 2298 else
ac3b28b1 2299 pos_byte = string_char_to_byte (string, pos);
fa8ed3e0 2300
ca1d1d23
JB
2301 prevc = '\n';
2302 case_action = all_caps;
2303
2304 /* some_multiletter_word is set nonzero if any original word
2305 is more than one letter long. */
2306 some_multiletter_word = 0;
97832bd0 2307 some_lowercase = 0;
208767c3 2308 some_nonuppercase_initial = 0;
73dc8771 2309 some_uppercase = 0;
ca1d1d23 2310
ac3b28b1 2311 while (pos < last)
ca1d1d23 2312 {
080c45fd 2313 if (NILP (string))
ac3b28b1
KH
2314 {
2315 c = FETCH_CHAR (pos_byte);
2316 INC_BOTH (pos, pos_byte);
2317 }
080c45fd 2318 else
ac3b28b1 2319 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
080c45fd 2320
ca1d1d23
JB
2321 if (LOWERCASEP (c))
2322 {
2323 /* Cannot be all caps if any original char is lower case */
2324
97832bd0 2325 some_lowercase = 1;
ca1d1d23 2326 if (SYNTAX (prevc) != Sword)
208767c3 2327 some_nonuppercase_initial = 1;
ca1d1d23
JB
2328 else
2329 some_multiletter_word = 1;
2330 }
2331 else if (!NOCASEP (c))
2332 {
73dc8771 2333 some_uppercase = 1;
97832bd0 2334 if (SYNTAX (prevc) != Sword)
c4d460ce 2335 ;
97832bd0 2336 else
ca1d1d23
JB
2337 some_multiletter_word = 1;
2338 }
208767c3
RS
2339 else
2340 {
2341 /* If the initial is a caseless word constituent,
2342 treat that like a lowercase initial. */
2343 if (SYNTAX (prevc) != Sword)
2344 some_nonuppercase_initial = 1;
2345 }
ca1d1d23
JB
2346
2347 prevc = c;
2348 }
2349
97832bd0
RS
2350 /* Convert to all caps if the old text is all caps
2351 and has at least one multiletter word. */
2352 if (! some_lowercase && some_multiletter_word)
2353 case_action = all_caps;
c4d460ce 2354 /* Capitalize each word, if the old text has all capitalized words. */
208767c3 2355 else if (!some_nonuppercase_initial && some_multiletter_word)
ca1d1d23 2356 case_action = cap_initial;
208767c3 2357 else if (!some_nonuppercase_initial && some_uppercase)
73dc8771
KH
2358 /* Should x -> yz, operating on X, give Yz or YZ?
2359 We'll assume the latter. */
2360 case_action = all_caps;
97832bd0
RS
2361 else
2362 case_action = nochange;
ca1d1d23
JB
2363 }
2364
080c45fd
RS
2365 /* Do replacement in a string. */
2366 if (!NILP (string))
2367 {
2368 Lisp_Object before, after;
2369
2370 before = Fsubstring (string, make_number (0),
d7a5ad5f
RS
2371 make_number (search_regs.start[sub]));
2372 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
080c45fd 2373
636a5e28
RS
2374 /* Substitute parts of the match into NEWTEXT
2375 if desired. */
080c45fd
RS
2376 if (NILP (literal))
2377 {
d131e79c
RS
2378 int lastpos = 0;
2379 int lastpos_byte = 0;
080c45fd
RS
2380 /* We build up the substituted string in ACCUM. */
2381 Lisp_Object accum;
2382 Lisp_Object middle;
d5db4077 2383 int length = SBYTES (newtext);
080c45fd
RS
2384
2385 accum = Qnil;
2386
ac3b28b1 2387 for (pos_byte = 0, pos = 0; pos_byte < length;)
080c45fd
RS
2388 {
2389 int substart = -1;
6bbd7a29 2390 int subend = 0;
1e79ec24 2391 int delbackslash = 0;
080c45fd 2392
0c8533c6
RS
2393 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2394
080c45fd
RS
2395 if (c == '\\')
2396 {
0c8533c6 2397 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
177c0ea7 2398
080c45fd
RS
2399 if (c == '&')
2400 {
d7a5ad5f
RS
2401 substart = search_regs.start[sub];
2402 subend = search_regs.end[sub];
080c45fd 2403 }
5fbbc83b 2404 else if (c >= '1' && c <= '9')
080c45fd 2405 {
5fbbc83b
RS
2406 if (search_regs.start[c - '0'] >= 0
2407 && c <= search_regs.num_regs + '0')
080c45fd
RS
2408 {
2409 substart = search_regs.start[c - '0'];
2410 subend = search_regs.end[c - '0'];
2411 }
5fbbc83b
RS
2412 else
2413 {
2414 /* If that subexp did not match,
2415 replace \\N with nothing. */
2416 substart = 0;
2417 subend = 0;
2418 }
080c45fd 2419 }
1e79ec24
KH
2420 else if (c == '\\')
2421 delbackslash = 1;
636a5e28
RS
2422 else
2423 error ("Invalid use of `\\' in replacement text");
080c45fd
RS
2424 }
2425 if (substart >= 0)
2426 {
d131e79c
RS
2427 if (pos - 2 != lastpos)
2428 middle = substring_both (newtext, lastpos,
2429 lastpos_byte,
2430 pos - 2, pos_byte - 2);
080c45fd
RS
2431 else
2432 middle = Qnil;
2433 accum = concat3 (accum, middle,
0c8533c6
RS
2434 Fsubstring (string,
2435 make_number (substart),
080c45fd
RS
2436 make_number (subend)));
2437 lastpos = pos;
0c8533c6 2438 lastpos_byte = pos_byte;
080c45fd 2439 }
1e79ec24
KH
2440 else if (delbackslash)
2441 {
d131e79c
RS
2442 middle = substring_both (newtext, lastpos,
2443 lastpos_byte,
2444 pos - 1, pos_byte - 1);
0c8533c6 2445
1e79ec24
KH
2446 accum = concat2 (accum, middle);
2447 lastpos = pos;
0c8533c6 2448 lastpos_byte = pos_byte;
1e79ec24 2449 }
080c45fd
RS
2450 }
2451
d131e79c
RS
2452 if (pos != lastpos)
2453 middle = substring_both (newtext, lastpos,
2454 lastpos_byte,
0c8533c6 2455 pos, pos_byte);
080c45fd
RS
2456 else
2457 middle = Qnil;
2458
2459 newtext = concat2 (accum, middle);
2460 }
2461
636a5e28 2462 /* Do case substitution in NEWTEXT if desired. */
080c45fd
RS
2463 if (case_action == all_caps)
2464 newtext = Fupcase (newtext);
2465 else if (case_action == cap_initial)
2b2eead9 2466 newtext = Fupcase_initials (newtext);
080c45fd
RS
2467
2468 return concat3 (before, newtext, after);
2469 }
2470
09c4719e 2471 /* Record point, then move (quietly) to the start of the match. */
9160906f 2472 if (PT >= search_regs.end[sub])
b0eba991 2473 opoint = PT - ZV;
9160906f
RS
2474 else if (PT > search_regs.start[sub])
2475 opoint = search_regs.end[sub] - ZV;
b0eba991
RS
2476 else
2477 opoint = PT;
2478
886ed6ec
RS
2479 /* If we want non-literal replacement,
2480 perform substitution on the replacement string. */
2481 if (NILP (literal))
ca1d1d23 2482 {
d5db4077 2483 int length = SBYTES (newtext);
68e69fbd
RS
2484 unsigned char *substed;
2485 int substed_alloc_size, substed_len;
3bc25e52
KH
2486 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2487 int str_multibyte = STRING_MULTIBYTE (newtext);
2488 Lisp_Object rev_tbl;
886ed6ec 2489 int really_changed = 0;
3bc25e52
KH
2490
2491 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2492 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2493 make_number (0))
2494 : Qnil);
ac3b28b1 2495
68e69fbd
RS
2496 substed_alloc_size = length * 2 + 100;
2497 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2498 substed_len = 0;
2499
3bc25e52
KH
2500 /* Go thru NEWTEXT, producing the actual text to insert in
2501 SUBSTED while adjusting multibyteness to that of the current
2502 buffer. */
ca1d1d23 2503
ac3b28b1 2504 for (pos_byte = 0, pos = 0; pos_byte < length;)
ca1d1d23 2505 {
68e69fbd 2506 unsigned char str[MAX_MULTIBYTE_LENGTH];
f8ce8a0d
GM
2507 unsigned char *add_stuff = NULL;
2508 int add_len = 0;
68e69fbd 2509 int idx = -1;
9a76659d 2510
3bc25e52
KH
2511 if (str_multibyte)
2512 {
eb99a8dd 2513 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
3bc25e52
KH
2514 if (!buf_multibyte)
2515 c = multibyte_char_to_unibyte (c, rev_tbl);
2516 }
2517 else
2518 {
2519 /* Note that we don't have to increment POS. */
5d69fe10 2520 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2521 if (buf_multibyte)
2522 c = unibyte_char_to_multibyte (c);
2523 }
ac3b28b1 2524
68e69fbd
RS
2525 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2526 or set IDX to a match index, which means put that part
2527 of the buffer text into SUBSTED. */
2528
ca1d1d23
JB
2529 if (c == '\\')
2530 {
886ed6ec
RS
2531 really_changed = 1;
2532
3bc25e52
KH
2533 if (str_multibyte)
2534 {
eb99a8dd
KH
2535 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2536 pos, pos_byte);
3bc25e52
KH
2537 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2538 c = multibyte_char_to_unibyte (c, rev_tbl);
2539 }
2540 else
2541 {
d5db4077 2542 c = SREF (newtext, pos_byte++);
3bc25e52
KH
2543 if (buf_multibyte)
2544 c = unibyte_char_to_multibyte (c);
2545 }
2546
ca1d1d23 2547 if (c == '&')
68e69fbd 2548 idx = sub;
78445046 2549 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
ca1d1d23
JB
2550 {
2551 if (search_regs.start[c - '0'] >= 1)
68e69fbd 2552 idx = c - '0';
ca1d1d23 2553 }
636a5e28 2554 else if (c == '\\')
68e69fbd 2555 add_len = 1, add_stuff = "\\";
636a5e28 2556 else
3bc25e52
KH
2557 {
2558 xfree (substed);
2559 error ("Invalid use of `\\' in replacement text");
2560 }
ca1d1d23
JB
2561 }
2562 else
68e69fbd
RS
2563 {
2564 add_len = CHAR_STRING (c, str);
2565 add_stuff = str;
2566 }
2567
2568 /* If we want to copy part of a previous match,
2569 set up ADD_STUFF and ADD_LEN to point to it. */
2570 if (idx >= 0)
2571 {
2572 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2573 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2574 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2575 move_gap (search_regs.start[idx]);
2576 add_stuff = BYTE_POS_ADDR (begbyte);
2577 }
2578
2579 /* Now the stuff we want to add to SUBSTED
2580 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2581
2582 /* Make sure SUBSTED is big enough. */
2583 if (substed_len + add_len >= substed_alloc_size)
2584 {
2585 substed_alloc_size = substed_len + add_len + 500;
2586 substed = (unsigned char *) xrealloc (substed,
2587 substed_alloc_size + 1);
2588 }
2589
2590 /* Now add to the end of SUBSTED. */
f8ce8a0d
GM
2591 if (add_stuff)
2592 {
2593 bcopy (add_stuff, substed + substed_len, add_len);
2594 substed_len += add_len;
2595 }
ca1d1d23 2596 }
68e69fbd 2597
886ed6ec 2598 if (really_changed)
80460525
KH
2599 {
2600 if (buf_multibyte)
2601 {
2602 int nchars = multibyte_chars_in_text (substed, substed_len);
68e69fbd 2603
80460525
KH
2604 newtext = make_multibyte_string (substed, nchars, substed_len);
2605 }
2606 else
2607 newtext = make_unibyte_string (substed, substed_len);
2608 }
68e69fbd 2609 xfree (substed);
ca1d1d23
JB
2610 }
2611
886ed6ec
RS
2612 /* Replace the old text with the new in the cleanest possible way. */
2613 replace_range (search_regs.start[sub], search_regs.end[sub],
2614 newtext, 1, 0, 1);
d5db4077 2615 newpoint = search_regs.start[sub] + SCHARS (newtext);
ca1d1d23
JB
2616
2617 if (case_action == all_caps)
886ed6ec
RS
2618 Fupcase_region (make_number (search_regs.start[sub]),
2619 make_number (newpoint));
ca1d1d23 2620 else if (case_action == cap_initial)
886ed6ec
RS
2621 Fupcase_initials_region (make_number (search_regs.start[sub]),
2622 make_number (newpoint));
3e18eecf 2623
98e942e0
RS
2624 /* Adjust search data for this change. */
2625 {
5b88a2c5 2626 int oldend = search_regs.end[sub];
41c01205 2627 int oldstart = search_regs.start[sub];
98e942e0
RS
2628 int change = newpoint - search_regs.end[sub];
2629 int i;
2630
2631 for (i = 0; i < search_regs.num_regs; i++)
2632 {
41c01205 2633 if (search_regs.start[i] >= oldend)
98e942e0 2634 search_regs.start[i] += change;
41c01205
DK
2635 else if (search_regs.start[i] > oldstart)
2636 search_regs.start[i] = oldstart;
2637 if (search_regs.end[i] >= oldend)
98e942e0 2638 search_regs.end[i] += change;
41c01205
DK
2639 else if (search_regs.end[i] > oldstart)
2640 search_regs.end[i] = oldstart;
98e942e0
RS
2641 }
2642 }
2643
b0eba991 2644 /* Put point back where it was in the text. */
8d808a65 2645 if (opoint <= 0)
fa8ed3e0 2646 TEMP_SET_PT (opoint + ZV);
b0eba991 2647 else
fa8ed3e0 2648 TEMP_SET_PT (opoint);
b0eba991
RS
2649
2650 /* Now move point "officially" to the start of the inserted replacement. */
3e18eecf 2651 move_if_not_intangible (newpoint);
177c0ea7 2652
ca1d1d23
JB
2653 return Qnil;
2654}
2655\f
2656static Lisp_Object
2657match_limit (num, beginningp)
2658 Lisp_Object num;
2659 int beginningp;
2660{
2661 register int n;
2662
b7826503 2663 CHECK_NUMBER (num);
ca1d1d23 2664 n = XINT (num);
f90a5bf5 2665 if (n < 0)
bd2cbd56 2666 args_out_of_range (num, make_number (0));
f90a5bf5
RS
2667 if (search_regs.num_regs <= 0)
2668 error ("No match data, because no search succeeded");
9b9ceb61 2669 if (n >= search_regs.num_regs
4746118a 2670 || search_regs.start[n] < 0)
ca1d1d23
JB
2671 return Qnil;
2672 return (make_number ((beginningp) ? search_regs.start[n]
2673 : search_regs.end[n]));
2674}
2675
2676DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
8c1a1077
PJ
2677 doc: /* Return position of start of text matched by last search.
2678SUBEXP, a number, specifies which parenthesized expression in the last
2679 regexp.
2680Value is nil if SUBEXPth pair didn't match, or there were less than
2681 SUBEXP pairs.
2682Zero means the entire text matched by the whole regexp or whole string. */)
2683 (subexp)
5806161b 2684 Lisp_Object subexp;
ca1d1d23 2685{
5806161b 2686 return match_limit (subexp, 1);
ca1d1d23
JB
2687}
2688
2689DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
8c1a1077
PJ
2690 doc: /* Return position of end of text matched by last search.
2691SUBEXP, a number, specifies which parenthesized expression in the last
2692 regexp.
2693Value is nil if SUBEXPth pair didn't match, or there were less than
2694 SUBEXP pairs.
2695Zero means the entire text matched by the whole regexp or whole string. */)
2696 (subexp)
5806161b 2697 Lisp_Object subexp;
ca1d1d23 2698{
5806161b 2699 return match_limit (subexp, 0);
177c0ea7 2700}
ca1d1d23 2701
56256c2a 2702DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
8c1a1077
PJ
2703 doc: /* Return a list containing all info on what the last search matched.
2704Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2705All the elements are markers or nil (nil if the Nth pair didn't match)
2706if the last match was on a buffer; integers or nil if a string was matched.
2707Use `store-match-data' to reinstate the data in this list.
2708
41c01205
DK
2709If INTEGERS (the optional first argument) is non-nil, always use
2710integers \(rather than markers) to represent buffer positions. In
2711this case, and if the last match was in a buffer, the buffer will get
2712stored as one additional element at the end of the list.
2713
8c1a1077 2714If REUSE is a list, reuse it as part of the value. If REUSE is long enough
140a6b7e
KS
2715to hold all the values, and if INTEGERS is non-nil, no consing is done.
2716
2717Return value is undefined if the last search failed. */)
8c1a1077 2718 (integers, reuse)
56256c2a 2719 Lisp_Object integers, reuse;
ca1d1d23 2720{
56256c2a 2721 Lisp_Object tail, prev;
4746118a 2722 Lisp_Object *data;
ca1d1d23
JB
2723 int i, len;
2724
daa37602 2725 if (NILP (last_thing_searched))
c36bcf1b 2726 return Qnil;
daa37602 2727
6bbd7a29
GM
2728 prev = Qnil;
2729
41c01205 2730 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
4746118a
JB
2731 * sizeof (Lisp_Object));
2732
41c01205 2733 len = 0;
4746118a 2734 for (i = 0; i < search_regs.num_regs; i++)
ca1d1d23
JB
2735 {
2736 int start = search_regs.start[i];
2737 if (start >= 0)
2738 {
56256c2a
RS
2739 if (EQ (last_thing_searched, Qt)
2740 || ! NILP (integers))
ca1d1d23 2741 {
c235cce7
KH
2742 XSETFASTINT (data[2 * i], start);
2743 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
ca1d1d23 2744 }
0ed62dc7 2745 else if (BUFFERP (last_thing_searched))
ca1d1d23
JB
2746 {
2747 data[2 * i] = Fmake_marker ();
daa37602
JB
2748 Fset_marker (data[2 * i],
2749 make_number (start),
2750 last_thing_searched);
ca1d1d23
JB
2751 data[2 * i + 1] = Fmake_marker ();
2752 Fset_marker (data[2 * i + 1],
177c0ea7 2753 make_number (search_regs.end[i]),
daa37602 2754 last_thing_searched);
ca1d1d23 2755 }
daa37602
JB
2756 else
2757 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2758 abort ();
2759
41c01205 2760 len = 2*(i+1);
ca1d1d23
JB
2761 }
2762 else
2763 data[2 * i] = data [2 * i + 1] = Qnil;
2764 }
56256c2a 2765
bd2cbd56 2766 if (BUFFERP (last_thing_searched) && !NILP (integers))
41c01205 2767 {
bd2cbd56 2768 data[len] = last_thing_searched;
41c01205
DK
2769 len++;
2770 }
2771
56256c2a
RS
2772 /* If REUSE is not usable, cons up the values and return them. */
2773 if (! CONSP (reuse))
41c01205 2774 return Flist (len, data);
56256c2a
RS
2775
2776 /* If REUSE is a list, store as many value elements as will fit
2777 into the elements of REUSE. */
2778 for (i = 0, tail = reuse; CONSP (tail);
c1d497be 2779 i++, tail = XCDR (tail))
56256c2a 2780 {
41c01205 2781 if (i < len)
f3fbd155 2782 XSETCAR (tail, data[i]);
56256c2a 2783 else
f3fbd155 2784 XSETCAR (tail, Qnil);
56256c2a
RS
2785 prev = tail;
2786 }
2787
2788 /* If we couldn't fit all value elements into REUSE,
2789 cons up the rest of them and add them to the end of REUSE. */
41c01205
DK
2790 if (i < len)
2791 XSETCDR (prev, Flist (len - i, data + i));
56256c2a
RS
2792
2793 return reuse;
ca1d1d23
JB
2794}
2795
2796
3f1c005b 2797DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
8c1a1077
PJ
2798 doc: /* Set internal data on last search match from elements of LIST.
2799LIST should have been created by calling `match-data' previously. */)
2800 (list)
ca1d1d23
JB
2801 register Lisp_Object list;
2802{
2803 register int i;
2804 register Lisp_Object marker;
2805
7074fde6
FP
2806 if (running_asynch_code)
2807 save_search_regs ();
2808
ca1d1d23 2809 if (!CONSP (list) && !NILP (list))
b37902c8 2810 list = wrong_type_argument (Qconsp, list);
ca1d1d23 2811
41c01205
DK
2812 /* Unless we find a marker with a buffer or an explicit buffer
2813 in LIST, assume that this match data came from a string. */
daa37602
JB
2814 last_thing_searched = Qt;
2815
4746118a
JB
2816 /* Allocate registers if they don't already exist. */
2817 {
d084e942 2818 int length = XFASTINT (Flength (list)) / 2;
4746118a
JB
2819
2820 if (length > search_regs.num_regs)
2821 {
1113d9db
JB
2822 if (search_regs.num_regs == 0)
2823 {
2824 search_regs.start
2825 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2826 search_regs.end
2827 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2828 }
4746118a 2829 else
1113d9db
JB
2830 {
2831 search_regs.start
2832 = (regoff_t *) xrealloc (search_regs.start,
2833 length * sizeof (regoff_t));
2834 search_regs.end
2835 = (regoff_t *) xrealloc (search_regs.end,
2836 length * sizeof (regoff_t));
2837 }
4746118a 2838
e62371e9
KH
2839 for (i = search_regs.num_regs; i < length; i++)
2840 search_regs.start[i] = -1;
2841
487282dc 2842 search_regs.num_regs = length;
4746118a 2843 }
ca1d1d23 2844
c3762cbd 2845 for (i = 0;; i++)
41c01205
DK
2846 {
2847 marker = Fcar (list);
bd2cbd56 2848 if (BUFFERP (marker))
c3762cbd 2849 {
bd2cbd56 2850 last_thing_searched = marker;
c3762cbd
DK
2851 break;
2852 }
2853 if (i >= length)
2854 break;
41c01205
DK
2855 if (NILP (marker))
2856 {
2857 search_regs.start[i] = -1;
2858 list = Fcdr (list);
2859 }
2860 else
2861 {
2862 int from;
2863
2864 if (MARKERP (marker))
2865 {
2866 if (XMARKER (marker)->buffer == 0)
2867 XSETFASTINT (marker, 0);
2868 else
2869 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2870 }
2871
2872 CHECK_NUMBER_COERCE_MARKER (marker);
2873 from = XINT (marker);
2874 list = Fcdr (list);
2875
2876 marker = Fcar (list);
2877 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2878 XSETFASTINT (marker, 0);
2879
2880 CHECK_NUMBER_COERCE_MARKER (marker);
2881 search_regs.start[i] = from;
2882 search_regs.end[i] = XINT (marker);
2883 }
2884 list = Fcdr (list);
2885 }
ca1d1d23 2886
41c01205
DK
2887 for (; i < search_regs.num_regs; i++)
2888 search_regs.start[i] = -1;
2889 }
ca1d1d23 2890
177c0ea7 2891 return Qnil;
ca1d1d23
JB
2892}
2893
7074fde6
FP
2894/* If non-zero the match data have been saved in saved_search_regs
2895 during the execution of a sentinel or filter. */
75ebf74b 2896static int search_regs_saved;
7074fde6 2897static struct re_registers saved_search_regs;
41c01205 2898static Lisp_Object saved_last_thing_searched;
7074fde6
FP
2899
2900/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2901 if asynchronous code (filter or sentinel) is running. */
2902static void
2903save_search_regs ()
2904{
2905 if (!search_regs_saved)
2906 {
2907 saved_search_regs.num_regs = search_regs.num_regs;
2908 saved_search_regs.start = search_regs.start;
2909 saved_search_regs.end = search_regs.end;
41c01205
DK
2910 saved_last_thing_searched = last_thing_searched;
2911 last_thing_searched = Qnil;
7074fde6 2912 search_regs.num_regs = 0;
2d4a771a
RS
2913 search_regs.start = 0;
2914 search_regs.end = 0;
7074fde6
FP
2915
2916 search_regs_saved = 1;
2917 }
2918}
2919
2920/* Called upon exit from filters and sentinels. */
2921void
2922restore_match_data ()
2923{
2924 if (search_regs_saved)
2925 {
2926 if (search_regs.num_regs > 0)
2927 {
2928 xfree (search_regs.start);
2929 xfree (search_regs.end);
2930 }
2931 search_regs.num_regs = saved_search_regs.num_regs;
2932 search_regs.start = saved_search_regs.start;
2933 search_regs.end = saved_search_regs.end;
41c01205
DK
2934 last_thing_searched = saved_last_thing_searched;
2935 saved_last_thing_searched = Qnil;
7074fde6
FP
2936 search_regs_saved = 0;
2937 }
2938}
2939
ca1d1d23
JB
2940/* Quote a string to inactivate reg-expr chars */
2941
2942DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
8c1a1077
PJ
2943 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
2944 (string)
5806161b 2945 Lisp_Object string;
ca1d1d23
JB
2946{
2947 register unsigned char *in, *out, *end;
2948 register unsigned char *temp;
0c8533c6 2949 int backslashes_added = 0;
ca1d1d23 2950
b7826503 2951 CHECK_STRING (string);
ca1d1d23 2952
d5db4077 2953 temp = (unsigned char *) alloca (SBYTES (string) * 2);
ca1d1d23
JB
2954
2955 /* Now copy the data into the new string, inserting escapes. */
2956
d5db4077
KR
2957 in = SDATA (string);
2958 end = in + SBYTES (string);
177c0ea7 2959 out = temp;
ca1d1d23
JB
2960
2961 for (; in != end; in++)
2962 {
2963 if (*in == '[' || *in == ']'
2964 || *in == '*' || *in == '.' || *in == '\\'
2965 || *in == '?' || *in == '+'
2966 || *in == '^' || *in == '$')
0c8533c6 2967 *out++ = '\\', backslashes_added++;
ca1d1d23
JB
2968 *out++ = *in;
2969 }
2970
3f8100f1 2971 return make_specified_string (temp,
d5db4077 2972 SCHARS (string) + backslashes_added,
3f8100f1
RS
2973 out - temp,
2974 STRING_MULTIBYTE (string));
ca1d1d23 2975}
177c0ea7 2976\f
dfcf069d 2977void
ca1d1d23
JB
2978syms_of_search ()
2979{
2980 register int i;
2981
487282dc
KH
2982 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2983 {
2984 searchbufs[i].buf.allocated = 100;
b23c0a83 2985 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
487282dc
KH
2986 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2987 searchbufs[i].regexp = Qnil;
ecdb561e 2988 searchbufs[i].whitespace_regexp = Qnil;
487282dc
KH
2989 staticpro (&searchbufs[i].regexp);
2990 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2991 }
2992 searchbuf_head = &searchbufs[0];
ca1d1d23
JB
2993
2994 Qsearch_failed = intern ("search-failed");
2995 staticpro (&Qsearch_failed);
2996 Qinvalid_regexp = intern ("invalid-regexp");
2997 staticpro (&Qinvalid_regexp);
2998
2999 Fput (Qsearch_failed, Qerror_conditions,
3000 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3001 Fput (Qsearch_failed, Qerror_message,
3002 build_string ("Search failed"));
3003
3004 Fput (Qinvalid_regexp, Qerror_conditions,
3005 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3006 Fput (Qinvalid_regexp, Qerror_message,
3007 build_string ("Invalid regexp"));
3008
daa37602
JB
3009 last_thing_searched = Qnil;
3010 staticpro (&last_thing_searched);
3011
0f6af254
DK
3012 saved_last_thing_searched = Qnil;
3013 staticpro (&saved_last_thing_searched);
3014
f31a9a68
RS
3015 DEFVAR_LISP ("search-whitespace-regexp", &Vsearch_whitespace_regexp,
3016 /* doc: Regexp to substitute for bunches of spaces in regexp search.
3017Some commands use this for user-specified regexps.
3018Spaces that occur inside character classes or repetition operators
3019or other such regexp constructs are not replaced with this.
3020A value of nil (which is the normal value) means treat spaces literally. */);
3021 Vsearch_whitespace_regexp = Qnil;
3022
ca1d1d23 3023 defsubr (&Slooking_at);
b819a390
RS
3024 defsubr (&Sposix_looking_at);
3025 defsubr (&Sstring_match);
3026 defsubr (&Sposix_string_match);
ca1d1d23
JB
3027 defsubr (&Ssearch_forward);
3028 defsubr (&Ssearch_backward);
3029 defsubr (&Sword_search_forward);
3030 defsubr (&Sword_search_backward);
3031 defsubr (&Sre_search_forward);
3032 defsubr (&Sre_search_backward);
b819a390
RS
3033 defsubr (&Sposix_search_forward);
3034 defsubr (&Sposix_search_backward);
ca1d1d23
JB
3035 defsubr (&Sreplace_match);
3036 defsubr (&Smatch_beginning);
3037 defsubr (&Smatch_end);
3038 defsubr (&Smatch_data);
3f1c005b 3039 defsubr (&Sset_match_data);
ca1d1d23
JB
3040 defsubr (&Sregexp_quote);
3041}
ab5796a9
MB
3042
3043/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3044 (do not change this comment) */